Merge "jni: Add @CriticalNative support for MIPS32/MIPS64"
diff --git a/Android.mk b/Android.mk
index 2647268..b2716cd 100644
--- a/Android.mk
+++ b/Android.mk
@@ -568,3 +568,11 @@
 #   m art-boot-image ART_BOOT_IMAGE_EXTRA_ARGS=--dump-init-failures=fails.txt
 .PHONY: art-boot-image
 art-boot-image: $(DEFAULT_DEX_PREOPT_BUILT_IMAGE_FILENAME)
+
+.PHONY: art-job-images
+art-job-images: \
+  $(DEFAULT_DEX_PREOPT_BUILT_IMAGE_FILENAME) \
+  $(2ND_DEFAULT_DEX_PREOPT_BUILT_IMAGE_FILENAME) \
+  $(HOST_OUT_EXECUTABLES)/dex2oats \
+  $(HOST_OUT_EXECUTABLES)/dex2oatds \
+  $(HOST_OUT_EXECUTABLES)/profman
diff --git a/benchmark/const-class/info.txt b/benchmark/const-class/info.txt
new file mode 100644
index 0000000..ed0b827
--- /dev/null
+++ b/benchmark/const-class/info.txt
@@ -0,0 +1 @@
+Benchmarks for repeating const-class instructions in a loop.
diff --git a/benchmark/const-class/src/ConstClassBenchmark.java b/benchmark/const-class/src/ConstClassBenchmark.java
new file mode 100644
index 0000000..d45b49f
--- /dev/null
+++ b/benchmark/const-class/src/ConstClassBenchmark.java
@@ -0,0 +1,1071 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class ConstClassBenchmark {
+    // Define 1025 classes with consecutive type indexes in the dex file.
+    // The tests below rely on the knowledge that ART uses the low 10 bits
+    // of the type index as the hash into DexCache types array.
+    // Note: n == n + 1024 (mod 2^10), n + 1 != n + 1023 (mod 2^10).
+    public static class TestClass_0000 {}
+    public static class TestClass_0001 {}
+    public static class TestClass_0002 {}
+    public static class TestClass_0003 {}
+    public static class TestClass_0004 {}
+    public static class TestClass_0005 {}
+    public static class TestClass_0006 {}
+    public static class TestClass_0007 {}
+    public static class TestClass_0008 {}
+    public static class TestClass_0009 {}
+    public static class TestClass_0010 {}
+    public static class TestClass_0011 {}
+    public static class TestClass_0012 {}
+    public static class TestClass_0013 {}
+    public static class TestClass_0014 {}
+    public static class TestClass_0015 {}
+    public static class TestClass_0016 {}
+    public static class TestClass_0017 {}
+    public static class TestClass_0018 {}
+    public static class TestClass_0019 {}
+    public static class TestClass_0020 {}
+    public static class TestClass_0021 {}
+    public static class TestClass_0022 {}
+    public static class TestClass_0023 {}
+    public static class TestClass_0024 {}
+    public static class TestClass_0025 {}
+    public static class TestClass_0026 {}
+    public static class TestClass_0027 {}
+    public static class TestClass_0028 {}
+    public static class TestClass_0029 {}
+    public static class TestClass_0030 {}
+    public static class TestClass_0031 {}
+    public static class TestClass_0032 {}
+    public static class TestClass_0033 {}
+    public static class TestClass_0034 {}
+    public static class TestClass_0035 {}
+    public static class TestClass_0036 {}
+    public static class TestClass_0037 {}
+    public static class TestClass_0038 {}
+    public static class TestClass_0039 {}
+    public static class TestClass_0040 {}
+    public static class TestClass_0041 {}
+    public static class TestClass_0042 {}
+    public static class TestClass_0043 {}
+    public static class TestClass_0044 {}
+    public static class TestClass_0045 {}
+    public static class TestClass_0046 {}
+    public static class TestClass_0047 {}
+    public static class TestClass_0048 {}
+    public static class TestClass_0049 {}
+    public static class TestClass_0050 {}
+    public static class TestClass_0051 {}
+    public static class TestClass_0052 {}
+    public static class TestClass_0053 {}
+    public static class TestClass_0054 {}
+    public static class TestClass_0055 {}
+    public static class TestClass_0056 {}
+    public static class TestClass_0057 {}
+    public static class TestClass_0058 {}
+    public static class TestClass_0059 {}
+    public static class TestClass_0060 {}
+    public static class TestClass_0061 {}
+    public static class TestClass_0062 {}
+    public static class TestClass_0063 {}
+    public static class TestClass_0064 {}
+    public static class TestClass_0065 {}
+    public static class TestClass_0066 {}
+    public static class TestClass_0067 {}
+    public static class TestClass_0068 {}
+    public static class TestClass_0069 {}
+    public static class TestClass_0070 {}
+    public static class TestClass_0071 {}
+    public static class TestClass_0072 {}
+    public static class TestClass_0073 {}
+    public static class TestClass_0074 {}
+    public static class TestClass_0075 {}
+    public static class TestClass_0076 {}
+    public static class TestClass_0077 {}
+    public static class TestClass_0078 {}
+    public static class TestClass_0079 {}
+    public static class TestClass_0080 {}
+    public static class TestClass_0081 {}
+    public static class TestClass_0082 {}
+    public static class TestClass_0083 {}
+    public static class TestClass_0084 {}
+    public static class TestClass_0085 {}
+    public static class TestClass_0086 {}
+    public static class TestClass_0087 {}
+    public static class TestClass_0088 {}
+    public static class TestClass_0089 {}
+    public static class TestClass_0090 {}
+    public static class TestClass_0091 {}
+    public static class TestClass_0092 {}
+    public static class TestClass_0093 {}
+    public static class TestClass_0094 {}
+    public static class TestClass_0095 {}
+    public static class TestClass_0096 {}
+    public static class TestClass_0097 {}
+    public static class TestClass_0098 {}
+    public static class TestClass_0099 {}
+    public static class TestClass_0100 {}
+    public static class TestClass_0101 {}
+    public static class TestClass_0102 {}
+    public static class TestClass_0103 {}
+    public static class TestClass_0104 {}
+    public static class TestClass_0105 {}
+    public static class TestClass_0106 {}
+    public static class TestClass_0107 {}
+    public static class TestClass_0108 {}
+    public static class TestClass_0109 {}
+    public static class TestClass_0110 {}
+    public static class TestClass_0111 {}
+    public static class TestClass_0112 {}
+    public static class TestClass_0113 {}
+    public static class TestClass_0114 {}
+    public static class TestClass_0115 {}
+    public static class TestClass_0116 {}
+    public static class TestClass_0117 {}
+    public static class TestClass_0118 {}
+    public static class TestClass_0119 {}
+    public static class TestClass_0120 {}
+    public static class TestClass_0121 {}
+    public static class TestClass_0122 {}
+    public static class TestClass_0123 {}
+    public static class TestClass_0124 {}
+    public static class TestClass_0125 {}
+    public static class TestClass_0126 {}
+    public static class TestClass_0127 {}
+    public static class TestClass_0128 {}
+    public static class TestClass_0129 {}
+    public static class TestClass_0130 {}
+    public static class TestClass_0131 {}
+    public static class TestClass_0132 {}
+    public static class TestClass_0133 {}
+    public static class TestClass_0134 {}
+    public static class TestClass_0135 {}
+    public static class TestClass_0136 {}
+    public static class TestClass_0137 {}
+    public static class TestClass_0138 {}
+    public static class TestClass_0139 {}
+    public static class TestClass_0140 {}
+    public static class TestClass_0141 {}
+    public static class TestClass_0142 {}
+    public static class TestClass_0143 {}
+    public static class TestClass_0144 {}
+    public static class TestClass_0145 {}
+    public static class TestClass_0146 {}
+    public static class TestClass_0147 {}
+    public static class TestClass_0148 {}
+    public static class TestClass_0149 {}
+    public static class TestClass_0150 {}
+    public static class TestClass_0151 {}
+    public static class TestClass_0152 {}
+    public static class TestClass_0153 {}
+    public static class TestClass_0154 {}
+    public static class TestClass_0155 {}
+    public static class TestClass_0156 {}
+    public static class TestClass_0157 {}
+    public static class TestClass_0158 {}
+    public static class TestClass_0159 {}
+    public static class TestClass_0160 {}
+    public static class TestClass_0161 {}
+    public static class TestClass_0162 {}
+    public static class TestClass_0163 {}
+    public static class TestClass_0164 {}
+    public static class TestClass_0165 {}
+    public static class TestClass_0166 {}
+    public static class TestClass_0167 {}
+    public static class TestClass_0168 {}
+    public static class TestClass_0169 {}
+    public static class TestClass_0170 {}
+    public static class TestClass_0171 {}
+    public static class TestClass_0172 {}
+    public static class TestClass_0173 {}
+    public static class TestClass_0174 {}
+    public static class TestClass_0175 {}
+    public static class TestClass_0176 {}
+    public static class TestClass_0177 {}
+    public static class TestClass_0178 {}
+    public static class TestClass_0179 {}
+    public static class TestClass_0180 {}
+    public static class TestClass_0181 {}
+    public static class TestClass_0182 {}
+    public static class TestClass_0183 {}
+    public static class TestClass_0184 {}
+    public static class TestClass_0185 {}
+    public static class TestClass_0186 {}
+    public static class TestClass_0187 {}
+    public static class TestClass_0188 {}
+    public static class TestClass_0189 {}
+    public static class TestClass_0190 {}
+    public static class TestClass_0191 {}
+    public static class TestClass_0192 {}
+    public static class TestClass_0193 {}
+    public static class TestClass_0194 {}
+    public static class TestClass_0195 {}
+    public static class TestClass_0196 {}
+    public static class TestClass_0197 {}
+    public static class TestClass_0198 {}
+    public static class TestClass_0199 {}
+    public static class TestClass_0200 {}
+    public static class TestClass_0201 {}
+    public static class TestClass_0202 {}
+    public static class TestClass_0203 {}
+    public static class TestClass_0204 {}
+    public static class TestClass_0205 {}
+    public static class TestClass_0206 {}
+    public static class TestClass_0207 {}
+    public static class TestClass_0208 {}
+    public static class TestClass_0209 {}
+    public static class TestClass_0210 {}
+    public static class TestClass_0211 {}
+    public static class TestClass_0212 {}
+    public static class TestClass_0213 {}
+    public static class TestClass_0214 {}
+    public static class TestClass_0215 {}
+    public static class TestClass_0216 {}
+    public static class TestClass_0217 {}
+    public static class TestClass_0218 {}
+    public static class TestClass_0219 {}
+    public static class TestClass_0220 {}
+    public static class TestClass_0221 {}
+    public static class TestClass_0222 {}
+    public static class TestClass_0223 {}
+    public static class TestClass_0224 {}
+    public static class TestClass_0225 {}
+    public static class TestClass_0226 {}
+    public static class TestClass_0227 {}
+    public static class TestClass_0228 {}
+    public static class TestClass_0229 {}
+    public static class TestClass_0230 {}
+    public static class TestClass_0231 {}
+    public static class TestClass_0232 {}
+    public static class TestClass_0233 {}
+    public static class TestClass_0234 {}
+    public static class TestClass_0235 {}
+    public static class TestClass_0236 {}
+    public static class TestClass_0237 {}
+    public static class TestClass_0238 {}
+    public static class TestClass_0239 {}
+    public static class TestClass_0240 {}
+    public static class TestClass_0241 {}
+    public static class TestClass_0242 {}
+    public static class TestClass_0243 {}
+    public static class TestClass_0244 {}
+    public static class TestClass_0245 {}
+    public static class TestClass_0246 {}
+    public static class TestClass_0247 {}
+    public static class TestClass_0248 {}
+    public static class TestClass_0249 {}
+    public static class TestClass_0250 {}
+    public static class TestClass_0251 {}
+    public static class TestClass_0252 {}
+    public static class TestClass_0253 {}
+    public static class TestClass_0254 {}
+    public static class TestClass_0255 {}
+    public static class TestClass_0256 {}
+    public static class TestClass_0257 {}
+    public static class TestClass_0258 {}
+    public static class TestClass_0259 {}
+    public static class TestClass_0260 {}
+    public static class TestClass_0261 {}
+    public static class TestClass_0262 {}
+    public static class TestClass_0263 {}
+    public static class TestClass_0264 {}
+    public static class TestClass_0265 {}
+    public static class TestClass_0266 {}
+    public static class TestClass_0267 {}
+    public static class TestClass_0268 {}
+    public static class TestClass_0269 {}
+    public static class TestClass_0270 {}
+    public static class TestClass_0271 {}
+    public static class TestClass_0272 {}
+    public static class TestClass_0273 {}
+    public static class TestClass_0274 {}
+    public static class TestClass_0275 {}
+    public static class TestClass_0276 {}
+    public static class TestClass_0277 {}
+    public static class TestClass_0278 {}
+    public static class TestClass_0279 {}
+    public static class TestClass_0280 {}
+    public static class TestClass_0281 {}
+    public static class TestClass_0282 {}
+    public static class TestClass_0283 {}
+    public static class TestClass_0284 {}
+    public static class TestClass_0285 {}
+    public static class TestClass_0286 {}
+    public static class TestClass_0287 {}
+    public static class TestClass_0288 {}
+    public static class TestClass_0289 {}
+    public static class TestClass_0290 {}
+    public static class TestClass_0291 {}
+    public static class TestClass_0292 {}
+    public static class TestClass_0293 {}
+    public static class TestClass_0294 {}
+    public static class TestClass_0295 {}
+    public static class TestClass_0296 {}
+    public static class TestClass_0297 {}
+    public static class TestClass_0298 {}
+    public static class TestClass_0299 {}
+    public static class TestClass_0300 {}
+    public static class TestClass_0301 {}
+    public static class TestClass_0302 {}
+    public static class TestClass_0303 {}
+    public static class TestClass_0304 {}
+    public static class TestClass_0305 {}
+    public static class TestClass_0306 {}
+    public static class TestClass_0307 {}
+    public static class TestClass_0308 {}
+    public static class TestClass_0309 {}
+    public static class TestClass_0310 {}
+    public static class TestClass_0311 {}
+    public static class TestClass_0312 {}
+    public static class TestClass_0313 {}
+    public static class TestClass_0314 {}
+    public static class TestClass_0315 {}
+    public static class TestClass_0316 {}
+    public static class TestClass_0317 {}
+    public static class TestClass_0318 {}
+    public static class TestClass_0319 {}
+    public static class TestClass_0320 {}
+    public static class TestClass_0321 {}
+    public static class TestClass_0322 {}
+    public static class TestClass_0323 {}
+    public static class TestClass_0324 {}
+    public static class TestClass_0325 {}
+    public static class TestClass_0326 {}
+    public static class TestClass_0327 {}
+    public static class TestClass_0328 {}
+    public static class TestClass_0329 {}
+    public static class TestClass_0330 {}
+    public static class TestClass_0331 {}
+    public static class TestClass_0332 {}
+    public static class TestClass_0333 {}
+    public static class TestClass_0334 {}
+    public static class TestClass_0335 {}
+    public static class TestClass_0336 {}
+    public static class TestClass_0337 {}
+    public static class TestClass_0338 {}
+    public static class TestClass_0339 {}
+    public static class TestClass_0340 {}
+    public static class TestClass_0341 {}
+    public static class TestClass_0342 {}
+    public static class TestClass_0343 {}
+    public static class TestClass_0344 {}
+    public static class TestClass_0345 {}
+    public static class TestClass_0346 {}
+    public static class TestClass_0347 {}
+    public static class TestClass_0348 {}
+    public static class TestClass_0349 {}
+    public static class TestClass_0350 {}
+    public static class TestClass_0351 {}
+    public static class TestClass_0352 {}
+    public static class TestClass_0353 {}
+    public static class TestClass_0354 {}
+    public static class TestClass_0355 {}
+    public static class TestClass_0356 {}
+    public static class TestClass_0357 {}
+    public static class TestClass_0358 {}
+    public static class TestClass_0359 {}
+    public static class TestClass_0360 {}
+    public static class TestClass_0361 {}
+    public static class TestClass_0362 {}
+    public static class TestClass_0363 {}
+    public static class TestClass_0364 {}
+    public static class TestClass_0365 {}
+    public static class TestClass_0366 {}
+    public static class TestClass_0367 {}
+    public static class TestClass_0368 {}
+    public static class TestClass_0369 {}
+    public static class TestClass_0370 {}
+    public static class TestClass_0371 {}
+    public static class TestClass_0372 {}
+    public static class TestClass_0373 {}
+    public static class TestClass_0374 {}
+    public static class TestClass_0375 {}
+    public static class TestClass_0376 {}
+    public static class TestClass_0377 {}
+    public static class TestClass_0378 {}
+    public static class TestClass_0379 {}
+    public static class TestClass_0380 {}
+    public static class TestClass_0381 {}
+    public static class TestClass_0382 {}
+    public static class TestClass_0383 {}
+    public static class TestClass_0384 {}
+    public static class TestClass_0385 {}
+    public static class TestClass_0386 {}
+    public static class TestClass_0387 {}
+    public static class TestClass_0388 {}
+    public static class TestClass_0389 {}
+    public static class TestClass_0390 {}
+    public static class TestClass_0391 {}
+    public static class TestClass_0392 {}
+    public static class TestClass_0393 {}
+    public static class TestClass_0394 {}
+    public static class TestClass_0395 {}
+    public static class TestClass_0396 {}
+    public static class TestClass_0397 {}
+    public static class TestClass_0398 {}
+    public static class TestClass_0399 {}
+    public static class TestClass_0400 {}
+    public static class TestClass_0401 {}
+    public static class TestClass_0402 {}
+    public static class TestClass_0403 {}
+    public static class TestClass_0404 {}
+    public static class TestClass_0405 {}
+    public static class TestClass_0406 {}
+    public static class TestClass_0407 {}
+    public static class TestClass_0408 {}
+    public static class TestClass_0409 {}
+    public static class TestClass_0410 {}
+    public static class TestClass_0411 {}
+    public static class TestClass_0412 {}
+    public static class TestClass_0413 {}
+    public static class TestClass_0414 {}
+    public static class TestClass_0415 {}
+    public static class TestClass_0416 {}
+    public static class TestClass_0417 {}
+    public static class TestClass_0418 {}
+    public static class TestClass_0419 {}
+    public static class TestClass_0420 {}
+    public static class TestClass_0421 {}
+    public static class TestClass_0422 {}
+    public static class TestClass_0423 {}
+    public static class TestClass_0424 {}
+    public static class TestClass_0425 {}
+    public static class TestClass_0426 {}
+    public static class TestClass_0427 {}
+    public static class TestClass_0428 {}
+    public static class TestClass_0429 {}
+    public static class TestClass_0430 {}
+    public static class TestClass_0431 {}
+    public static class TestClass_0432 {}
+    public static class TestClass_0433 {}
+    public static class TestClass_0434 {}
+    public static class TestClass_0435 {}
+    public static class TestClass_0436 {}
+    public static class TestClass_0437 {}
+    public static class TestClass_0438 {}
+    public static class TestClass_0439 {}
+    public static class TestClass_0440 {}
+    public static class TestClass_0441 {}
+    public static class TestClass_0442 {}
+    public static class TestClass_0443 {}
+    public static class TestClass_0444 {}
+    public static class TestClass_0445 {}
+    public static class TestClass_0446 {}
+    public static class TestClass_0447 {}
+    public static class TestClass_0448 {}
+    public static class TestClass_0449 {}
+    public static class TestClass_0450 {}
+    public static class TestClass_0451 {}
+    public static class TestClass_0452 {}
+    public static class TestClass_0453 {}
+    public static class TestClass_0454 {}
+    public static class TestClass_0455 {}
+    public static class TestClass_0456 {}
+    public static class TestClass_0457 {}
+    public static class TestClass_0458 {}
+    public static class TestClass_0459 {}
+    public static class TestClass_0460 {}
+    public static class TestClass_0461 {}
+    public static class TestClass_0462 {}
+    public static class TestClass_0463 {}
+    public static class TestClass_0464 {}
+    public static class TestClass_0465 {}
+    public static class TestClass_0466 {}
+    public static class TestClass_0467 {}
+    public static class TestClass_0468 {}
+    public static class TestClass_0469 {}
+    public static class TestClass_0470 {}
+    public static class TestClass_0471 {}
+    public static class TestClass_0472 {}
+    public static class TestClass_0473 {}
+    public static class TestClass_0474 {}
+    public static class TestClass_0475 {}
+    public static class TestClass_0476 {}
+    public static class TestClass_0477 {}
+    public static class TestClass_0478 {}
+    public static class TestClass_0479 {}
+    public static class TestClass_0480 {}
+    public static class TestClass_0481 {}
+    public static class TestClass_0482 {}
+    public static class TestClass_0483 {}
+    public static class TestClass_0484 {}
+    public static class TestClass_0485 {}
+    public static class TestClass_0486 {}
+    public static class TestClass_0487 {}
+    public static class TestClass_0488 {}
+    public static class TestClass_0489 {}
+    public static class TestClass_0490 {}
+    public static class TestClass_0491 {}
+    public static class TestClass_0492 {}
+    public static class TestClass_0493 {}
+    public static class TestClass_0494 {}
+    public static class TestClass_0495 {}
+    public static class TestClass_0496 {}
+    public static class TestClass_0497 {}
+    public static class TestClass_0498 {}
+    public static class TestClass_0499 {}
+    public static class TestClass_0500 {}
+    public static class TestClass_0501 {}
+    public static class TestClass_0502 {}
+    public static class TestClass_0503 {}
+    public static class TestClass_0504 {}
+    public static class TestClass_0505 {}
+    public static class TestClass_0506 {}
+    public static class TestClass_0507 {}
+    public static class TestClass_0508 {}
+    public static class TestClass_0509 {}
+    public static class TestClass_0510 {}
+    public static class TestClass_0511 {}
+    public static class TestClass_0512 {}
+    public static class TestClass_0513 {}
+    public static class TestClass_0514 {}
+    public static class TestClass_0515 {}
+    public static class TestClass_0516 {}
+    public static class TestClass_0517 {}
+    public static class TestClass_0518 {}
+    public static class TestClass_0519 {}
+    public static class TestClass_0520 {}
+    public static class TestClass_0521 {}
+    public static class TestClass_0522 {}
+    public static class TestClass_0523 {}
+    public static class TestClass_0524 {}
+    public static class TestClass_0525 {}
+    public static class TestClass_0526 {}
+    public static class TestClass_0527 {}
+    public static class TestClass_0528 {}
+    public static class TestClass_0529 {}
+    public static class TestClass_0530 {}
+    public static class TestClass_0531 {}
+    public static class TestClass_0532 {}
+    public static class TestClass_0533 {}
+    public static class TestClass_0534 {}
+    public static class TestClass_0535 {}
+    public static class TestClass_0536 {}
+    public static class TestClass_0537 {}
+    public static class TestClass_0538 {}
+    public static class TestClass_0539 {}
+    public static class TestClass_0540 {}
+    public static class TestClass_0541 {}
+    public static class TestClass_0542 {}
+    public static class TestClass_0543 {}
+    public static class TestClass_0544 {}
+    public static class TestClass_0545 {}
+    public static class TestClass_0546 {}
+    public static class TestClass_0547 {}
+    public static class TestClass_0548 {}
+    public static class TestClass_0549 {}
+    public static class TestClass_0550 {}
+    public static class TestClass_0551 {}
+    public static class TestClass_0552 {}
+    public static class TestClass_0553 {}
+    public static class TestClass_0554 {}
+    public static class TestClass_0555 {}
+    public static class TestClass_0556 {}
+    public static class TestClass_0557 {}
+    public static class TestClass_0558 {}
+    public static class TestClass_0559 {}
+    public static class TestClass_0560 {}
+    public static class TestClass_0561 {}
+    public static class TestClass_0562 {}
+    public static class TestClass_0563 {}
+    public static class TestClass_0564 {}
+    public static class TestClass_0565 {}
+    public static class TestClass_0566 {}
+    public static class TestClass_0567 {}
+    public static class TestClass_0568 {}
+    public static class TestClass_0569 {}
+    public static class TestClass_0570 {}
+    public static class TestClass_0571 {}
+    public static class TestClass_0572 {}
+    public static class TestClass_0573 {}
+    public static class TestClass_0574 {}
+    public static class TestClass_0575 {}
+    public static class TestClass_0576 {}
+    public static class TestClass_0577 {}
+    public static class TestClass_0578 {}
+    public static class TestClass_0579 {}
+    public static class TestClass_0580 {}
+    public static class TestClass_0581 {}
+    public static class TestClass_0582 {}
+    public static class TestClass_0583 {}
+    public static class TestClass_0584 {}
+    public static class TestClass_0585 {}
+    public static class TestClass_0586 {}
+    public static class TestClass_0587 {}
+    public static class TestClass_0588 {}
+    public static class TestClass_0589 {}
+    public static class TestClass_0590 {}
+    public static class TestClass_0591 {}
+    public static class TestClass_0592 {}
+    public static class TestClass_0593 {}
+    public static class TestClass_0594 {}
+    public static class TestClass_0595 {}
+    public static class TestClass_0596 {}
+    public static class TestClass_0597 {}
+    public static class TestClass_0598 {}
+    public static class TestClass_0599 {}
+    public static class TestClass_0600 {}
+    public static class TestClass_0601 {}
+    public static class TestClass_0602 {}
+    public static class TestClass_0603 {}
+    public static class TestClass_0604 {}
+    public static class TestClass_0605 {}
+    public static class TestClass_0606 {}
+    public static class TestClass_0607 {}
+    public static class TestClass_0608 {}
+    public static class TestClass_0609 {}
+    public static class TestClass_0610 {}
+    public static class TestClass_0611 {}
+    public static class TestClass_0612 {}
+    public static class TestClass_0613 {}
+    public static class TestClass_0614 {}
+    public static class TestClass_0615 {}
+    public static class TestClass_0616 {}
+    public static class TestClass_0617 {}
+    public static class TestClass_0618 {}
+    public static class TestClass_0619 {}
+    public static class TestClass_0620 {}
+    public static class TestClass_0621 {}
+    public static class TestClass_0622 {}
+    public static class TestClass_0623 {}
+    public static class TestClass_0624 {}
+    public static class TestClass_0625 {}
+    public static class TestClass_0626 {}
+    public static class TestClass_0627 {}
+    public static class TestClass_0628 {}
+    public static class TestClass_0629 {}
+    public static class TestClass_0630 {}
+    public static class TestClass_0631 {}
+    public static class TestClass_0632 {}
+    public static class TestClass_0633 {}
+    public static class TestClass_0634 {}
+    public static class TestClass_0635 {}
+    public static class TestClass_0636 {}
+    public static class TestClass_0637 {}
+    public static class TestClass_0638 {}
+    public static class TestClass_0639 {}
+    public static class TestClass_0640 {}
+    public static class TestClass_0641 {}
+    public static class TestClass_0642 {}
+    public static class TestClass_0643 {}
+    public static class TestClass_0644 {}
+    public static class TestClass_0645 {}
+    public static class TestClass_0646 {}
+    public static class TestClass_0647 {}
+    public static class TestClass_0648 {}
+    public static class TestClass_0649 {}
+    public static class TestClass_0650 {}
+    public static class TestClass_0651 {}
+    public static class TestClass_0652 {}
+    public static class TestClass_0653 {}
+    public static class TestClass_0654 {}
+    public static class TestClass_0655 {}
+    public static class TestClass_0656 {}
+    public static class TestClass_0657 {}
+    public static class TestClass_0658 {}
+    public static class TestClass_0659 {}
+    public static class TestClass_0660 {}
+    public static class TestClass_0661 {}
+    public static class TestClass_0662 {}
+    public static class TestClass_0663 {}
+    public static class TestClass_0664 {}
+    public static class TestClass_0665 {}
+    public static class TestClass_0666 {}
+    public static class TestClass_0667 {}
+    public static class TestClass_0668 {}
+    public static class TestClass_0669 {}
+    public static class TestClass_0670 {}
+    public static class TestClass_0671 {}
+    public static class TestClass_0672 {}
+    public static class TestClass_0673 {}
+    public static class TestClass_0674 {}
+    public static class TestClass_0675 {}
+    public static class TestClass_0676 {}
+    public static class TestClass_0677 {}
+    public static class TestClass_0678 {}
+    public static class TestClass_0679 {}
+    public static class TestClass_0680 {}
+    public static class TestClass_0681 {}
+    public static class TestClass_0682 {}
+    public static class TestClass_0683 {}
+    public static class TestClass_0684 {}
+    public static class TestClass_0685 {}
+    public static class TestClass_0686 {}
+    public static class TestClass_0687 {}
+    public static class TestClass_0688 {}
+    public static class TestClass_0689 {}
+    public static class TestClass_0690 {}
+    public static class TestClass_0691 {}
+    public static class TestClass_0692 {}
+    public static class TestClass_0693 {}
+    public static class TestClass_0694 {}
+    public static class TestClass_0695 {}
+    public static class TestClass_0696 {}
+    public static class TestClass_0697 {}
+    public static class TestClass_0698 {}
+    public static class TestClass_0699 {}
+    public static class TestClass_0700 {}
+    public static class TestClass_0701 {}
+    public static class TestClass_0702 {}
+    public static class TestClass_0703 {}
+    public static class TestClass_0704 {}
+    public static class TestClass_0705 {}
+    public static class TestClass_0706 {}
+    public static class TestClass_0707 {}
+    public static class TestClass_0708 {}
+    public static class TestClass_0709 {}
+    public static class TestClass_0710 {}
+    public static class TestClass_0711 {}
+    public static class TestClass_0712 {}
+    public static class TestClass_0713 {}
+    public static class TestClass_0714 {}
+    public static class TestClass_0715 {}
+    public static class TestClass_0716 {}
+    public static class TestClass_0717 {}
+    public static class TestClass_0718 {}
+    public static class TestClass_0719 {}
+    public static class TestClass_0720 {}
+    public static class TestClass_0721 {}
+    public static class TestClass_0722 {}
+    public static class TestClass_0723 {}
+    public static class TestClass_0724 {}
+    public static class TestClass_0725 {}
+    public static class TestClass_0726 {}
+    public static class TestClass_0727 {}
+    public static class TestClass_0728 {}
+    public static class TestClass_0729 {}
+    public static class TestClass_0730 {}
+    public static class TestClass_0731 {}
+    public static class TestClass_0732 {}
+    public static class TestClass_0733 {}
+    public static class TestClass_0734 {}
+    public static class TestClass_0735 {}
+    public static class TestClass_0736 {}
+    public static class TestClass_0737 {}
+    public static class TestClass_0738 {}
+    public static class TestClass_0739 {}
+    public static class TestClass_0740 {}
+    public static class TestClass_0741 {}
+    public static class TestClass_0742 {}
+    public static class TestClass_0743 {}
+    public static class TestClass_0744 {}
+    public static class TestClass_0745 {}
+    public static class TestClass_0746 {}
+    public static class TestClass_0747 {}
+    public static class TestClass_0748 {}
+    public static class TestClass_0749 {}
+    public static class TestClass_0750 {}
+    public static class TestClass_0751 {}
+    public static class TestClass_0752 {}
+    public static class TestClass_0753 {}
+    public static class TestClass_0754 {}
+    public static class TestClass_0755 {}
+    public static class TestClass_0756 {}
+    public static class TestClass_0757 {}
+    public static class TestClass_0758 {}
+    public static class TestClass_0759 {}
+    public static class TestClass_0760 {}
+    public static class TestClass_0761 {}
+    public static class TestClass_0762 {}
+    public static class TestClass_0763 {}
+    public static class TestClass_0764 {}
+    public static class TestClass_0765 {}
+    public static class TestClass_0766 {}
+    public static class TestClass_0767 {}
+    public static class TestClass_0768 {}
+    public static class TestClass_0769 {}
+    public static class TestClass_0770 {}
+    public static class TestClass_0771 {}
+    public static class TestClass_0772 {}
+    public static class TestClass_0773 {}
+    public static class TestClass_0774 {}
+    public static class TestClass_0775 {}
+    public static class TestClass_0776 {}
+    public static class TestClass_0777 {}
+    public static class TestClass_0778 {}
+    public static class TestClass_0779 {}
+    public static class TestClass_0780 {}
+    public static class TestClass_0781 {}
+    public static class TestClass_0782 {}
+    public static class TestClass_0783 {}
+    public static class TestClass_0784 {}
+    public static class TestClass_0785 {}
+    public static class TestClass_0786 {}
+    public static class TestClass_0787 {}
+    public static class TestClass_0788 {}
+    public static class TestClass_0789 {}
+    public static class TestClass_0790 {}
+    public static class TestClass_0791 {}
+    public static class TestClass_0792 {}
+    public static class TestClass_0793 {}
+    public static class TestClass_0794 {}
+    public static class TestClass_0795 {}
+    public static class TestClass_0796 {}
+    public static class TestClass_0797 {}
+    public static class TestClass_0798 {}
+    public static class TestClass_0799 {}
+    public static class TestClass_0800 {}
+    public static class TestClass_0801 {}
+    public static class TestClass_0802 {}
+    public static class TestClass_0803 {}
+    public static class TestClass_0804 {}
+    public static class TestClass_0805 {}
+    public static class TestClass_0806 {}
+    public static class TestClass_0807 {}
+    public static class TestClass_0808 {}
+    public static class TestClass_0809 {}
+    public static class TestClass_0810 {}
+    public static class TestClass_0811 {}
+    public static class TestClass_0812 {}
+    public static class TestClass_0813 {}
+    public static class TestClass_0814 {}
+    public static class TestClass_0815 {}
+    public static class TestClass_0816 {}
+    public static class TestClass_0817 {}
+    public static class TestClass_0818 {}
+    public static class TestClass_0819 {}
+    public static class TestClass_0820 {}
+    public static class TestClass_0821 {}
+    public static class TestClass_0822 {}
+    public static class TestClass_0823 {}
+    public static class TestClass_0824 {}
+    public static class TestClass_0825 {}
+    public static class TestClass_0826 {}
+    public static class TestClass_0827 {}
+    public static class TestClass_0828 {}
+    public static class TestClass_0829 {}
+    public static class TestClass_0830 {}
+    public static class TestClass_0831 {}
+    public static class TestClass_0832 {}
+    public static class TestClass_0833 {}
+    public static class TestClass_0834 {}
+    public static class TestClass_0835 {}
+    public static class TestClass_0836 {}
+    public static class TestClass_0837 {}
+    public static class TestClass_0838 {}
+    public static class TestClass_0839 {}
+    public static class TestClass_0840 {}
+    public static class TestClass_0841 {}
+    public static class TestClass_0842 {}
+    public static class TestClass_0843 {}
+    public static class TestClass_0844 {}
+    public static class TestClass_0845 {}
+    public static class TestClass_0846 {}
+    public static class TestClass_0847 {}
+    public static class TestClass_0848 {}
+    public static class TestClass_0849 {}
+    public static class TestClass_0850 {}
+    public static class TestClass_0851 {}
+    public static class TestClass_0852 {}
+    public static class TestClass_0853 {}
+    public static class TestClass_0854 {}
+    public static class TestClass_0855 {}
+    public static class TestClass_0856 {}
+    public static class TestClass_0857 {}
+    public static class TestClass_0858 {}
+    public static class TestClass_0859 {}
+    public static class TestClass_0860 {}
+    public static class TestClass_0861 {}
+    public static class TestClass_0862 {}
+    public static class TestClass_0863 {}
+    public static class TestClass_0864 {}
+    public static class TestClass_0865 {}
+    public static class TestClass_0866 {}
+    public static class TestClass_0867 {}
+    public static class TestClass_0868 {}
+    public static class TestClass_0869 {}
+    public static class TestClass_0870 {}
+    public static class TestClass_0871 {}
+    public static class TestClass_0872 {}
+    public static class TestClass_0873 {}
+    public static class TestClass_0874 {}
+    public static class TestClass_0875 {}
+    public static class TestClass_0876 {}
+    public static class TestClass_0877 {}
+    public static class TestClass_0878 {}
+    public static class TestClass_0879 {}
+    public static class TestClass_0880 {}
+    public static class TestClass_0881 {}
+    public static class TestClass_0882 {}
+    public static class TestClass_0883 {}
+    public static class TestClass_0884 {}
+    public static class TestClass_0885 {}
+    public static class TestClass_0886 {}
+    public static class TestClass_0887 {}
+    public static class TestClass_0888 {}
+    public static class TestClass_0889 {}
+    public static class TestClass_0890 {}
+    public static class TestClass_0891 {}
+    public static class TestClass_0892 {}
+    public static class TestClass_0893 {}
+    public static class TestClass_0894 {}
+    public static class TestClass_0895 {}
+    public static class TestClass_0896 {}
+    public static class TestClass_0897 {}
+    public static class TestClass_0898 {}
+    public static class TestClass_0899 {}
+    public static class TestClass_0900 {}
+    public static class TestClass_0901 {}
+    public static class TestClass_0902 {}
+    public static class TestClass_0903 {}
+    public static class TestClass_0904 {}
+    public static class TestClass_0905 {}
+    public static class TestClass_0906 {}
+    public static class TestClass_0907 {}
+    public static class TestClass_0908 {}
+    public static class TestClass_0909 {}
+    public static class TestClass_0910 {}
+    public static class TestClass_0911 {}
+    public static class TestClass_0912 {}
+    public static class TestClass_0913 {}
+    public static class TestClass_0914 {}
+    public static class TestClass_0915 {}
+    public static class TestClass_0916 {}
+    public static class TestClass_0917 {}
+    public static class TestClass_0918 {}
+    public static class TestClass_0919 {}
+    public static class TestClass_0920 {}
+    public static class TestClass_0921 {}
+    public static class TestClass_0922 {}
+    public static class TestClass_0923 {}
+    public static class TestClass_0924 {}
+    public static class TestClass_0925 {}
+    public static class TestClass_0926 {}
+    public static class TestClass_0927 {}
+    public static class TestClass_0928 {}
+    public static class TestClass_0929 {}
+    public static class TestClass_0930 {}
+    public static class TestClass_0931 {}
+    public static class TestClass_0932 {}
+    public static class TestClass_0933 {}
+    public static class TestClass_0934 {}
+    public static class TestClass_0935 {}
+    public static class TestClass_0936 {}
+    public static class TestClass_0937 {}
+    public static class TestClass_0938 {}
+    public static class TestClass_0939 {}
+    public static class TestClass_0940 {}
+    public static class TestClass_0941 {}
+    public static class TestClass_0942 {}
+    public static class TestClass_0943 {}
+    public static class TestClass_0944 {}
+    public static class TestClass_0945 {}
+    public static class TestClass_0946 {}
+    public static class TestClass_0947 {}
+    public static class TestClass_0948 {}
+    public static class TestClass_0949 {}
+    public static class TestClass_0950 {}
+    public static class TestClass_0951 {}
+    public static class TestClass_0952 {}
+    public static class TestClass_0953 {}
+    public static class TestClass_0954 {}
+    public static class TestClass_0955 {}
+    public static class TestClass_0956 {}
+    public static class TestClass_0957 {}
+    public static class TestClass_0958 {}
+    public static class TestClass_0959 {}
+    public static class TestClass_0960 {}
+    public static class TestClass_0961 {}
+    public static class TestClass_0962 {}
+    public static class TestClass_0963 {}
+    public static class TestClass_0964 {}
+    public static class TestClass_0965 {}
+    public static class TestClass_0966 {}
+    public static class TestClass_0967 {}
+    public static class TestClass_0968 {}
+    public static class TestClass_0969 {}
+    public static class TestClass_0970 {}
+    public static class TestClass_0971 {}
+    public static class TestClass_0972 {}
+    public static class TestClass_0973 {}
+    public static class TestClass_0974 {}
+    public static class TestClass_0975 {}
+    public static class TestClass_0976 {}
+    public static class TestClass_0977 {}
+    public static class TestClass_0978 {}
+    public static class TestClass_0979 {}
+    public static class TestClass_0980 {}
+    public static class TestClass_0981 {}
+    public static class TestClass_0982 {}
+    public static class TestClass_0983 {}
+    public static class TestClass_0984 {}
+    public static class TestClass_0985 {}
+    public static class TestClass_0986 {}
+    public static class TestClass_0987 {}
+    public static class TestClass_0988 {}
+    public static class TestClass_0989 {}
+    public static class TestClass_0990 {}
+    public static class TestClass_0991 {}
+    public static class TestClass_0992 {}
+    public static class TestClass_0993 {}
+    public static class TestClass_0994 {}
+    public static class TestClass_0995 {}
+    public static class TestClass_0996 {}
+    public static class TestClass_0997 {}
+    public static class TestClass_0998 {}
+    public static class TestClass_0999 {}
+    public static class TestClass_1000 {}
+    public static class TestClass_1001 {}
+    public static class TestClass_1002 {}
+    public static class TestClass_1003 {}
+    public static class TestClass_1004 {}
+    public static class TestClass_1005 {}
+    public static class TestClass_1006 {}
+    public static class TestClass_1007 {}
+    public static class TestClass_1008 {}
+    public static class TestClass_1009 {}
+    public static class TestClass_1010 {}
+    public static class TestClass_1011 {}
+    public static class TestClass_1012 {}
+    public static class TestClass_1013 {}
+    public static class TestClass_1014 {}
+    public static class TestClass_1015 {}
+    public static class TestClass_1016 {}
+    public static class TestClass_1017 {}
+    public static class TestClass_1018 {}
+    public static class TestClass_1019 {}
+    public static class TestClass_1020 {}
+    public static class TestClass_1021 {}
+    public static class TestClass_1022 {}
+    public static class TestClass_1023 {}
+    public static class TestClass_1024 {}
+
+    public void timeConstClassWithConflict(int count) {
+        Class<?> class0001 = TestClass_0001.class;
+        for (int i = 0; i < count; ++i) {
+            $noinline$foo(class0001);  // Prevent LICM on the TestClass_xxxx.class below.
+            $noinline$foo(TestClass_0000.class);
+            $noinline$foo(TestClass_1024.class);
+        }
+    }
+
+    public void timeConstClassWithoutConflict(int count) {
+        Class<?> class0000 = TestClass_0000.class;
+        for (int i = 0; i < count; ++i) {
+            $noinline$foo(class0000);  // Prevent LICM on the TestClass_xxxx.class below.
+            $noinline$foo(TestClass_0001.class);
+            $noinline$foo(TestClass_1023.class);
+        }
+    }
+
+    static void $noinline$foo(Class<?> s) {
+        if (doThrow) { throw new Error(); }
+    }
+
+    public static boolean doThrow = false;
+}
diff --git a/benchmark/const-string/src/ConstStringBenchmark.java b/benchmark/const-string/src/ConstStringBenchmark.java
index 2beb0a4..2359a5f 100644
--- a/benchmark/const-string/src/ConstStringBenchmark.java
+++ b/benchmark/const-string/src/ConstStringBenchmark.java
@@ -18,6 +18,7 @@
     // Initialize 1025 strings with consecutive string indexes in the dex file.
     // The tests below rely on the knowledge that ART uses the low 10 bits
     // of the string index as the hash into DexCache strings array.
+    // Note: n == n + 1024 (mod 2^10), n + 1 != n + 1023 (mod 2^10).
     public static final String string_0000 = "TestString_0000";
     public static final String string_0001 = "TestString_0001";
     public static final String string_0002 = "TestString_0002";
@@ -1045,21 +1046,21 @@
     public static final String string_1024 = "TestString_1024";
 
     public void timeConstStringsWithConflict(int count) {
-      for (int i = 0; i < count; ++i) {
-        $noinline$foo("TestString_0000");
-        $noinline$foo("TestString_1024");
-      }
+        for (int i = 0; i < count; ++i) {
+            $noinline$foo("TestString_0000");
+            $noinline$foo("TestString_1024");
+        }
     }
 
     public void timeConstStringsWithoutConflict(int count) {
-      for (int i = 0; i < count; ++i) {
-        $noinline$foo("TestString_0001");
-        $noinline$foo("TestString_1023");
-      }
+        for (int i = 0; i < count; ++i) {
+            $noinline$foo("TestString_0001");
+            $noinline$foo("TestString_1023");
+        }
     }
 
     static void $noinline$foo(String s) {
-      if (doThrow) { throw new Error(); }
+        if (doThrow) { throw new Error(); }
     }
 
     public static boolean doThrow = false;
diff --git a/benchmark/string-indexof/info.txt b/benchmark/string-indexof/info.txt
new file mode 100644
index 0000000..cc04217
--- /dev/null
+++ b/benchmark/string-indexof/info.txt
@@ -0,0 +1 @@
+Benchmarks for repeating String.indexOf() instructions in a loop.
diff --git a/benchmark/string-indexof/src/StringIndexOfBenchmark.java b/benchmark/string-indexof/src/StringIndexOfBenchmark.java
new file mode 100644
index 0000000..481a27a
--- /dev/null
+++ b/benchmark/string-indexof/src/StringIndexOfBenchmark.java
@@ -0,0 +1,122 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class StringIndexOfBenchmark {
+    public static final String string36 = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";  // length = 36
+
+    public void timeIndexOf0(int count) {
+        final char c = '0';
+        String s = string36;
+        for (int i = 0; i < count; ++i) {
+            $noinline$indexOf(s, c);
+        }
+    }
+
+    public void timeIndexOf1(int count) {
+        final char c = '1';
+        String s = string36;
+        for (int i = 0; i < count; ++i) {
+            $noinline$indexOf(s, c);
+        }
+    }
+
+    public void timeIndexOf2(int count) {
+        final char c = '2';
+        String s = string36;
+        for (int i = 0; i < count; ++i) {
+            $noinline$indexOf(s, c);
+        }
+    }
+
+    public void timeIndexOf3(int count) {
+        final char c = '3';
+        String s = string36;
+        for (int i = 0; i < count; ++i) {
+            $noinline$indexOf(s, c);
+        }
+    }
+
+    public void timeIndexOf4(int count) {
+        final char c = '4';
+        String s = string36;
+        for (int i = 0; i < count; ++i) {
+            $noinline$indexOf(s, c);
+        }
+    }
+
+    public void timeIndexOf7(int count) {
+        final char c = '7';
+        String s = string36;
+        for (int i = 0; i < count; ++i) {
+            $noinline$indexOf(s, c);
+        }
+    }
+
+    public void timeIndexOf8(int count) {
+        final char c = '8';
+        String s = string36;
+        for (int i = 0; i < count; ++i) {
+            $noinline$indexOf(s, c);
+        }
+    }
+
+    public void timeIndexOfF(int count) {
+        final char c = 'F';
+        String s = string36;
+        for (int i = 0; i < count; ++i) {
+            $noinline$indexOf(s, c);
+        }
+    }
+
+    public void timeIndexOfG(int count) {
+        final char c = 'G';
+        String s = string36;
+        for (int i = 0; i < count; ++i) {
+            $noinline$indexOf(s, c);
+        }
+    }
+
+    public void timeIndexOfV(int count) {
+        final char c = 'V';
+        String s = string36;
+        for (int i = 0; i < count; ++i) {
+            $noinline$indexOf(s, c);
+        }
+    }
+
+    public void timeIndexOfW(int count) {
+        final char c = 'W';
+        String s = string36;
+        for (int i = 0; i < count; ++i) {
+            $noinline$indexOf(s, c);
+        }
+    }
+
+    public void timeIndexOf_(int count) {
+        final char c = '_';
+        String s = string36;
+        for (int i = 0; i < count; ++i) {
+            $noinline$indexOf(s, c);
+        }
+    }
+
+    static int $noinline$indexOf(String s, char c) {
+        if (doThrow) { throw new Error(); }
+        return s.indexOf(c);
+    }
+
+    public static boolean doThrow = false;
+}
diff --git a/build/Android.bp b/build/Android.bp
index 9156027..cd9d74a 100644
--- a/build/Android.bp
+++ b/build/Android.bp
@@ -145,6 +145,10 @@
         "external/vixl/src",
         "external/zlib",
     ],
+
+    tidy_checks: [
+        "-google-default-arguments",
+    ],
 }
 
 art_debug_defaults {
diff --git a/build/Android.common_test.mk b/build/Android.common_test.mk
index 449502c..291db8b 100644
--- a/build/Android.common_test.mk
+++ b/build/Android.common_test.mk
@@ -23,7 +23,10 @@
 ifneq ($(TMPDIR),)
 ART_HOST_TEST_DIR := $(TMPDIR)/test-art-$(shell echo $$PPID)
 else
-ART_HOST_TEST_DIR := /tmp/$(USER)/test-art-$(shell echo $$PPID)
+# Use a BSD checksum calculated from ANDROID_BUILD_TOP and USER as one of the
+# path components for the test output. This should allow us to run tests from multiple
+# repositories at the same time.
+ART_HOST_TEST_DIR := /tmp/test-art-$(shell echo ${ANDROID_BUILD_TOP}-${USER} | sum | cut -d ' ' -f1)
 endif
 
 # List of known broken tests that we won't attempt to execute. The test name must be the full
@@ -121,12 +124,17 @@
 ART_TEST_RUN_TEST_MULTI_IMAGE ?= $(ART_TEST_FULL)
 
 # Define the command run on test failure. $(1) is the name of the test. Executed by the shell.
+# If the test was a top-level make target (e.g. `test-art-host-gtest-codegen_test64`), the command
+# fails with exit status 1 (returned by the last `grep` statement below).
+# Otherwise (e.g., if the test was run as a prerequisite of a compound test command, such as
+# `test-art-host-gtest-codegen_test`), the command does not fail, as this would break rules running
+# ART_TEST_PREREQ_FINISHED as one of their actions, which expects *all* prerequisites *not* to fail.
 define ART_TEST_FAILED
   ( [ -f $(ART_HOST_TEST_DIR)/skipped/$(1) ] || \
     (mkdir -p $(ART_HOST_TEST_DIR)/failed/ && touch $(ART_HOST_TEST_DIR)/failed/$(1) && \
       echo $(ART_TEST_KNOWN_FAILING) | grep -q $(1) \
         && (echo -e "$(1) \e[91mKNOWN FAILURE\e[0m") \
-        || (echo -e "$(1) \e[91mFAILED\e[0m" >&2 )))
+        || (echo -e "$(1) \e[91mFAILED\e[0m" >&2; echo $(MAKECMDGOALS) | grep -q -v $(1))))
 endef
 
 ifeq ($(ART_TEST_QUIET),true)
diff --git a/build/Android.gtest.mk b/build/Android.gtest.mk
index 850702a..c785bef 100644
--- a/build/Android.gtest.mk
+++ b/build/Android.gtest.mk
@@ -29,6 +29,8 @@
   GetMethodSignature \
   ImageLayoutA \
   ImageLayoutB \
+  IMTA \
+  IMTB \
   Instrumentation \
   Interfaces \
   Lookup \
@@ -88,6 +90,7 @@
 ART_GTEST_dex2oat_test_DEX_DEPS := $(ART_GTEST_dex2oat_environment_tests_DEX_DEPS) Statics
 ART_GTEST_exception_test_DEX_DEPS := ExceptionHandle
 ART_GTEST_image_test_DEX_DEPS := ImageLayoutA ImageLayoutB
+ART_GTEST_imtable_test_DEX_DEPS := IMTA IMTB
 ART_GTEST_instrumentation_test_DEX_DEPS := Instrumentation
 ART_GTEST_jni_compiler_test_DEX_DEPS := MyClassNatives
 ART_GTEST_jni_internal_test_DEX_DEPS := AllFields StaticLeafMethods
@@ -102,7 +105,7 @@
 ART_GTEST_stub_test_DEX_DEPS := AllFields
 ART_GTEST_transaction_test_DEX_DEPS := Transaction
 ART_GTEST_type_lookup_table_test_DEX_DEPS := Lookup
-ART_GTEST_verifier_deps_test_DEX_DEPS := VerifierDeps
+ART_GTEST_verifier_deps_test_DEX_DEPS := VerifierDeps MultiDex
 
 # The elf writer test has dependencies on core.oat.
 ART_GTEST_elf_writer_test_HOST_DEPS := $(HOST_CORE_IMAGE_optimizing_no-pic_64) $(HOST_CORE_IMAGE_optimizing_no-pic_32)
@@ -593,6 +596,7 @@
 ART_GTEST_exception_test_DEX_DEPS :=
 ART_GTEST_elf_writer_test_HOST_DEPS :=
 ART_GTEST_elf_writer_test_TARGET_DEPS :=
+ART_GTEST_imtable_test_DEX_DEPS :=
 ART_GTEST_jni_compiler_test_DEX_DEPS :=
 ART_GTEST_jni_internal_test_DEX_DEPS :=
 ART_GTEST_oat_file_assistant_test_DEX_DEPS :=
diff --git a/build/Android.oat.mk b/build/Android.oat.mk
index c4887e6..3b273a2 100644
--- a/build/Android.oat.mk
+++ b/build/Android.oat.mk
@@ -122,7 +122,8 @@
 	  --oat-location=$$(PRIVATE_CORE_OAT_NAME) --image=$$(PRIVATE_CORE_IMG_NAME) \
 	  --base=$$(LIBART_IMG_HOST_BASE_ADDRESS) --instruction-set=$$($(3)ART_HOST_ARCH) \
 	  $$(LOCAL_$(3)DEX2OAT_HOST_INSTRUCTION_SET_FEATURES_OPTION) \
-	  --host --android-root=$$(HOST_OUT) --include-patch-information --generate-debug-info \
+	  --host --android-root=$$(HOST_OUT) --include-patch-information \
+	  --generate-debug-info --generate-build-id \
 	  $$(PRIVATE_CORE_MULTI_PARAM) $$(PRIVATE_CORE_COMPILE_OPTIONS)
 
 $$(core_oat_name): $$(core_image_name)
@@ -214,9 +215,24 @@
       $(4)TARGET_CORE_IMAGE_$(1)_$(2)_64 := $$(core_image_name)
     else
       $(4)TARGET_CORE_IMAGE_$(1)_$(2)_32 := $$(core_image_name)
+      ifdef ART_USE_VIXL_ARM_BACKEND
+        ifeq ($(1),optimizing)
+          # TODO(VIXL): The ARM VIXL backend is still work in progress. Therefore for now we do not
+          # compile the core image with the Optimizing backend when ART_USE_VIXL_ARM_BACKEND is
+          # defined.
+          core_compile_options += --compiler-filter=interpret-only
+        endif
+      endif
     endif
   else
     $(4)TARGET_CORE_IMAGE_$(1)_$(2)_32 := $$(core_image_name)
+    ifdef ART_USE_VIXL_ARM_BACKEND
+      ifeq ($(1),optimizing)
+      # TODO(VIXL): The ARM VIXL backend is still work in progress. Therefore for now we do not
+      # compile the core image with the Optimizing backend when ART_USE_VIXL_ARM_BACKEND is defined.
+      core_compile_options += --compiler-filter=interpret-only
+      endif
+    endif
   endif
   $(4)TARGET_CORE_IMG_OUTS += $$(core_image_name)
   $(4)TARGET_CORE_OAT_OUTS += $$(core_oat_name)
@@ -239,7 +255,8 @@
 	  --base=$$(LIBART_IMG_TARGET_BASE_ADDRESS) --instruction-set=$$($(3)TARGET_ARCH) \
 	  --instruction-set-variant=$$($(3)DEX2OAT_TARGET_CPU_VARIANT) \
 	  --instruction-set-features=$$($(3)DEX2OAT_TARGET_INSTRUCTION_SET_FEATURES) \
-	  --android-root=$$(PRODUCT_OUT)/system --include-patch-information --generate-debug-info \
+	  --android-root=$$(PRODUCT_OUT)/system --include-patch-information \
+	  --generate-debug-info --generate-build-id \
 	  $$(PRIVATE_CORE_COMPILE_OPTIONS) || (rm $$(PRIVATE_CORE_OAT_NAME); exit 1)
 
 $$(core_oat_name): $$(core_image_name)
diff --git a/build/art.go b/build/art.go
index b826538..ccaa11d 100644
--- a/build/art.go
+++ b/build/art.go
@@ -15,7 +15,6 @@
 package art
 
 import (
-	"android/soong"
 	"android/soong/android"
 	"android/soong/cc"
 	"fmt"
@@ -75,6 +74,12 @@
 		cflags = append(cflags, "-fstack-protector")
 	}
 
+	if envTrue(ctx, "ART_USE_VIXL_ARM_BACKEND") {
+		// Used to enable the new VIXL-based ARM code generator.
+		cflags = append(cflags, "-DART_USE_VIXL_ARM_BACKEND=1")
+		asflags = append(asflags, "-DART_USE_VIXL_ARM_BACKEND=1")
+	}
+
 	return cflags, asflags
 }
 
@@ -222,13 +227,13 @@
 var artTestMutex sync.Mutex
 
 func init() {
-	soong.RegisterModuleType("art_cc_library", artLibrary)
-	soong.RegisterModuleType("art_cc_binary", artBinary)
-	soong.RegisterModuleType("art_cc_test", artTest)
-	soong.RegisterModuleType("art_cc_test_library", artTestLibrary)
-	soong.RegisterModuleType("art_cc_defaults", artDefaultsFactory)
-	soong.RegisterModuleType("art_global_defaults", artGlobalDefaultsFactory)
-	soong.RegisterModuleType("art_debug_defaults", artDebugDefaultsFactory)
+	android.RegisterModuleType("art_cc_library", artLibrary)
+	android.RegisterModuleType("art_cc_binary", artBinary)
+	android.RegisterModuleType("art_cc_test", artTest)
+	android.RegisterModuleType("art_cc_test_library", artTestLibrary)
+	android.RegisterModuleType("art_cc_defaults", artDefaultsFactory)
+	android.RegisterModuleType("art_global_defaults", artGlobalDefaultsFactory)
+	android.RegisterModuleType("art_debug_defaults", artDebugDefaultsFactory)
 }
 
 func artGlobalDefaultsFactory() (blueprint.Module, []interface{}) {
diff --git a/cmdline/cmdline.h b/cmdline/cmdline.h
index 4dcaf80..6e042c3 100644
--- a/cmdline/cmdline.h
+++ b/cmdline/cmdline.h
@@ -234,7 +234,7 @@
     // Checks for --boot-image location.
     {
       std::string boot_image_location = boot_image_location_;
-      size_t file_name_idx = boot_image_location.rfind("/");
+      size_t file_name_idx = boot_image_location.rfind('/');
       if (file_name_idx == std::string::npos) {  // Prevent a InsertIsaDirectory check failure.
         *error_msg = "Boot image location must have a / in it";
         return false;
@@ -244,7 +244,7 @@
       // This prevents a common error "Could not create an image space..." when initing the Runtime.
       if (file_name_idx != std::string::npos) {
         std::string no_file_name = boot_image_location.substr(0, file_name_idx);
-        size_t ancestor_dirs_idx = no_file_name.rfind("/");
+        size_t ancestor_dirs_idx = no_file_name.rfind('/');
 
         std::string parent_dir_name;
         if (ancestor_dirs_idx != std::string::npos) {
@@ -293,7 +293,7 @@
 template <typename Args = CmdlineArgs>
 struct CmdlineMain {
   int Main(int argc, char** argv) {
-    InitLogging(argv);
+    InitLogging(argv, Runtime::Aborter);
     std::unique_ptr<Args> args = std::unique_ptr<Args>(CreateArguments());
     args_ = args.get();
 
diff --git a/cmdline/cmdline_parser.h b/cmdline/cmdline_parser.h
index cfc0967..d82fd48 100644
--- a/cmdline/cmdline_parser.h
+++ b/cmdline/cmdline_parser.h
@@ -390,7 +390,7 @@
         // Unlike regular argument definitions, when a value gets parsed into its
         // stronger type, we just throw it away.
 
-        if (ign.find("_") != std::string::npos) {  // Does the arg-def have a wildcard?
+        if (ign.find('_') != std::string::npos) {  // Does the arg-def have a wildcard?
           // pretend this is a string, e.g. -Xjitconfig:<anythinggoeshere>
           auto&& builder = Define(ignore_name).template WithType<std::string>().IntoIgnore();
           assert(&builder == this);
diff --git a/cmdline/cmdline_parser_test.cc b/cmdline/cmdline_parser_test.cc
index 5809dcd..550e8c4 100644
--- a/cmdline/cmdline_parser_test.cc
+++ b/cmdline/cmdline_parser_test.cc
@@ -78,7 +78,7 @@
     return memcmp(std::addressof(expected), std::addressof(actual), sizeof(expected)) == 0;
   }
 
-  bool UsuallyEquals(const char* expected, std::string actual) {
+  bool UsuallyEquals(const char* expected, const std::string& actual) {
     return std::string(expected) == actual;
   }
 
@@ -122,14 +122,14 @@
   using RuntimeParser = ParsedOptions::RuntimeParser;
 
   static void SetUpTestCase() {
-    art::InitLogging(nullptr);  // argv = null
+    art::InitLogging(nullptr, art::Runtime::Aborter);  // argv = null
   }
 
   virtual void SetUp() {
     parser_ = ParsedOptions::MakeParser(false);  // do not ignore unrecognized options
   }
 
-  static ::testing::AssertionResult IsResultSuccessful(CmdlineResult result) {
+  static ::testing::AssertionResult IsResultSuccessful(const CmdlineResult& result) {
     if (result.IsSuccess()) {
       return ::testing::AssertionSuccess();
     } else {
@@ -138,7 +138,7 @@
     }
   }
 
-  static ::testing::AssertionResult IsResultFailure(CmdlineResult result,
+  static ::testing::AssertionResult IsResultFailure(const CmdlineResult& result,
                                                     CmdlineResult::Status failure_status) {
     if (result.IsSuccess()) {
       return ::testing::AssertionFailure() << " got success but expected failure: "
diff --git a/cmdline/cmdline_types.h b/cmdline/cmdline_types.h
index b229be4..156ca9e 100644
--- a/cmdline/cmdline_types.h
+++ b/cmdline/cmdline_types.h
@@ -182,7 +182,7 @@
 struct CmdlineType<Memory<Divisor>> : CmdlineTypeParser<Memory<Divisor>> {
   using typename CmdlineTypeParser<Memory<Divisor>>::Result;
 
-  Result Parse(const std::string arg) {
+  Result Parse(const std::string& arg) {
     CMDLINE_DEBUG_LOG << "Parsing memory: " << arg << std::endl;
     size_t val = ParseMemoryOption(arg.c_str(), Divisor);
     CMDLINE_DEBUG_LOG << "Memory parsed to size_t value: " << val << std::endl;
@@ -407,7 +407,7 @@
 
   Result ParseAndAppend(const std::string& args,
                         std::vector<ti::Agent>& existing_value) {
-    existing_value.push_back(ti::Agent::Create(args));
+    existing_value.emplace_back(args);
     return Result::SuccessNoValue();
   }
 
@@ -496,18 +496,15 @@
 struct XGcOption {
   // These defaults are used when the command line arguments for -Xgc:
   // are either omitted completely or partially.
-  gc::CollectorType collector_type_ = kUseReadBarrier ?
-                                           // If RB is enabled (currently a build-time decision),
-                                           // use CC as the default GC.
-                                           gc::kCollectorTypeCC :
-                                           gc::kCollectorTypeDefault;
+  gc::CollectorType collector_type_ = gc::kCollectorTypeDefault;
   bool verify_pre_gc_heap_ = false;
   bool verify_pre_sweeping_heap_ = kIsDebugBuild;
   bool verify_post_gc_heap_ = false;
   bool verify_pre_gc_rosalloc_ = kIsDebugBuild;
   bool verify_pre_sweeping_rosalloc_ = false;
   bool verify_post_gc_rosalloc_ = false;
-  bool measure_ = kIsDebugBuild;
+  // Do no measurements for kUseTableLookupReadBarrier to avoid test timeouts. b/31679493
+  bool measure_ = kIsDebugBuild && !kUseTableLookupReadBarrier;
   bool gcstress_ = false;
 };
 
@@ -579,10 +576,6 @@
     : background_collector_type_(background_collector_type) {}
   BackgroundGcOption()
     : background_collector_type_(gc::kCollectorTypeNone) {
-
-    if (kUseReadBarrier) {
-      background_collector_type_ = gc::kCollectorTypeCCBackground;  // Background compaction for CC.
-    }
   }
 
   operator gc::CollectorType() const { return background_collector_type_; }
@@ -695,7 +688,7 @@
   }
 
   static std::string RemovePrefix(const std::string& source) {
-    size_t prefix_idx = source.find(":");
+    size_t prefix_idx = source.find(':');
 
     if (prefix_idx == std::string::npos) {
       return "";
diff --git a/cmdline/detail/cmdline_parse_argument_detail.h b/cmdline/detail/cmdline_parse_argument_detail.h
index 84beff5..14eac30 100644
--- a/cmdline/detail/cmdline_parse_argument_detail.h
+++ b/cmdline/detail/cmdline_parse_argument_detail.h
@@ -108,7 +108,7 @@
       // If this is true, then the wildcard matching later on can still fail, so this is not
       // a guarantee that the argument is correct, it's more of a strong hint that the
       // user-provided input *probably* was trying to match this argument.
-      size_t MaybeMatches(TokenRange token_list) const {
+      size_t MaybeMatches(const TokenRange& token_list) const {
         auto best_match = FindClosestMatch(token_list);
 
         return best_match.second;
@@ -118,7 +118,7 @@
       //
       // Returns the token range that was the closest match and the # of tokens that
       // this range was matched up until.
-      std::pair<const TokenRange*, size_t> FindClosestMatch(TokenRange token_list) const {
+      std::pair<const TokenRange*, size_t> FindClosestMatch(const TokenRange& token_list) const {
         const TokenRange* best_match_ptr = nullptr;
 
         size_t best_match = 0;
diff --git a/compiler/Android.bp b/compiler/Android.bp
index 6faac09..6edb639 100644
--- a/compiler/Android.bp
+++ b/compiler/Android.bp
@@ -42,6 +42,7 @@
         "linker/vector_output_stream.cc",
         "linker/relative_patcher.cc",
         "jit/jit_compiler.cc",
+        "jit/jit_logger.cc",
         "jni/quick/calling_convention.cc",
         "jni/quick/jni_compiler.cc",
         "optimizing/block_builder.cc",
@@ -105,6 +106,7 @@
                 "optimizing/instruction_simplifier_arm.cc",
                 "optimizing/instruction_simplifier_shared.cc",
                 "optimizing/intrinsics_arm.cc",
+                "optimizing/intrinsics_arm_vixl.cc",
                 "utils/arm/assembler_arm.cc",
                 "utils/arm/assembler_arm_vixl.cc",
                 "utils/arm/assembler_thumb2.cc",
@@ -191,11 +193,20 @@
     ],
     include_dirs: ["art/disassembler"],
     export_include_dirs: ["."],
+
+    // For SHA-1 checksumming of build ID
+    static: {
+        whole_static_libs: ["libcrypto"],
+    },
+    shared: {
+        shared_libs: ["libcrypto"],
+    },
 }
 
 gensrcs {
     name: "art_compiler_operator_srcs",
-    cmd: "art/tools/generate-operator-out.py art/compiler $in > $out",
+    cmd: "$(location generate-operator-out.py) art/compiler $(in) > $(out)",
+    tool_files: ["generate-operator-out.py"],
     srcs: [
         "compiled_method.h",
         "dex/dex_to_dex_compiler.h",
@@ -334,6 +345,7 @@
         "utils/string_reference_test.cc",
         "utils/swap_space_test.cc",
         "utils/test_dex_file_builder_test.cc",
+        "verifier_deps_test.cc",
 
         "jni/jni_cfi_test.cc",
         "optimizing/codegen_test.cc",
@@ -407,6 +419,7 @@
         },
         mips: {
             srcs: [
+                "optimizing/emit_swap_mips_test.cc",
                 "utils/mips/assembler_mips_test.cc",
                 "utils/mips/assembler_mips32r6_test.cc",
             ],
diff --git a/compiler/common_compiler_test.cc b/compiler/common_compiler_test.cc
index bc8facd..2f9164c 100644
--- a/compiler/common_compiler_test.cc
+++ b/compiler/common_compiler_test.cc
@@ -86,7 +86,7 @@
     MakeExecutable(code_ptr, code.size());
     const void* method_code = CompiledMethod::CodePointer(code_ptr,
                                                           compiled_method->GetInstructionSet());
-    LOG(INFO) << "MakeExecutable " << PrettyMethod(method) << " code=" << method_code;
+    LOG(INFO) << "MakeExecutable " << method->PrettyMethod() << " code=" << method_code;
     class_linker_->SetEntryPointsToCompiledCode(method, method_code);
   } else {
     // No code? You must mean to go into the interpreter.
@@ -156,7 +156,7 @@
 
     const InstructionSet instruction_set = kRuntimeISA;
     // Take the default set of instruction features from the build.
-    instruction_set_features_.reset(InstructionSetFeatures::FromCppDefines());
+    instruction_set_features_ = InstructionSetFeatures::FromCppDefines();
 
     runtime_->SetInstructionSet(instruction_set);
     for (int i = 0; i < Runtime::kLastCalleeSaveType; i++) {
diff --git a/compiler/compiler.cc b/compiler/compiler.cc
index 1626317..c500921 100644
--- a/compiler/compiler.cc
+++ b/compiler/compiler.cc
@@ -47,12 +47,12 @@
   if (code_item.insns_size_in_code_units_ >= UINT16_MAX / 4) {
     LOG(INFO) << "Method exceeds compiler instruction limit: "
               << code_item.insns_size_in_code_units_
-              << " in " << PrettyMethod(method_idx, dex_file);
+              << " in " << dex_file.PrettyMethod(method_idx);
     return true;
   }
   if (code_item.registers_size_ >= UINT16_MAX / 4) {
     LOG(INFO) << "Method exceeds compiler virtual register limit: "
-              << code_item.registers_size_ << " in " << PrettyMethod(method_idx, dex_file);
+              << code_item.registers_size_ << " in " << dex_file.PrettyMethod(method_idx);
     return true;
   }
   return false;
diff --git a/compiler/compiler.h b/compiler/compiler.h
index 9a69456..2ca0b77 100644
--- a/compiler/compiler.h
+++ b/compiler/compiler.h
@@ -18,6 +18,7 @@
 #define ART_COMPILER_COMPILER_H_
 
 #include "dex_file.h"
+#include "base/mutex.h"
 #include "os.h"
 
 namespace art {
@@ -34,6 +35,7 @@
 class CompiledMethod;
 template<class T> class Handle;
 class OatWriter;
+class Thread;
 
 class Compiler {
  public:
diff --git a/compiler/debug/elf_symtab_writer.h b/compiler/debug/elf_symtab_writer.h
index 045eddd..af9f091 100644
--- a/compiler/debug/elf_symtab_writer.h
+++ b/compiler/debug/elf_symtab_writer.h
@@ -69,7 +69,7 @@
       name_offset = strtab->Write(info.trampoline_name);
     } else {
       DCHECK(info.dex_file != nullptr);
-      std::string name = PrettyMethod(info.dex_method_index, *info.dex_file, with_signature);
+      std::string name = info.dex_file->PrettyMethod(info.dex_method_index, with_signature);
       if (deduped_addresses.find(info.code_address) != deduped_addresses.end()) {
         name += " [DEDUPED]";
       }
diff --git a/compiler/dex/dex_to_dex_compiler.cc b/compiler/dex/dex_to_dex_compiler.cc
index 236a3b2..9c1d72b 100644
--- a/compiler/dex/dex_to_dex_compiler.cc
+++ b/compiler/dex/dex_to_dex_compiler.cc
@@ -212,7 +212,7 @@
   VLOG(compiler) << "Replacing " << Instruction::Name(inst->Opcode())
                  << " by " << Instruction::Name(Instruction::RETURN_VOID_NO_BARRIER)
                  << " at dex pc " << StringPrintf("0x%x", dex_pc) << " in method "
-                 << PrettyMethod(unit_.GetDexMethodIndex(), GetDexFile(), true);
+                 << GetDexFile().PrettyMethod(unit_.GetDexMethodIndex(), true);
   inst->SetOpcode(Instruction::RETURN_VOID_NO_BARRIER);
 }
 
@@ -232,7 +232,7 @@
   VLOG(compiler) << "Removing " << Instruction::Name(inst->Opcode())
                  << " by replacing it with 2 NOPs at dex pc "
                  << StringPrintf("0x%x", dex_pc) << " in method "
-                 << PrettyMethod(unit_.GetDexMethodIndex(), GetDexFile(), true);
+                 << GetDexFile().PrettyMethod(unit_.GetDexMethodIndex(), true);
   // We are modifying 4 consecutive bytes.
   inst->SetOpcode(Instruction::NOP);
   inst->SetVRegA_10x(0u);  // keep compliant with verifier.
@@ -262,7 +262,7 @@
                    << " by replacing field index " << field_idx
                    << " by field offset " << field_offset.Int32Value()
                    << " at dex pc " << StringPrintf("0x%x", dex_pc) << " in method "
-                   << PrettyMethod(unit_.GetDexMethodIndex(), GetDexFile(), true);
+                   << GetDexFile().PrettyMethod(unit_.GetDexMethodIndex(), true);
     // We are modifying 4 consecutive bytes.
     inst->SetOpcode(new_opcode);
     // Replace field index by field offset.
@@ -300,12 +300,12 @@
   uint32_t vtable_idx = resolved_method->GetMethodIndex();
   DCHECK(IsUint<16>(vtable_idx));
   VLOG(compiler) << "Quickening " << Instruction::Name(inst->Opcode())
-                 << "(" << PrettyMethod(method_idx, GetDexFile(), true) << ")"
+                 << "(" << GetDexFile().PrettyMethod(method_idx, true) << ")"
                  << " to " << Instruction::Name(new_opcode)
                  << " by replacing method index " << method_idx
                  << " by vtable index " << vtable_idx
                  << " at dex pc " << StringPrintf("0x%x", dex_pc) << " in method "
-                 << PrettyMethod(unit_.GetDexMethodIndex(), GetDexFile(), true);
+                 << GetDexFile().PrettyMethod(unit_.GetDexMethodIndex(), true);
   // We are modifying 4 consecutive bytes.
   inst->SetOpcode(new_opcode);
   // Replace method index by vtable index.
diff --git a/compiler/dex/quick_compiler_callbacks.h b/compiler/dex/quick_compiler_callbacks.h
index 34fd88b..db0fdaa 100644
--- a/compiler/dex/quick_compiler_callbacks.h
+++ b/compiler/dex/quick_compiler_callbacks.h
@@ -18,6 +18,7 @@
 #define ART_COMPILER_DEX_QUICK_COMPILER_CALLBACKS_H_
 
 #include "compiler_callbacks.h"
+#include "verifier/verifier_deps.h"
 
 namespace art {
 
@@ -46,16 +47,16 @@
     }
 
     verifier::VerifierDeps* GetVerifierDeps() const OVERRIDE {
-      return verifier_deps_;
+      return verifier_deps_.get();
     }
 
-    void SetVerifierDeps(verifier::VerifierDeps* deps) {
-      verifier_deps_ = deps;
+    void SetVerifierDeps(verifier::VerifierDeps* deps) OVERRIDE {
+      verifier_deps_.reset(deps);
     }
 
   private:
     VerificationResults* const verification_results_;
-    verifier::VerifierDeps* verifier_deps_;
+    std::unique_ptr<verifier::VerifierDeps> verifier_deps_;
 };
 
 }  // namespace art
diff --git a/compiler/dex/verification_results.cc b/compiler/dex/verification_results.cc
index d87762d..511a787 100644
--- a/compiler/dex/verification_results.cc
+++ b/compiler/dex/verification_results.cc
@@ -58,8 +58,7 @@
   auto it = verified_methods_.find(ref);
   if (it != verified_methods_.end()) {
     // TODO: Investigate why are we doing the work again for this method and try to avoid it.
-    LOG(WARNING) << "Method processed more than once: "
-        << PrettyMethod(ref.dex_method_index, *ref.dex_file);
+    LOG(WARNING) << "Method processed more than once: " << ref.PrettyMethod();
     if (!Runtime::Current()->UseJitCompilation()) {
       DCHECK_EQ(it->second->GetDevirtMap().size(), verified_method->GetDevirtMap().size());
       DCHECK_EQ(it->second->GetSafeCastSet().size(), verified_method->GetSafeCastSet().size());
diff --git a/compiler/driver/compiler_driver-inl.h b/compiler/driver/compiler_driver-inl.h
index 0884a2a..9711516 100644
--- a/compiler/driver/compiler_driver-inl.h
+++ b/compiler/driver/compiler_driver-inl.h
@@ -31,10 +31,6 @@
 
 namespace art {
 
-inline mirror::DexCache* CompilerDriver::GetDexCache(const DexCompilationUnit* mUnit) {
-  return mUnit->GetClassLinker()->FindDexCache(Thread::Current(), *mUnit->GetDexFile(), false);
-}
-
 inline mirror::ClassLoader* CompilerDriver::GetClassLoader(const ScopedObjectAccess& soa,
                                                            const DexCompilationUnit* mUnit) {
   return soa.Decode<mirror::ClassLoader>(mUnit->GetClassLoader()).Ptr();
@@ -87,10 +83,6 @@
   return resolved_field;
 }
 
-inline mirror::DexCache* CompilerDriver::FindDexCache(const DexFile* dex_file) {
-  return Runtime::Current()->GetClassLinker()->FindDexCache(Thread::Current(), *dex_file, false);
-}
-
 inline ArtField* CompilerDriver::ResolveField(
     const ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache,
     Handle<mirror::ClassLoader> class_loader, const DexCompilationUnit* mUnit,
@@ -100,30 +92,13 @@
                                  is_static);
 }
 
-inline void CompilerDriver::GetResolvedFieldDexFileLocation(
-    ArtField* resolved_field, const DexFile** declaring_dex_file,
-    uint16_t* declaring_class_idx, uint16_t* declaring_field_idx) {
-  ObjPtr<mirror::Class> declaring_class = resolved_field->GetDeclaringClass();
-  *declaring_dex_file = declaring_class->GetDexCache()->GetDexFile();
-  *declaring_class_idx = declaring_class->GetDexTypeIndex();
-  *declaring_field_idx = resolved_field->GetDexFieldIndex();
-}
-
-inline bool CompilerDriver::IsFieldVolatile(ArtField* field) {
-  return field->IsVolatile();
-}
-
-inline MemberOffset CompilerDriver::GetFieldOffset(ArtField* field) {
-  return field->GetOffset();
-}
-
 inline std::pair<bool, bool> CompilerDriver::IsFastInstanceField(
     mirror::DexCache* dex_cache, mirror::Class* referrer_class,
     ArtField* resolved_field, uint16_t field_idx) {
   DCHECK(!resolved_field->IsStatic());
   ObjPtr<mirror::Class> fields_class = resolved_field->GetDeclaringClass();
   bool fast_get = referrer_class != nullptr &&
-      referrer_class->CanAccessResolvedField(fields_class.Ptr(),
+      referrer_class->CanAccessResolvedField(fields_class,
                                              resolved_field,
                                              dex_cache,
                                              field_idx);
@@ -219,43 +194,6 @@
   return result.first;
 }
 
-inline bool CompilerDriver::IsStaticFieldInReferrerClass(mirror::Class* referrer_class,
-                                                         ArtField* resolved_field) {
-  DCHECK(resolved_field->IsStatic());
-  ObjPtr<mirror::Class> fields_class = resolved_field->GetDeclaringClass();
-  return referrer_class == fields_class;
-}
-
-inline bool CompilerDriver::CanAssumeClassIsInitialized(mirror::Class* klass) {
-  // Being loaded is a pre-requisite for being initialized but let's do the cheap check first.
-  //
-  // NOTE: When AOT compiling an app, we eagerly initialize app classes (and potentially their
-  // super classes in the boot image) but only those that have a trivial initialization, i.e.
-  // without <clinit>() or static values in the dex file for that class or any of its super
-  // classes. So while we could see the klass as initialized during AOT compilation and have
-  // it only loaded at runtime, the needed initialization would have to be trivial and
-  // unobservable from Java, so we may as well treat it as initialized.
-  if (!klass->IsInitialized()) {
-    return false;
-  }
-  return CanAssumeClassIsLoaded(klass);
-}
-
-inline bool CompilerDriver::CanReferrerAssumeClassIsInitialized(mirror::Class* referrer_class,
-                                                                mirror::Class* klass) {
-  return (referrer_class != nullptr
-          && !referrer_class->IsInterface()
-          && referrer_class->IsSubClass(klass))
-      || CanAssumeClassIsInitialized(klass);
-}
-
-inline bool CompilerDriver::IsStaticFieldsClassInitialized(mirror::Class* referrer_class,
-                                                           ArtField* resolved_field) {
-  DCHECK(resolved_field->IsStatic());
-  ObjPtr<mirror::Class> fields_class = resolved_field->GetDeclaringClass();
-  return CanReferrerAssumeClassIsInitialized(referrer_class, fields_class.Ptr());
-}
-
 inline ArtMethod* CompilerDriver::ResolveMethod(
     ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache,
     Handle<mirror::ClassLoader> class_loader, const DexCompilationUnit* mUnit,
@@ -275,35 +213,6 @@
   return resolved_method;
 }
 
-inline void CompilerDriver::GetResolvedMethodDexFileLocation(
-    ArtMethod* resolved_method, const DexFile** declaring_dex_file,
-    uint16_t* declaring_class_idx, uint16_t* declaring_method_idx) {
-  mirror::Class* declaring_class = resolved_method->GetDeclaringClass();
-  *declaring_dex_file = declaring_class->GetDexCache()->GetDexFile();
-  *declaring_class_idx = declaring_class->GetDexTypeIndex();
-  *declaring_method_idx = resolved_method->GetDexMethodIndex();
-}
-
-inline uint16_t CompilerDriver::GetResolvedMethodVTableIndex(
-    ArtMethod* resolved_method, InvokeType type) {
-  if (type == kVirtual || type == kSuper) {
-    return resolved_method->GetMethodIndex();
-  } else if (type == kInterface) {
-    return resolved_method->GetDexMethodIndex();
-  } else {
-    return DexFile::kDexNoIndex16;
-  }
-}
-
-inline bool CompilerDriver::IsMethodsClassInitialized(mirror::Class* referrer_class,
-                                                      ArtMethod* resolved_method) {
-  if (!resolved_method->IsStatic()) {
-    return true;
-  }
-  mirror::Class* methods_class = resolved_method->GetDeclaringClass();
-  return CanReferrerAssumeClassIsInitialized(referrer_class, methods_class);
-}
-
 }  // namespace art
 
 #endif  // ART_COMPILER_DRIVER_COMPILER_DRIVER_INL_H_
diff --git a/compiler/driver/compiler_driver.cc b/compiler/driver/compiler_driver.cc
index e2f8d92..223be88 100644
--- a/compiler/driver/compiler_driver.cc
+++ b/compiler/driver/compiler_driver.cc
@@ -24,6 +24,8 @@
 #include <malloc.h>  // For mallinfo
 #endif
 
+#include "android-base/strings.h"
+
 #include "art_field-inl.h"
 #include "art_method-inl.h"
 #include "base/array_ref.h"
@@ -37,6 +39,7 @@
 #include "compiled_class.h"
 #include "compiled_method.h"
 #include "compiler.h"
+#include "compiler_callbacks.h"
 #include "compiler_driver-inl.h"
 #include "dex_compilation_unit.h"
 #include "dex_file-inl.h"
@@ -72,6 +75,7 @@
 #include "verifier/method_verifier.h"
 #include "verifier/method_verifier-inl.h"
 #include "verifier/verifier_log_mode.h"
+#include "verifier/verifier_deps.h"
 
 namespace art {
 
@@ -95,8 +99,6 @@
  public:
   AOTCompilationStats()
       : stats_lock_("AOT compilation statistics lock"),
-        types_in_dex_cache_(0), types_not_in_dex_cache_(0),
-        strings_in_dex_cache_(0), strings_not_in_dex_cache_(0),
         resolved_types_(0), unresolved_types_(0),
         resolved_instance_fields_(0), unresolved_instance_fields_(0),
         resolved_local_static_fields_(0), resolved_static_fields_(0), unresolved_static_fields_(0),
@@ -112,8 +114,6 @@
   }
 
   void Dump() {
-    DumpStat(types_in_dex_cache_, types_not_in_dex_cache_, "types known to be in dex cache");
-    DumpStat(strings_in_dex_cache_, strings_not_in_dex_cache_, "strings known to be in dex cache");
     DumpStat(resolved_types_, unresolved_types_, "types resolved");
     DumpStat(resolved_instance_fields_, unresolved_instance_fields_, "instance fields resolved");
     DumpStat(resolved_local_static_fields_ + resolved_static_fields_, unresolved_static_fields_,
@@ -164,26 +164,6 @@
 #define STATS_LOCK()
 #endif
 
-  void TypeInDexCache() REQUIRES(!stats_lock_) {
-    STATS_LOCK();
-    types_in_dex_cache_++;
-  }
-
-  void TypeNotInDexCache() REQUIRES(!stats_lock_) {
-    STATS_LOCK();
-    types_not_in_dex_cache_++;
-  }
-
-  void StringInDexCache() REQUIRES(!stats_lock_) {
-    STATS_LOCK();
-    strings_in_dex_cache_++;
-  }
-
-  void StringNotInDexCache() REQUIRES(!stats_lock_) {
-    STATS_LOCK();
-    strings_not_in_dex_cache_++;
-  }
-
   void TypeDoesntNeedAccessCheck() REQUIRES(!stats_lock_) {
     STATS_LOCK();
     resolved_types_++;
@@ -225,67 +205,6 @@
     type_based_devirtualization_++;
   }
 
-  // Indicate that a method of the given type was resolved at compile time.
-  void ResolvedMethod(InvokeType type) REQUIRES(!stats_lock_) {
-    DCHECK_LE(type, kMaxInvokeType);
-    STATS_LOCK();
-    resolved_methods_[type]++;
-  }
-
-  // Indicate that a method of the given type was unresolved at compile time as it was in an
-  // unknown dex file.
-  void UnresolvedMethod(InvokeType type) REQUIRES(!stats_lock_) {
-    DCHECK_LE(type, kMaxInvokeType);
-    STATS_LOCK();
-    unresolved_methods_[type]++;
-  }
-
-  // Indicate that a type of virtual method dispatch has been converted into a direct method
-  // dispatch.
-  void VirtualMadeDirect(InvokeType type) REQUIRES(!stats_lock_) {
-    DCHECK(type == kVirtual || type == kInterface || type == kSuper);
-    STATS_LOCK();
-    virtual_made_direct_[type]++;
-  }
-
-  // Indicate that a method of the given type was able to call directly into boot.
-  void DirectCallsToBoot(InvokeType type) REQUIRES(!stats_lock_) {
-    DCHECK_LE(type, kMaxInvokeType);
-    STATS_LOCK();
-    direct_calls_to_boot_[type]++;
-  }
-
-  // Indicate that a method of the given type was able to be resolved directly from boot.
-  void DirectMethodsToBoot(InvokeType type) REQUIRES(!stats_lock_) {
-    DCHECK_LE(type, kMaxInvokeType);
-    STATS_LOCK();
-    direct_methods_to_boot_[type]++;
-  }
-
-  void ProcessedInvoke(InvokeType type, int flags) REQUIRES(!stats_lock_) {
-    STATS_LOCK();
-    if (flags == 0) {
-      unresolved_methods_[type]++;
-    } else {
-      DCHECK_NE((flags & kFlagMethodResolved), 0);
-      resolved_methods_[type]++;
-      if ((flags & kFlagVirtualMadeDirect) != 0) {
-        virtual_made_direct_[type]++;
-        if ((flags & kFlagPreciseTypeDevirtualization) != 0) {
-          type_based_devirtualization_++;
-        }
-      } else {
-        DCHECK_EQ((flags & kFlagPreciseTypeDevirtualization), 0);
-      }
-      if ((flags & kFlagDirectCallToBoot) != 0) {
-        direct_calls_to_boot_[type]++;
-      }
-      if ((flags & kFlagDirectMethodToBoot) != 0) {
-        direct_methods_to_boot_[type]++;
-      }
-    }
-  }
-
   // A check-cast could be eliminated due to verifier type analysis.
   void SafeCast() REQUIRES(!stats_lock_) {
     STATS_LOCK();
@@ -301,12 +220,6 @@
  private:
   Mutex stats_lock_;
 
-  size_t types_in_dex_cache_;
-  size_t types_not_in_dex_cache_;
-
-  size_t strings_in_dex_cache_;
-  size_t strings_not_in_dex_cache_;
-
   size_t resolved_types_;
   size_t unresolved_types_;
 
@@ -472,7 +385,8 @@
       ? cls->FindDeclaredDirectMethod(method_name, signature, image_size)
       : cls->FindDeclaredVirtualMethod(method_name, signature, image_size);
   if (method == nullptr) {
-    LOG(FATAL) << "Could not find method of intrinsic " << class_name << method_name << signature;
+    LOG(FATAL) << "Could not find method of intrinsic "
+               << class_name << " " << method_name << " " << signature;
   }
   DCHECK_EQ(method->GetInvokeType(), invoke_type);
   method->SetIntrinsic(static_cast<uint32_t>(intrinsic));
@@ -480,6 +394,7 @@
 
 void CompilerDriver::CompileAll(jobject class_loader,
                                 const std::vector<const DexFile*>& dex_files,
+                                verifier::VerifierDeps* verifier_deps,
                                 TimingLogger* timings) {
   DCHECK(!Runtime::Current()->IsStarted());
 
@@ -491,17 +406,19 @@
   // 2) Resolve all classes
   // 3) Attempt to verify all classes
   // 4) Attempt to initialize image classes, and trivially initialized classes
-  PreCompile(class_loader, dex_files, timings);
+  PreCompile(class_loader, dex_files, verifier_deps, timings);
   if (GetCompilerOptions().IsBootImage()) {
     // We don't need to setup the intrinsics for non boot image compilation, as
     // those compilations will pick up a boot image that have the ArtMethod already
     // set with the intrinsics flag.
     ScopedObjectAccess soa(Thread::Current());
-#define OPTIMIZING_INTRINSICS(Name, InvokeType, NeedsEnvironmentOrCache, SideEffects, Exceptions, ClassName, MethodName, Signature) \
+#define SETUP_INTRINSICS(Name, InvokeType, NeedsEnvironmentOrCache, SideEffects, Exceptions, \
+                         ClassName, MethodName, Signature) \
   SetupIntrinsic(soa.Self(), Intrinsics::k##Name, InvokeType, ClassName, MethodName, Signature);
 #include "intrinsics_list.h"
-INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
+INTRINSICS_LIST(SETUP_INTRINSICS)
 #undef INTRINSICS_LIST
+#undef SETUP_INTRINSICS
   }
   // Compile:
   // 1) Compile all classes and methods enabled for compilation. May fall back to dex-to-dex
@@ -705,7 +622,7 @@
   if (kTimeCompileMethod) {
     uint64_t duration_ns = NanoTime() - start_ns;
     if (duration_ns > MsToNs(driver->GetCompiler()->GetMaximumCompilationTimeBeforeWarning())) {
-      LOG(WARNING) << "Compilation of " << PrettyMethod(method_idx, dex_file)
+      LOG(WARNING) << "Compilation of " << dex_file.PrettyMethod(method_idx)
                    << " took " << PrettyDuration(duration_ns);
     }
   }
@@ -727,7 +644,7 @@
 
   if (self->IsExceptionPending()) {
     ScopedObjectAccess soa(self);
-    LOG(FATAL) << "Unexpected exception compiling: " << PrettyMethod(method_idx, dex_file) << "\n"
+    LOG(FATAL) << "Unexpected exception compiling: " << dex_file.PrettyMethod(method_idx) << "\n"
         << self->GetException()->Dump();
   }
 }
@@ -761,7 +678,7 @@
 
   InitializeThreadPools();
 
-  PreCompile(jclass_loader, dex_files, timings);
+  PreCompile(jclass_loader, dex_files, /* verifier_deps */ nullptr, timings);
 
   // Can we run DEX-to-DEX compiler on this class ?
   optimizer::DexToDexCompilationLevel dex_to_dex_compilation_level =
@@ -845,9 +762,10 @@
 // TODO: Collect the relevant string indices in parallel, then allocate them sequentially in a
 //       stable order.
 
-static void ResolveConstStrings(CompilerDriver* driver,
+static void ResolveConstStrings(Handle<mirror::DexCache> dex_cache,
                                 const DexFile& dex_file,
-                                const DexFile::CodeItem* code_item) {
+                                const DexFile::CodeItem* code_item)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
   if (code_item == nullptr) {
     // Abstract or native method.
     return;
@@ -855,18 +773,18 @@
 
   const uint16_t* code_ptr = code_item->insns_;
   const uint16_t* code_end = code_item->insns_ + code_item->insns_size_in_code_units_;
+  ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
 
   while (code_ptr < code_end) {
     const Instruction* inst = Instruction::At(code_ptr);
     switch (inst->Opcode()) {
-      case Instruction::CONST_STRING: {
-        uint32_t string_index = inst->VRegB_21c();
-        driver->CanAssumeStringIsPresentInDexCache(dex_file, string_index);
-        break;
-      }
+      case Instruction::CONST_STRING:
       case Instruction::CONST_STRING_JUMBO: {
-        uint32_t string_index = inst->VRegB_31c();
-        driver->CanAssumeStringIsPresentInDexCache(dex_file, string_index);
+        uint32_t string_index = (inst->Opcode() == Instruction::CONST_STRING)
+            ? inst->VRegB_21c()
+            : inst->VRegB_31c();
+        mirror::String* string = class_linker->ResolveString(dex_file, string_index, dex_cache);
+        CHECK(string != nullptr) << "Could not allocate a string when forcing determinism";
         break;
       }
 
@@ -881,7 +799,13 @@
 static void ResolveConstStrings(CompilerDriver* driver,
                                 const std::vector<const DexFile*>& dex_files,
                                 TimingLogger* timings) {
+  ScopedObjectAccess soa(Thread::Current());
+  StackHandleScope<1> hs(soa.Self());
+  ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
+  MutableHandle<mirror::DexCache> dex_cache(hs.NewHandle<mirror::DexCache>(nullptr));
+
   for (const DexFile* dex_file : dex_files) {
+    dex_cache.Assign(class_linker->FindDexCache(soa.Self(), *dex_file, false));
     TimingLogger::ScopedTiming t("Resolve const-string Strings", timings);
 
     size_t class_def_count = dex_file->NumClassDefs();
@@ -922,7 +846,7 @@
           continue;
         }
         previous_direct_method_idx = method_idx;
-        ResolveConstStrings(driver, *dex_file, it.GetMethodCodeItem());
+        ResolveConstStrings(dex_cache, *dex_file, it.GetMethodCodeItem());
         it.Next();
       }
       // Virtual methods.
@@ -936,7 +860,7 @@
           continue;
         }
         previous_virtual_method_idx = method_idx;
-        ResolveConstStrings(driver, *dex_file, it.GetMethodCodeItem());
+        ResolveConstStrings(dex_cache, *dex_file, it.GetMethodCodeItem());
         it.Next();
       }
       DCHECK(!it.HasNext());
@@ -951,6 +875,7 @@
 
 void CompilerDriver::PreCompile(jobject class_loader,
                                 const std::vector<const DexFile*>& dex_files,
+                                verifier::VerifierDeps* verifier_deps,
                                 TimingLogger* timings) {
   CheckThreadPools();
 
@@ -984,7 +909,7 @@
     VLOG(compiler) << "Resolve const-strings: " << GetMemoryUsageString(false);
   }
 
-  Verify(class_loader, dex_files, timings);
+  Verify(class_loader, dex_files, verifier_deps, timings);
   VLOG(compiler) << "Verify: " << GetMemoryUsageString(false);
 
   if (had_hard_verifier_failure_ && GetCompilerOptions().AbortOnHardVerifierFailure()) {
@@ -1021,7 +946,7 @@
     return true;
   }
 
-  std::string tmp = PrettyMethod(method_ref.dex_method_index, *method_ref.dex_file, true);
+  std::string tmp = method_ref.dex_file->PrettyMethod(method_ref.dex_method_index, true);
   return methods_to_compile_->find(tmp.c_str()) != methods_to_compile_->end();
 }
 
@@ -1036,7 +961,7 @@
   if (kDebugProfileGuidedCompilation) {
     LOG(INFO) << "[ProfileGuidedCompilation] "
         << (result ? "Compiled" : "Skipped") << " method:"
-        << PrettyMethod(method_ref.dex_method_index, *method_ref.dex_file, true);
+        << method_ref.dex_file->PrettyMethod(method_ref.dex_method_index, true);
   }
   return result;
 }
@@ -1063,7 +988,7 @@
       std::set<std::pair<uint16_t, const DexFile*>>& exceptions_to_resolve)
      : exceptions_to_resolve_(exceptions_to_resolve) {}
 
-  virtual bool operator()(mirror::Class* c) OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) {
+  virtual bool operator()(ObjPtr<mirror::Class> c) OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) {
     const auto pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
     for (auto& m : c->GetMethods(pointer_size)) {
       ResolveExceptionsForMethod(&m, pointer_size);
@@ -1117,7 +1042,7 @@
   explicit RecordImageClassesVisitor(std::unordered_set<std::string>* image_classes)
       : image_classes_(image_classes) {}
 
-  bool operator()(mirror::Class* klass) OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) {
+  bool operator()(ObjPtr<mirror::Class> klass) OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) {
     std::string temp;
     image_classes_->insert(klass->GetDescriptor(&temp));
     return true;
@@ -1215,6 +1140,7 @@
     VLOG(compiler) << "Adding " << descriptor << " to image classes";
     for (size_t i = 0; i < klass->NumDirectInterfaces(); ++i) {
       StackHandleScope<1> hs2(self);
+      // May cause thread suspension.
       MaybeAddToImageClasses(hs2.NewHandle(mirror::Class::GetDirectInterface(self, klass, i)),
                              image_classes);
     }
@@ -1234,15 +1160,14 @@
 // Note: we can use object pointers because we suspend all threads.
 class ClinitImageUpdate {
  public:
-  static ClinitImageUpdate* Create(std::unordered_set<std::string>* image_class_descriptors,
-                                   Thread* self, ClassLinker* linker, std::string* error_msg) {
-    std::unique_ptr<ClinitImageUpdate> res(new ClinitImageUpdate(image_class_descriptors, self,
+  static ClinitImageUpdate* Create(VariableSizedHandleScope& hs,
+                                   std::unordered_set<std::string>* image_class_descriptors,
+                                   Thread* self,
+                                   ClassLinker* linker) {
+    std::unique_ptr<ClinitImageUpdate> res(new ClinitImageUpdate(hs,
+                                                                 image_class_descriptors,
+                                                                 self,
                                                                  linker));
-    if (res->dex_cache_class_ == nullptr) {
-      *error_msg = "Could not find DexCache class.";
-      return nullptr;
-    }
-
     return res.release();
   }
 
@@ -1252,7 +1177,9 @@
   }
 
   // Visitor for VisitReferences.
-  void operator()(mirror::Object* object, MemberOffset field_offset, bool /* is_static */) const
+  void operator()(ObjPtr<mirror::Object> object,
+                  MemberOffset field_offset,
+                  bool /* is_static */) const
       REQUIRES_SHARED(Locks::mutator_lock_) {
     mirror::Object* ref = object->GetFieldObject<mirror::Object>(field_offset);
     if (ref != nullptr) {
@@ -1261,8 +1188,8 @@
   }
 
   // java.lang.Reference visitor for VisitReferences.
-  void operator()(mirror::Class* klass ATTRIBUTE_UNUSED, mirror::Reference* ref ATTRIBUTE_UNUSED)
-      const {}
+  void operator()(ObjPtr<mirror::Class> klass ATTRIBUTE_UNUSED,
+                  ObjPtr<mirror::Reference> ref ATTRIBUTE_UNUSED) const {}
 
   // Ignore class native roots.
   void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root ATTRIBUTE_UNUSED)
@@ -1271,26 +1198,32 @@
 
   void Walk() REQUIRES_SHARED(Locks::mutator_lock_) {
     // Use the initial classes as roots for a search.
-    for (mirror::Class* klass_root : image_classes_) {
-      VisitClinitClassesObject(klass_root);
+    for (Handle<mirror::Class> klass_root : image_classes_) {
+      VisitClinitClassesObject(klass_root.Get());
+    }
+    for (Handle<mirror::Class> h_klass : to_insert_) {
+      MaybeAddToImageClasses(h_klass, image_class_descriptors_);
     }
   }
 
  private:
   class FindImageClassesVisitor : public ClassVisitor {
    public:
-    explicit FindImageClassesVisitor(ClinitImageUpdate* data) : data_(data) {}
+    explicit FindImageClassesVisitor(VariableSizedHandleScope& hs,
+                                     ClinitImageUpdate* data)
+        : data_(data),
+          hs_(hs) {}
 
-    bool operator()(mirror::Class* klass) OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) {
+    bool operator()(ObjPtr<mirror::Class> klass) OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) {
       std::string temp;
       const char* name = klass->GetDescriptor(&temp);
       if (data_->image_class_descriptors_->find(name) != data_->image_class_descriptors_->end()) {
-        data_->image_classes_.push_back(klass);
+        data_->image_classes_.push_back(hs_.NewHandle(klass));
       } else {
         // Check whether it is initialized and has a clinit. They must be kept, too.
         if (klass->IsInitialized() && klass->FindClassInitializer(
             Runtime::Current()->GetClassLinker()->GetImagePointerSize()) != nullptr) {
-          data_->image_classes_.push_back(klass);
+          data_->image_classes_.push_back(hs_.NewHandle(klass));
         }
       }
       return true;
@@ -1298,25 +1231,25 @@
 
    private:
     ClinitImageUpdate* const data_;
+    VariableSizedHandleScope& hs_;
   };
 
-  ClinitImageUpdate(std::unordered_set<std::string>* image_class_descriptors, Thread* self,
-                    ClassLinker* linker)
-      REQUIRES_SHARED(Locks::mutator_lock_) :
-      image_class_descriptors_(image_class_descriptors), self_(self) {
+  ClinitImageUpdate(VariableSizedHandleScope& hs,
+                    std::unordered_set<std::string>* image_class_descriptors,
+                    Thread* self,
+                    ClassLinker* linker) REQUIRES_SHARED(Locks::mutator_lock_)
+      : hs_(hs),
+        image_class_descriptors_(image_class_descriptors),
+        self_(self) {
     CHECK(linker != nullptr);
     CHECK(image_class_descriptors != nullptr);
 
     // Make sure nobody interferes with us.
     old_cause_ = self->StartAssertNoThreadSuspension("Boot image closure");
 
-    // Find the interesting classes.
-    dex_cache_class_ = linker->LookupClass(self, "Ljava/lang/DexCache;",
-        ComputeModifiedUtf8Hash("Ljava/lang/DexCache;"), nullptr);
-
     // Find all the already-marked classes.
     WriterMutexLock mu(self, *Locks::heap_bitmap_lock_);
-    FindImageClassesVisitor visitor(this);
+    FindImageClassesVisitor visitor(hs_, this);
     linker->VisitClasses(&visitor);
   }
 
@@ -1332,25 +1265,25 @@
     marked_objects_.insert(object);
 
     if (object->IsClass()) {
-      // If it is a class, add it.
-      StackHandleScope<1> hs(self_);
-      MaybeAddToImageClasses(hs.NewHandle(object->AsClass()), image_class_descriptors_);
+      // Add to the TODO list since MaybeAddToImageClasses may cause thread suspension. Thread
+      // suspensionb is not safe to do in VisitObjects or VisitReferences.
+      to_insert_.push_back(hs_.NewHandle(object->AsClass()));
     } else {
       // Else visit the object's class.
       VisitClinitClassesObject(object->GetClass());
     }
 
     // If it is not a DexCache, visit all references.
-    mirror::Class* klass = object->GetClass();
-    if (klass != dex_cache_class_) {
+    if (!object->IsDexCache()) {
       object->VisitReferences(*this, *this);
     }
   }
 
+  VariableSizedHandleScope& hs_;
+  mutable std::vector<Handle<mirror::Class>> to_insert_;
   mutable std::unordered_set<mirror::Object*> marked_objects_;
   std::unordered_set<std::string>* const image_class_descriptors_;
-  std::vector<mirror::Class*> image_classes_;
-  const mirror::Class* dex_cache_class_;
+  std::vector<Handle<mirror::Class>> image_classes_;
   Thread* const self_;
   const char* old_cause_;
 
@@ -1366,12 +1299,12 @@
     // Suspend all threads.
     ScopedSuspendAll ssa(__FUNCTION__);
 
+    VariableSizedHandleScope hs(Thread::Current());
     std::string error_msg;
-    std::unique_ptr<ClinitImageUpdate> update(ClinitImageUpdate::Create(image_classes_.get(),
+    std::unique_ptr<ClinitImageUpdate> update(ClinitImageUpdate::Create(hs,
+                                                                        image_classes_.get(),
                                                                         Thread::Current(),
-                                                                        runtime->GetClassLinker(),
-                                                                        &error_msg));
-    CHECK(update.get() != nullptr) << error_msg;  // TODO: Soft failure?
+                                                                        runtime->GetClassLinker()));
 
     // Do the marking.
     update->Walk();
@@ -1407,54 +1340,6 @@
   dex_to_dex_references_.back().GetMethodIndexes().SetBit(method_ref.dex_method_index);
 }
 
-bool CompilerDriver::CanAssumeTypeIsPresentInDexCache(Handle<mirror::DexCache> dex_cache,
-                                                      uint32_t type_idx) {
-  bool result = false;
-  if ((GetCompilerOptions().IsBootImage() &&
-       IsImageClass(dex_cache->GetDexFile()->StringDataByIdx(
-           dex_cache->GetDexFile()->GetTypeId(type_idx).descriptor_idx_))) ||
-      Runtime::Current()->UseJitCompilation()) {
-    mirror::Class* resolved_class = dex_cache->GetResolvedType(type_idx);
-    result = (resolved_class != nullptr);
-  }
-
-  if (result) {
-    stats_->TypeInDexCache();
-  } else {
-    stats_->TypeNotInDexCache();
-  }
-  return result;
-}
-
-bool CompilerDriver::CanAssumeStringIsPresentInDexCache(const DexFile& dex_file,
-                                                        uint32_t string_idx) {
-  // See also Compiler::ResolveDexFile
-
-  bool result = false;
-  if (GetCompilerOptions().IsBootImage() || Runtime::Current()->UseJitCompilation()) {
-    ScopedObjectAccess soa(Thread::Current());
-    StackHandleScope<1> hs(soa.Self());
-    ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
-    Handle<mirror::DexCache> dex_cache(hs.NewHandle(class_linker->FindDexCache(
-        soa.Self(), dex_file, false)));
-    if (GetCompilerOptions().IsBootImage()) {
-      // We resolve all const-string strings when building for the image.
-      class_linker->ResolveString(dex_file, string_idx, dex_cache);
-      result = true;
-    } else {
-      // Just check whether the dex cache already has the string.
-      DCHECK(Runtime::Current()->UseJitCompilation());
-      result = (dex_cache->GetResolvedString(string_idx) != nullptr);
-    }
-  }
-  if (result) {
-    stats_->StringInDexCache();
-  } else {
-    stats_->StringNotInDexCache();
-  }
-  return result;
-}
-
 bool CompilerDriver::CanAccessTypeWithoutChecks(uint32_t referrer_idx,
                                                 Handle<mirror::DexCache> dex_cache,
                                                 uint32_t type_idx) {
@@ -1518,108 +1403,6 @@
   return result;
 }
 
-bool CompilerDriver::CanEmbedTypeInCode(const DexFile& dex_file, uint32_t type_idx,
-                                        bool* is_type_initialized, bool* use_direct_type_ptr,
-                                        uintptr_t* direct_type_ptr, bool* out_is_finalizable) {
-  ScopedObjectAccess soa(Thread::Current());
-  Runtime* runtime = Runtime::Current();
-  mirror::DexCache* dex_cache = runtime->GetClassLinker()->FindDexCache(
-      soa.Self(), dex_file, false);
-  mirror::Class* resolved_class = dex_cache->GetResolvedType(type_idx);
-  if (resolved_class == nullptr) {
-    return false;
-  }
-  if (GetCompilerOptions().GetCompilePic()) {
-    // Do not allow a direct class pointer to be used when compiling for position-independent
-    return false;
-  }
-  *out_is_finalizable = resolved_class->IsFinalizable();
-  gc::Heap* heap = runtime->GetHeap();
-  const bool compiling_boot = heap->IsCompilingBoot();
-  const bool support_boot_image_fixup = GetSupportBootImageFixup();
-  if (compiling_boot) {
-    // boot -> boot class pointers.
-    // True if the class is in the image at boot compiling time.
-    const bool is_image_class = GetCompilerOptions().IsBootImage() && IsImageClass(
-        dex_file.StringDataByIdx(dex_file.GetTypeId(type_idx).descriptor_idx_));
-    // True if pc relative load works.
-    if (is_image_class && support_boot_image_fixup) {
-      *is_type_initialized = resolved_class->IsInitialized();
-      *use_direct_type_ptr = false;
-      *direct_type_ptr = 0;
-      return true;
-    } else {
-      return false;
-    }
-  } else if (runtime->UseJitCompilation() && !heap->IsMovableObject(resolved_class)) {
-    *is_type_initialized = resolved_class->IsInitialized();
-    // If the class may move around, then don't embed it as a direct pointer.
-    *use_direct_type_ptr = true;
-    *direct_type_ptr = reinterpret_cast<uintptr_t>(resolved_class);
-    return true;
-  } else {
-    // True if the class is in the image at app compiling time.
-    const bool class_in_image = heap->FindSpaceFromObject(resolved_class, false)->IsImageSpace();
-    if (class_in_image && support_boot_image_fixup) {
-      // boot -> app class pointers.
-      *is_type_initialized = resolved_class->IsInitialized();
-      // TODO This is somewhat hacky. We should refactor all of this invoke codepath.
-      *use_direct_type_ptr = !GetCompilerOptions().GetIncludePatchInformation();
-      *direct_type_ptr = reinterpret_cast<uintptr_t>(resolved_class);
-      return true;
-    } else {
-      // app -> app class pointers.
-      // Give up because app does not have an image and class
-      // isn't created at compile time.  TODO: implement this
-      // if/when each app gets an image.
-      return false;
-    }
-  }
-}
-
-bool CompilerDriver::CanEmbedReferenceTypeInCode(ClassReference* ref,
-                                                 bool* use_direct_ptr,
-                                                 uintptr_t* direct_type_ptr) {
-  CHECK(ref != nullptr);
-  CHECK(use_direct_ptr != nullptr);
-  CHECK(direct_type_ptr != nullptr);
-
-  ScopedObjectAccess soa(Thread::Current());
-  mirror::Class* reference_class = mirror::Reference::GetJavaLangRefReference();
-  bool is_initialized = false;
-  bool unused_finalizable;
-  // Make sure we have a finished Reference class object before attempting to use it.
-  if (!CanEmbedTypeInCode(*reference_class->GetDexCache()->GetDexFile(),
-                          reference_class->GetDexTypeIndex(), &is_initialized,
-                          use_direct_ptr, direct_type_ptr, &unused_finalizable) ||
-      !is_initialized) {
-    return false;
-  }
-  ref->first = &reference_class->GetDexFile();
-  ref->second = reference_class->GetDexClassDefIndex();
-  return true;
-}
-
-uint32_t CompilerDriver::GetReferenceSlowFlagOffset() const {
-  ScopedObjectAccess soa(Thread::Current());
-  mirror::Class* klass = mirror::Reference::GetJavaLangRefReference();
-  DCHECK(klass->IsInitialized());
-  return klass->GetSlowPathFlagOffset().Uint32Value();
-}
-
-uint32_t CompilerDriver::GetReferenceDisableFlagOffset() const {
-  ScopedObjectAccess soa(Thread::Current());
-  mirror::Class* klass = mirror::Reference::GetJavaLangRefReference();
-  DCHECK(klass->IsInitialized());
-  return klass->GetDisableIntrinsicFlagOffset().Uint32Value();
-}
-
-DexCacheArraysLayout CompilerDriver::GetDexCacheArraysLayout(const DexFile* dex_file) {
-  return ContainsElement(GetDexFilesForOatFile(), dex_file)
-      ? DexCacheArraysLayout(GetInstructionSetPointerSize(instruction_set_), dex_file)
-      : DexCacheArraysLayout();
-}
-
 void CompilerDriver::ProcessedInstanceField(bool resolved) {
   if (!resolved) {
     stats_->UnresolvedInstanceField();
@@ -1638,10 +1421,6 @@
   }
 }
 
-void CompilerDriver::ProcessedInvoke(InvokeType invoke_type, int flags) {
-  stats_->ProcessedInvoke(invoke_type, flags);
-}
-
 ArtField* CompilerDriver::ComputeInstanceFieldInfo(uint32_t field_idx,
                                                    const DexCompilationUnit* mUnit, bool is_put,
                                                    const ScopedObjectAccess& soa) {
@@ -1746,7 +1525,7 @@
 
   if (!use_dex_cache) {
     bool method_in_image = false;
-    const std::vector<gc::space::ImageSpace*> image_spaces = heap->GetBootImageSpaces();
+    const std::vector<gc::space::ImageSpace*>& image_spaces = heap->GetBootImageSpaces();
     for (gc::space::ImageSpace* image_space : image_spaces) {
       const auto& method_section = image_space->GetImageHeader().GetMethodsSection();
       if (method_section.Contains(reinterpret_cast<uint8_t*>(method) - image_space->Begin())) {
@@ -1910,7 +1689,7 @@
   const DexFile& original_dex_file = *klass->GetDexCache()->GetDexFile();
   if (&dex_file != &original_dex_file) {
     if (class_loader == nullptr) {
-      LOG(WARNING) << "Skipping class " << PrettyDescriptor(klass) << " from "
+      LOG(WARNING) << "Skipping class " << klass->PrettyDescriptor() << " from "
                    << dex_file.GetLocation() << " previously found in "
                    << original_dex_file.GetLocation();
     }
@@ -2156,15 +1935,61 @@
   }
 }
 
-void CompilerDriver::Verify(jobject class_loader,
+void CompilerDriver::Verify(jobject jclass_loader,
                             const std::vector<const DexFile*>& dex_files,
+                            verifier::VerifierDeps* verifier_deps,
                             TimingLogger* timings) {
+  if (verifier_deps != nullptr) {
+    TimingLogger::ScopedTiming t("Fast Verify", timings);
+    ScopedObjectAccess soa(Thread::Current());
+    StackHandleScope<2> hs(soa.Self());
+    Handle<mirror::ClassLoader> class_loader(
+        hs.NewHandle(soa.Decode<mirror::ClassLoader>(jclass_loader)));
+    MutableHandle<mirror::Class> cls(hs.NewHandle<mirror::Class>(nullptr));
+    ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+    if (verifier_deps->ValidateDependencies(class_loader, soa.Self())) {
+      // We successfully validated the dependencies, now update class status
+      // of verified classes. Note that the dependencies also record which classes
+      // could not be fully verified; we could try again, but that would hurt verification
+      // time. So instead we assume these classes still need to be verified at
+      // runtime.
+      for (const DexFile* dex_file : dex_files) {
+        // Fetch the list of unverified classes and turn it into a set for faster
+        // lookups.
+        const std::vector<uint16_t>& unverified_classes =
+            verifier_deps->GetUnverifiedClasses(*dex_file);
+        std::set<uint16_t> set(unverified_classes.begin(), unverified_classes.end());
+        for (uint32_t i = 0; i < dex_file->NumClassDefs(); ++i) {
+          const DexFile::ClassDef& class_def = dex_file->GetClassDef(i);
+          const char* descriptor = dex_file->GetClassDescriptor(class_def);
+          cls.Assign(class_linker->FindClass(soa.Self(), descriptor, class_loader));
+          if (cls.Get() == nullptr) {
+            CHECK(soa.Self()->IsExceptionPending());
+            soa.Self()->ClearException();
+          } else if (set.find(class_def.class_idx_) == set.end()) {
+            ObjectLock<mirror::Class> lock(soa.Self(), cls);
+            mirror::Class::SetStatus(cls, mirror::Class::kStatusVerified, soa.Self());
+          }
+        }
+      }
+      return;
+    }
+  }
+
+  // If there is no passed `verifier_deps` (because of non-existing vdex), or
+  // the passed `verifier_deps` is not valid anymore, create a new one for
+  // non boot image compilation. The verifier will need it to record the new dependencies.
+  // Then dex2oat can update the vdex file with these new dependencies.
+  if (!GetCompilerOptions().IsBootImage()) {
+    Runtime::Current()->GetCompilerCallbacks()->SetVerifierDeps(
+        new verifier::VerifierDeps(dex_files));
+  }
   // Note: verification should not be pulling in classes anymore when compiling the boot image,
   //       as all should have been resolved before. As such, doing this in parallel should still
   //       be deterministic.
   for (const DexFile* dex_file : dex_files) {
     CHECK(dex_file != nullptr);
-    VerifyDexFile(class_loader,
+    VerifyDexFile(jclass_loader,
                   *dex_file,
                   dex_files,
                   parallel_thread_pool_.get(),
@@ -2195,6 +2020,7 @@
         hs.NewHandle(soa.Decode<mirror::ClassLoader>(jclass_loader)));
     Handle<mirror::Class> klass(
         hs.NewHandle(class_linker->FindClass(soa.Self(), descriptor, class_loader)));
+    verifier::MethodVerifier::FailureKind failure_kind;
     if (klass.Get() == nullptr) {
       CHECK(soa.Self()->IsExceptionPending());
       soa.Self()->ClearException();
@@ -2207,7 +2033,8 @@
       Handle<mirror::DexCache> dex_cache(hs.NewHandle(class_linker->FindDexCache(
           soa.Self(), dex_file, false)));
       std::string error_msg;
-      if (verifier::MethodVerifier::VerifyClass(soa.Self(),
+      failure_kind =
+          verifier::MethodVerifier::VerifyClass(soa.Self(),
                                                 &dex_file,
                                                 dex_cache,
                                                 class_loader,
@@ -2215,15 +2042,15 @@
                                                 Runtime::Current()->GetCompilerCallbacks(),
                                                 true /* allow soft failures */,
                                                 log_level_,
-                                                &error_msg) ==
-                                                    verifier::MethodVerifier::kHardFailure) {
+                                                &error_msg);
+      if (failure_kind == verifier::MethodVerifier::kHardFailure) {
         LOG(ERROR) << "Verification failed on class " << PrettyDescriptor(descriptor)
                    << " because: " << error_msg;
         manager_->GetCompiler()->SetHadHardVerifierFailure();
       }
     } else if (!SkipClass(jclass_loader, dex_file, klass.Get())) {
-      CHECK(klass->IsResolved()) << PrettyClass(klass.Get());
-      class_linker->VerifyClass(soa.Self(), klass, log_level_);
+      CHECK(klass->IsResolved()) << klass->PrettyClass();
+      failure_kind = class_linker->VerifyClass(soa.Self(), klass, log_level_);
 
       if (klass->IsErroneous()) {
         // ClassLinker::VerifyClass throws, which isn't useful in the compiler.
@@ -2233,14 +2060,26 @@
       }
 
       CHECK(klass->IsCompileTimeVerified() || klass->IsErroneous())
-          << PrettyDescriptor(klass.Get()) << ": state=" << klass->GetStatus();
+          << klass->PrettyDescriptor() << ": state=" << klass->GetStatus();
 
       // It is *very* problematic if there are verification errors in the boot classpath. For example,
       // we rely on things working OK without verification when the decryption dialog is brought up.
       // So abort in a debug build if we find this violated.
-      DCHECK(!manager_->GetCompiler()->GetCompilerOptions().IsBootImage() || klass->IsVerified())
-          << "Boot classpath class " << PrettyClass(klass.Get()) << " failed to fully verify.";
+      if (kIsDebugBuild) {
+        // TODO(narayan): Remove this special case for signature polymorphic
+        // invokes once verifier support is fully implemented.
+        if (manager_->GetCompiler()->GetCompilerOptions().IsBootImage() &&
+            !android::base::StartsWith(descriptor, "Ljava/lang/invoke/")) {
+          DCHECK(klass->IsVerified()) << "Boot classpath class " << klass->PrettyClass()
+              << " failed to fully verify: state= " << klass->GetStatus();
+        }
+      }
+    } else {
+      // Make the skip a soft failure, essentially being considered as verify at runtime.
+      failure_kind = verifier::MethodVerifier::kSoftFailure;
     }
+    verifier::VerifierDeps::MaybeRecordVerificationStatus(
+        dex_file, class_def.class_idx_, failure_kind);
     soa.Self()->AssertNoPendingException();
   }
 
@@ -2452,24 +2291,34 @@
 
 class InitializeArrayClassesAndCreateConflictTablesVisitor : public ClassVisitor {
  public:
-  virtual bool operator()(mirror::Class* klass) OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) {
+  explicit InitializeArrayClassesAndCreateConflictTablesVisitor(VariableSizedHandleScope& hs)
+      : hs_(hs) {}
+
+  virtual bool operator()(ObjPtr<mirror::Class> klass) OVERRIDE
+      REQUIRES_SHARED(Locks::mutator_lock_) {
     if (Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(klass)) {
       return true;
     }
     if (klass->IsArrayClass()) {
       StackHandleScope<1> hs(Thread::Current());
-      Runtime::Current()->GetClassLinker()->EnsureInitialized(hs.Self(),
-                                                              hs.NewHandle(klass),
-                                                              true,
-                                                              true);
+      auto h_klass = hs.NewHandleWrapper(&klass);
+      Runtime::Current()->GetClassLinker()->EnsureInitialized(hs.Self(), h_klass, true, true);
     }
-    // Create the conflict tables.
-    FillIMTAndConflictTables(klass);
+    // Collect handles since there may be thread suspension in future EnsureInitialized.
+    to_visit_.push_back(hs_.NewHandle(klass));
     return true;
   }
 
+  void FillAllIMTAndConflictTables() REQUIRES_SHARED(Locks::mutator_lock_) {
+    for (Handle<mirror::Class> c : to_visit_) {
+      // Create the conflict tables.
+      FillIMTAndConflictTables(c.Get());
+    }
+  }
+
  private:
-  void FillIMTAndConflictTables(mirror::Class* klass) REQUIRES_SHARED(Locks::mutator_lock_) {
+  void FillIMTAndConflictTables(ObjPtr<mirror::Class> klass)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
     if (!klass->ShouldHaveImt()) {
       return;
     }
@@ -2485,7 +2334,9 @@
     visited_classes_.insert(klass);
   }
 
-  std::set<mirror::Class*> visited_classes_;
+  VariableSizedHandleScope& hs_;
+  std::vector<Handle<mirror::Class>> to_visit_;
+  std::unordered_set<ObjPtr<mirror::Class>, HashObjPtr> visited_classes_;
 };
 
 void CompilerDriver::InitializeClasses(jobject class_loader,
@@ -2503,8 +2354,10 @@
     // Also create conflict tables.
     // Only useful if we are compiling an image (image_classes_ is not null).
     ScopedObjectAccess soa(Thread::Current());
-    InitializeArrayClassesAndCreateConflictTablesVisitor visitor;
+    VariableSizedHandleScope hs(soa.Self());
+    InitializeArrayClassesAndCreateConflictTablesVisitor visitor(hs);
     Runtime::Current()->GetClassLinker()->VisitClassesWithoutClassesLock(&visitor);
+    visitor.FillAllIMTAndConflictTables();
   }
   if (GetCompilerOptions().IsBootImage()) {
     // Prune garbage objects created during aborted transactions.
@@ -2677,14 +2530,14 @@
                                        CompiledMethod* const compiled_method,
                                        size_t non_relative_linker_patch_count) {
   DCHECK(GetCompiledMethod(method_ref) == nullptr)
-      << PrettyMethod(method_ref.dex_method_index, *method_ref.dex_file);
+      << method_ref.dex_file->PrettyMethod(method_ref.dex_method_index);
   {
     MutexLock mu(Thread::Current(), compiled_methods_lock_);
     compiled_methods_.Put(method_ref, compiled_method);
     non_relative_linker_patch_count_ += non_relative_linker_patch_count;
   }
   DCHECK(GetCompiledMethod(method_ref) != nullptr)
-      << PrettyMethod(method_ref.dex_method_index, *method_ref.dex_file);
+      << method_ref.dex_file->PrettyMethod(method_ref.dex_method_index);
 }
 
 void CompilerDriver::RemoveCompiledMethod(const MethodReference& method_ref) {
diff --git a/compiler/driver/compiler_driver.h b/compiler/driver/compiler_driver.h
index eb1222c..c8d6cb0 100644
--- a/compiler/driver/compiler_driver.h
+++ b/compiler/driver/compiler_driver.h
@@ -50,6 +50,8 @@
 
 namespace verifier {
 class MethodVerifier;
+class VerifierDeps;
+class VerifierDepsTest;
 }  // namespace verifier
 
 class BitVector;
@@ -116,6 +118,7 @@
 
   void CompileAll(jobject class_loader,
                   const std::vector<const DexFile*>& dex_files,
+                  verifier::VerifierDeps* verifier_deps,
                   TimingLogger* timings)
       REQUIRES(!Locks::mutator_lock_, !compiled_classes_lock_, !dex_to_dex_references_lock_);
 
@@ -182,15 +185,6 @@
                                   uint16_t class_def_index)
       REQUIRES(!requires_constructor_barrier_lock_);
 
-  // Callbacks from compiler to see what runtime checks must be generated.
-
-  bool CanAssumeTypeIsPresentInDexCache(Handle<mirror::DexCache> dex_cache,
-                                        uint32_t type_idx)
-      REQUIRES_SHARED(Locks::mutator_lock_);
-
-  bool CanAssumeStringIsPresentInDexCache(const DexFile& dex_file, uint32_t string_idx)
-      REQUIRES(!Locks::mutator_lock_);
-
   // Are runtime access checks necessary in the compiled code?
   bool CanAccessTypeWithoutChecks(uint32_t referrer_idx,
                                   Handle<mirror::DexCache> dex_cache,
@@ -205,24 +199,6 @@
                                               bool* out_is_finalizable)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  bool CanEmbedTypeInCode(const DexFile& dex_file, uint32_t type_idx,
-                          bool* is_type_initialized, bool* use_direct_type_ptr,
-                          uintptr_t* direct_type_ptr, bool* out_is_finalizable);
-
-  // Query methods for the java.lang.ref.Reference class.
-  bool CanEmbedReferenceTypeInCode(ClassReference* ref,
-                                   bool* use_direct_type_ptr, uintptr_t* direct_type_ptr);
-  uint32_t GetReferenceSlowFlagOffset() const;
-  uint32_t GetReferenceDisableFlagOffset() const;
-
-  // Get the DexCache for the
-  mirror::DexCache* GetDexCache(const DexCompilationUnit* mUnit)
-    REQUIRES_SHARED(Locks::mutator_lock_);
-
-  mirror::ClassLoader* GetClassLoader(const ScopedObjectAccess& soa,
-                                      const DexCompilationUnit* mUnit)
-    REQUIRES_SHARED(Locks::mutator_lock_);
-
   // Resolve compiling method's class. Returns null on failure.
   mirror::Class* ResolveCompilingMethodsClass(
       const ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache,
@@ -250,19 +226,6 @@
       uint32_t field_idx, bool is_static)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  // Get declaration location of a resolved field.
-  void GetResolvedFieldDexFileLocation(
-      ArtField* resolved_field, const DexFile** declaring_dex_file,
-      uint16_t* declaring_class_idx, uint16_t* declaring_field_idx)
-      REQUIRES_SHARED(Locks::mutator_lock_);
-
-  bool IsFieldVolatile(ArtField* field) REQUIRES_SHARED(Locks::mutator_lock_);
-  MemberOffset GetFieldOffset(ArtField* field) REQUIRES_SHARED(Locks::mutator_lock_);
-
-  // Find a dex cache for a dex file.
-  inline mirror::DexCache* FindDexCache(const DexFile* dex_file)
-      REQUIRES_SHARED(Locks::mutator_lock_);
-
   // Can we fast-path an IGET/IPUT access to an instance field? If yes, compute the field offset.
   std::pair<bool, bool> IsFastInstanceField(
       mirror::DexCache* dex_cache, mirror::Class* referrer_class,
@@ -288,15 +251,6 @@
                                                 uint32_t* storage_index)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  // Is static field's in referrer's class?
-  bool IsStaticFieldInReferrerClass(mirror::Class* referrer_class, ArtField* resolved_field)
-      REQUIRES_SHARED(Locks::mutator_lock_);
-
-  // Is static field's class initialized?
-  bool IsStaticFieldsClassInitialized(mirror::Class* referrer_class,
-                                      ArtField* resolved_field)
-      REQUIRES_SHARED(Locks::mutator_lock_);
-
   // Resolve a method. Returns null on failure, including incompatible class change.
   ArtMethod* ResolveMethod(
       ScopedObjectAccess& soa, Handle<mirror::DexCache> dex_cache,
@@ -304,37 +258,8 @@
       uint32_t method_idx, InvokeType invoke_type, bool check_incompatible_class_change = true)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  // Get declaration location of a resolved field.
-  void GetResolvedMethodDexFileLocation(
-      ArtMethod* resolved_method, const DexFile** declaring_dex_file,
-      uint16_t* declaring_class_idx, uint16_t* declaring_method_idx)
-      REQUIRES_SHARED(Locks::mutator_lock_);
-
-  // Get the index in the vtable of the method.
-  uint16_t GetResolvedMethodVTableIndex(
-      ArtMethod* resolved_method, InvokeType type)
-      REQUIRES_SHARED(Locks::mutator_lock_);
-
-  // Is method's class initialized for an invoke?
-  // For static invokes to determine whether we need to consider potential call to <clinit>().
-  // For non-static invokes, assuming a non-null reference, the class is always initialized.
-  bool IsMethodsClassInitialized(mirror::Class* referrer_class, ArtMethod* resolved_method)
-      REQUIRES_SHARED(Locks::mutator_lock_);
-
-  // Get the layout of dex cache arrays for a dex file. Returns invalid layout if the
-  // dex cache arrays don't have a fixed layout.
-  DexCacheArraysLayout GetDexCacheArraysLayout(const DexFile* dex_file);
-
   void ProcessedInstanceField(bool resolved);
   void ProcessedStaticField(bool resolved, bool local);
-  void ProcessedInvoke(InvokeType invoke_type, int flags);
-
-  void ComputeFieldInfo(uint32_t field_idx, const DexCompilationUnit* mUnit,
-                        const ScopedObjectAccess& soa, bool is_static,
-                        ArtField** resolved_field,
-                        mirror::Class** referrer_class,
-                        mirror::DexCache** dex_cache)
-      REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Can we fast path instance field access? Computes field's offset and volatility.
   bool ComputeInstanceFieldInfo(uint32_t field_idx, const DexCompilationUnit* mUnit, bool is_put,
@@ -386,6 +311,7 @@
   void SetDedupeEnabled(bool dedupe_enabled) {
     compiled_method_storage_.SetDedupeEnabled(dedupe_enabled);
   }
+
   bool DedupeEnabled() const {
     return compiled_method_storage_.DedupeEnabled();
   }
@@ -449,6 +375,13 @@
     return current_dex_to_dex_methods_;
   }
 
+  // Compute constant code and method pointers when possible.
+  void GetCodeAndMethodForDirectCall(const mirror::Class* referrer_class,
+                                     ArtMethod* method,
+                                     /* out */ uintptr_t* direct_code,
+                                     /* out */ uintptr_t* direct_method)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
  private:
   // Return whether the declaring class of `resolved_member` is
   // available to `referrer_class` for read or write access using two
@@ -477,42 +410,14 @@
                                       uint32_t field_idx)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  // Can we assume that the klass is initialized?
-  bool CanAssumeClassIsInitialized(mirror::Class* klass)
-      REQUIRES_SHARED(Locks::mutator_lock_);
-  bool CanReferrerAssumeClassIsInitialized(mirror::Class* referrer_class, mirror::Class* klass)
-      REQUIRES_SHARED(Locks::mutator_lock_);
-
-  // These flags are internal to CompilerDriver for collecting INVOKE resolution statistics.
-  // The only external contract is that unresolved method has flags 0 and resolved non-0.
-  enum {
-    kBitMethodResolved = 0,
-    kBitVirtualMadeDirect,
-    kBitPreciseTypeDevirtualization,
-    kBitDirectCallToBoot,
-    kBitDirectMethodToBoot
-  };
-  static constexpr int kFlagMethodResolved              = 1 << kBitMethodResolved;
-  static constexpr int kFlagVirtualMadeDirect           = 1 << kBitVirtualMadeDirect;
-  static constexpr int kFlagPreciseTypeDevirtualization = 1 << kBitPreciseTypeDevirtualization;
-  static constexpr int kFlagDirectCallToBoot            = 1 << kBitDirectCallToBoot;
-  static constexpr int kFlagDirectMethodToBoot          = 1 << kBitDirectMethodToBoot;
-  static constexpr int kFlagsMethodResolvedVirtualMadeDirect =
-      kFlagMethodResolved | kFlagVirtualMadeDirect;
-  static constexpr int kFlagsMethodResolvedPreciseTypeDevirtualization =
-      kFlagsMethodResolvedVirtualMadeDirect | kFlagPreciseTypeDevirtualization;
-
- public:  // TODO make private or eliminate.
-  // Compute constant code and method pointers when possible.
-  void GetCodeAndMethodForDirectCall(const mirror::Class* referrer_class,
-                                     ArtMethod* method,
-                                     /* out */ uintptr_t* direct_code,
-                                     /* out */ uintptr_t* direct_method)
-      REQUIRES_SHARED(Locks::mutator_lock_);
+  mirror::ClassLoader* GetClassLoader(const ScopedObjectAccess& soa,
+                                      const DexCompilationUnit* mUnit)
+    REQUIRES_SHARED(Locks::mutator_lock_);
 
  private:
   void PreCompile(jobject class_loader,
                   const std::vector<const DexFile*>& dex_files,
+                  verifier::VerifierDeps* verifier_deps,
                   TimingLogger* timings)
       REQUIRES(!Locks::mutator_lock_, !compiled_classes_lock_);
 
@@ -535,7 +440,9 @@
 
   void Verify(jobject class_loader,
               const std::vector<const DexFile*>& dex_files,
+              verifier::VerifierDeps* verifier_deps,
               TimingLogger* timings);
+
   void VerifyDexFile(jobject class_loader,
                      const DexFile& dex_file,
                      const std::vector<const DexFile*>& dex_files,
@@ -566,8 +473,6 @@
       REQUIRES(!Locks::mutator_lock_, !compiled_classes_lock_);
 
   void UpdateImageClasses(TimingLogger* timings) REQUIRES(!Locks::mutator_lock_);
-  static void FindClinitImageClassesCallback(mirror::Object* object, void* arg)
-      REQUIRES_SHARED(Locks::mutator_lock_);
 
   void Compile(jobject class_loader,
                const std::vector<const DexFile*>& dex_files,
@@ -679,6 +584,7 @@
   const BitVector* current_dex_to_dex_methods_;
 
   friend class CompileClassVisitor;
+  friend class verifier::VerifierDepsTest;
   DISALLOW_COPY_AND_ASSIGN(CompilerDriver);
 };
 
diff --git a/compiler/driver/compiler_driver_test.cc b/compiler/driver/compiler_driver_test.cc
index 96f17ac..9679a79 100644
--- a/compiler/driver/compiler_driver_test.cc
+++ b/compiler/driver/compiler_driver_test.cc
@@ -43,6 +43,7 @@
     TimingLogger::ScopedTiming t(__FUNCTION__, &timings);
     compiler_driver_->CompileAll(class_loader,
                                  GetDexFiles(class_loader),
+                                 /* verifier_deps */ nullptr,
                                  &timings);
     t.NewTiming("MakeAllExecutable");
     MakeAllExecutable(class_loader);
@@ -106,7 +107,7 @@
   ScopedObjectAccess soa(Thread::Current());
   ASSERT_TRUE(java_lang_dex_file_ != nullptr);
   const DexFile& dex = *java_lang_dex_file_;
-  mirror::DexCache* dex_cache = class_linker_->FindDexCache(soa.Self(), dex);
+  ObjPtr<mirror::DexCache> dex_cache = class_linker_->FindDexCache(soa.Self(), dex);
   EXPECT_EQ(dex.NumStringIds(), dex_cache->NumStrings());
   for (size_t i = 0; i < dex_cache->NumStrings(); i++) {
     const mirror::String* string = dex_cache->GetResolvedString(i);
@@ -207,8 +208,8 @@
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
   ScopedObjectAccess soa(self);
   StackHandleScope<1> hs(self);
-  Handle<mirror::ClassLoader> h_loader(hs.NewHandle(
-      reinterpret_cast<mirror::ClassLoader*>(self->DecodeJObject(class_loader))));
+  Handle<mirror::ClassLoader> h_loader(
+      hs.NewHandle(soa.Decode<mirror::ClassLoader>(class_loader)));
   mirror::Class* klass = class_linker->FindClass(self, "LStaticLeafMethods;", h_loader);
   ASSERT_NE(klass, nullptr);
 
@@ -216,7 +217,7 @@
 
   const auto pointer_size = class_linker->GetImagePointerSize();
   for (auto& m : klass->GetDirectMethods(pointer_size)) {
-    std::string name = PrettyMethod(&m, true);
+    std::string name = m.PrettyMethod(true);
     const void* code = m.GetEntryPointFromQuickCompiledCodePtrSize(pointer_size);
     ASSERT_NE(code, nullptr);
     if (expected->find(name) != expected->end()) {
@@ -265,15 +266,15 @@
     Thread* self = Thread::Current();
     ScopedObjectAccess soa(self);
     StackHandleScope<1> hs(self);
-    Handle<mirror::ClassLoader> h_loader(hs.NewHandle(
-        reinterpret_cast<mirror::ClassLoader*>(self->DecodeJObject(class_loader))));
+    Handle<mirror::ClassLoader> h_loader(
+        hs.NewHandle(soa.Decode<mirror::ClassLoader>(class_loader)));
     mirror::Class* klass = class_linker->FindClass(self, clazz.c_str(), h_loader);
     ASSERT_NE(klass, nullptr);
 
     const auto pointer_size = class_linker->GetImagePointerSize();
     size_t number_of_compiled_methods = 0;
     for (auto& m : klass->GetVirtualMethods(pointer_size)) {
-      std::string name = PrettyMethod(&m, true);
+      std::string name = m.PrettyMethod(true);
       const void* code = m.GetEntryPointFromQuickCompiledCodePtrSize(pointer_size);
       ASSERT_NE(code, nullptr);
       if (expected_methods.find(name) != expected_methods.end()) {
diff --git a/compiler/driver/compiler_options.cc b/compiler/driver/compiler_options.cc
index cbcc169..c222f90 100644
--- a/compiler/driver/compiler_options.cc
+++ b/compiler/driver/compiler_options.cc
@@ -37,6 +37,7 @@
       debuggable_(false),
       generate_debug_info_(kDefaultGenerateDebugInfo),
       generate_mini_debug_info_(kDefaultGenerateMiniDebugInfo),
+      generate_build_id_(false),
       implicit_null_checks_(true),
       implicit_so_checks_(true),
       implicit_suspend_checks_(false),
@@ -97,6 +98,7 @@
       debuggable_(debuggable),
       generate_debug_info_(generate_debug_info),
       generate_mini_debug_info_(kDefaultGenerateMiniDebugInfo),
+      generate_build_id_(false),
       implicit_null_checks_(implicit_null_checks),
       implicit_so_checks_(implicit_so_checks),
       implicit_suspend_checks_(implicit_suspend_checks),
@@ -196,6 +198,10 @@
     generate_mini_debug_info_ = true;
   } else if (option == "--no-generate-mini-debug-info") {
     generate_mini_debug_info_ = false;
+  } else if (option == "--generate-build-id") {
+    generate_build_id_ = true;
+  } else if (option == "--no-generate-build-id") {
+    generate_build_id_ = false;
   } else if (option == "--debuggable") {
     debuggable_ = true;
   } else if (option.starts_with("--top-k-profile-threshold=")) {
diff --git a/compiler/driver/compiler_options.h b/compiler/driver/compiler_options.h
index 8e4a775..56b632d 100644
--- a/compiler/driver/compiler_options.h
+++ b/compiler/driver/compiler_options.h
@@ -29,6 +29,12 @@
 
 namespace art {
 
+namespace verifier {
+  class VerifierDepsTest;
+}
+
+class DexFile;
+
 class CompilerOptions FINAL {
  public:
   // Guide heuristics to determine whether to compile method if profile data not available.
@@ -187,6 +193,10 @@
     return generate_mini_debug_info_;
   }
 
+  bool GetGenerateBuildId() const {
+    return generate_build_id_;
+  }
+
   bool GetImplicitNullChecks() const {
     return implicit_null_checks_;
   }
@@ -297,6 +307,7 @@
   bool debuggable_;
   bool generate_debug_info_;
   bool generate_mini_debug_info_;
+  bool generate_build_id_;
   bool implicit_null_checks_;
   bool implicit_so_checks_;
   bool implicit_suspend_checks_;
@@ -331,6 +342,7 @@
 
   friend class Dex2Oat;
   friend class CommonCompilerTest;
+  friend class verifier::VerifierDepsTest;
 
   DISALLOW_COPY_AND_ASSIGN(CompilerOptions);
 };
diff --git a/compiler/driver/dex_compilation_unit.cc b/compiler/driver/dex_compilation_unit.cc
index b0ee448..64fd9e7 100644
--- a/compiler/driver/dex_compilation_unit.cc
+++ b/compiler/driver/dex_compilation_unit.cc
@@ -45,7 +45,7 @@
 const std::string& DexCompilationUnit::GetSymbol() {
   if (symbol_.empty()) {
     symbol_ = "dex_";
-    symbol_ += MangleForJni(PrettyMethod(dex_method_idx_, *dex_file_));
+    symbol_ += MangleForJni(dex_file_->PrettyMethod(dex_method_idx_));
   }
   return symbol_;
 }
diff --git a/compiler/elf_builder.h b/compiler/elf_builder.h
index 73240be..7c02384 100644
--- a/compiler/elf_builder.h
+++ b/compiler/elf_builder.h
@@ -36,6 +36,7 @@
 // The basic layout of the elf file:
 //   Elf_Ehdr                    - The ELF header.
 //   Elf_Phdr[]                  - Program headers for the linker.
+//   .note.gnu.build-id          - Optional build ID section (SHA-1 digest).
 //   .rodata                     - DEX files and oat metadata.
 //   .text                       - Compiled code.
 //   .bss                        - Zero-initialized writeable section.
@@ -75,6 +76,10 @@
 class ElfBuilder FINAL {
  public:
   static constexpr size_t kMaxProgramHeaders = 16;
+  // SHA-1 digest.  Not using SHA_DIGEST_LENGTH from openssl/sha.h to avoid
+  // spreading this header dependency for just this single constant.
+  static constexpr size_t kBuildIdLen = 20;
+
   using Elf_Addr = typename ElfTypes::Addr;
   using Elf_Off = typename ElfTypes::Off;
   using Elf_Word = typename ElfTypes::Word;
@@ -458,6 +463,49 @@
     } abiflags_;
   };
 
+  class BuildIdSection FINAL : public Section {
+   public:
+    BuildIdSection(ElfBuilder<ElfTypes>* owner,
+                   const std::string& name,
+                   Elf_Word type,
+                   Elf_Word flags,
+                   const Section* link,
+                   Elf_Word info,
+                   Elf_Word align,
+                   Elf_Word entsize)
+        : Section(owner, name, type, flags, link, info, align, entsize),
+          digest_start_(-1) {
+    }
+
+    void Write() {
+      // The size fields are 32-bit on both 32-bit and 64-bit systems, confirmed
+      // with the 64-bit linker and libbfd code. The size of name and desc must
+      // be a multiple of 4 and it currently is.
+      this->WriteUint32(4);  // namesz.
+      this->WriteUint32(kBuildIdLen);  // descsz.
+      this->WriteUint32(3);  // type = NT_GNU_BUILD_ID.
+      this->WriteFully("GNU", 4);  // name.
+      digest_start_ = this->Seek(0, kSeekCurrent);
+      static_assert(kBuildIdLen % 4 == 0, "expecting a mutliple of 4 for build ID length");
+      this->WriteFully(std::string(kBuildIdLen, '\0').c_str(), kBuildIdLen);  // desc.
+    }
+
+    off_t GetDigestStart() {
+      CHECK_GT(digest_start_, 0);
+      return digest_start_;
+    }
+
+   private:
+    bool WriteUint32(uint32_t v) {
+      return this->WriteFully(&v, sizeof(v));
+    }
+
+    // File offset where the build ID digest starts.
+    // Populated with zeros first, then updated with the actual value as the
+    // very last thing in the output file creation.
+    off_t digest_start_;
+  };
+
   ElfBuilder(InstructionSet isa, const InstructionSetFeatures* features, OutputStream* output)
       : isa_(isa),
         features_(features),
@@ -479,6 +527,7 @@
         shstrtab_(this, ".shstrtab", 0, 1),
         abiflags_(this, ".MIPS.abiflags", SHT_MIPS_ABIFLAGS, SHF_ALLOC, nullptr, 0, kPageSize, 0,
                   isa, features),
+        build_id_(this, ".note.gnu.build-id", SHT_NOTE, SHF_ALLOC, nullptr, 0, 4, 0),
         started_(false),
         write_program_headers_(false),
         loaded_size_(0u),
@@ -489,6 +538,7 @@
     dynamic_.phdr_type_ = PT_DYNAMIC;
     eh_frame_hdr_.phdr_type_ = PT_GNU_EH_FRAME;
     abiflags_.phdr_type_ = PT_MIPS_ABIFLAGS;
+    build_id_.phdr_type_ = PT_NOTE;
   }
   ~ElfBuilder() {}
 
@@ -741,6 +791,17 @@
     abiflags_.End();
   }
 
+  void WriteBuildIdSection() {
+    build_id_.Start();
+    build_id_.Write();
+    build_id_.End();
+  }
+
+  void WriteBuildId(uint8_t build_id[kBuildIdLen]) {
+    stream_.Seek(build_id_.GetDigestStart(), kSeekSet);
+    stream_.WriteFully(build_id, kBuildIdLen);
+  }
+
   // Returns true if all writes and seeks on the output stream succeeded.
   bool Good() {
     return stream_.Good();
@@ -818,7 +879,7 @@
     elf_header.e_ident[EI_MAG2]       = ELFMAG2;
     elf_header.e_ident[EI_MAG3]       = ELFMAG3;
     elf_header.e_ident[EI_CLASS]      = (sizeof(Elf_Addr) == sizeof(Elf32_Addr))
-                                         ? ELFCLASS32 : ELFCLASS64;;
+                                         ? ELFCLASS32 : ELFCLASS64;
     elf_header.e_ident[EI_DATA]       = ELFDATA2LSB;
     elf_header.e_ident[EI_VERSION]    = EV_CURRENT;
     elf_header.e_ident[EI_OSABI]      = ELFOSABI_LINUX;
@@ -932,6 +993,7 @@
   Section debug_line_;
   StringSection shstrtab_;
   AbiflagsSection abiflags_;
+  BuildIdSection build_id_;
   std::vector<std::unique_ptr<Section>> other_sections_;
 
   // List of used section in the order in which they were written.
diff --git a/compiler/elf_writer_quick.cc b/compiler/elf_writer_quick.cc
index 36cd232..0d6575c 100644
--- a/compiler/elf_writer_quick.cc
+++ b/compiler/elf_writer_quick.cc
@@ -16,6 +16,7 @@
 
 #include "elf_writer_quick.h"
 
+#include <openssl/sha.h>
 #include <unordered_map>
 #include <unordered_set>
 
@@ -126,6 +127,8 @@
   std::unique_ptr<DebugInfoTask> debug_info_task_;
   std::unique_ptr<ThreadPool> debug_info_thread_pool_;
 
+  void ComputeFileBuildId(uint8_t (*build_id)[ElfBuilder<ElfTypes>::kBuildIdLen]);
+
   DISALLOW_IMPLICIT_CONSTRUCTORS(ElfWriterQuick);
 };
 
@@ -167,6 +170,9 @@
 template <typename ElfTypes>
 void ElfWriterQuick<ElfTypes>::Start() {
   builder_->Start();
+  if (compiler_options_->GetGenerateBuildId()) {
+    builder_->WriteBuildIdSection();
+  }
 }
 
 template <typename ElfTypes>
@@ -275,11 +281,36 @@
 template <typename ElfTypes>
 bool ElfWriterQuick<ElfTypes>::End() {
   builder_->End();
-
+  if (compiler_options_->GetGenerateBuildId()) {
+    uint8_t build_id[ElfBuilder<ElfTypes>::kBuildIdLen];
+    ComputeFileBuildId(&build_id);
+    builder_->WriteBuildId(build_id);
+  }
   return builder_->Good();
 }
 
 template <typename ElfTypes>
+void ElfWriterQuick<ElfTypes>::ComputeFileBuildId(
+    uint8_t (*build_id)[ElfBuilder<ElfTypes>::kBuildIdLen]) {
+  constexpr int kBufSize = 8192;
+  std::vector<char> buffer(kBufSize);
+  int64_t offset = 0;
+  SHA_CTX ctx;
+  SHA1_Init(&ctx);
+  while (true) {
+    int64_t bytes_read = elf_file_->Read(buffer.data(), kBufSize, offset);
+    CHECK_GE(bytes_read, 0);
+    if (bytes_read == 0) {
+      // End of file.
+      break;
+    }
+    SHA1_Update(&ctx, buffer.data(), bytes_read);
+    offset += bytes_read;
+  }
+  SHA1_Final(*build_id, &ctx);
+}
+
+template <typename ElfTypes>
 OutputStream* ElfWriterQuick<ElfTypes>::GetStream() {
   return builder_->GetStream();
 }
diff --git a/compiler/elf_writer_test.cc b/compiler/elf_writer_test.cc
index d5f1663..6f48779 100644
--- a/compiler/elf_writer_test.cc
+++ b/compiler/elf_writer_test.cc
@@ -94,13 +94,32 @@
                                               /*low_4gb*/false,
                                               &error_msg));
     CHECK(ef.get() != nullptr) << error_msg;
-    CHECK(ef->Load(false, /*low_4gb*/false, &error_msg)) << error_msg;
+    CHECK(ef->Load(file.get(), false, /*low_4gb*/false, &error_msg)) << error_msg;
     EXPECT_EQ(dl_oatdata, ef->FindDynamicSymbolAddress("oatdata"));
     EXPECT_EQ(dl_oatexec, ef->FindDynamicSymbolAddress("oatexec"));
     EXPECT_EQ(dl_oatlastword, ef->FindDynamicSymbolAddress("oatlastword"));
   }
 }
 
+TEST_F(ElfWriterTest, CheckBuildIdPresent) {
+  std::string elf_location = GetCoreOatLocation();
+  std::string elf_filename = GetSystemImageFilename(elf_location.c_str(), kRuntimeISA);
+  LOG(INFO) << "elf_filename=" << elf_filename;
+
+  std::unique_ptr<File> file(OS::OpenFileForReading(elf_filename.c_str()));
+  ASSERT_TRUE(file.get() != nullptr);
+  {
+    std::string error_msg;
+    std::unique_ptr<ElfFile> ef(ElfFile::Open(file.get(),
+                                              false,
+                                              false,
+                                              /*low_4gb*/false,
+                                              &error_msg));
+    CHECK(ef.get() != nullptr) << error_msg;
+    EXPECT_TRUE(ef->HasSection(".note.gnu.build-id"));
+  }
+}
+
 TEST_F(ElfWriterTest, EncodeDecodeOatPatches) {
   const std::vector<std::vector<uintptr_t>> test_data {
       { 0, 4, 8, 15, 128, 200 },
diff --git a/compiler/generate-operator-out.py b/compiler/generate-operator-out.py
new file mode 120000
index 0000000..cc291d2
--- /dev/null
+++ b/compiler/generate-operator-out.py
@@ -0,0 +1 @@
+../tools/generate-operator-out.py
\ No newline at end of file
diff --git a/compiler/image_test.cc b/compiler/image_test.cc
index 9e94b9d..fcb8979 100644
--- a/compiler/image_test.cc
+++ b/compiler/image_test.cc
@@ -189,7 +189,7 @@
       TimingLogger timings("ImageTest::WriteRead", false, false);
       TimingLogger::ScopedTiming t("CompileAll", &timings);
       driver->SetDexFilesForOatFile(class_path);
-      driver->CompileAll(class_loader, class_path, &timings);
+      driver->CompileAll(class_loader, class_path, /* verifier_deps */ nullptr, &timings);
 
       t.NewTiming("WriteElf");
       SafeMap<std::string, std::string> key_value_store;
@@ -260,7 +260,8 @@
         OatWriter* const oat_writer = oat_writers[i].get();
         ElfWriter* const elf_writer = elf_writers[i].get();
         std::vector<const DexFile*> cur_dex_files(1u, class_path[i]);
-        oat_writer->PrepareLayout(driver, writer.get(), cur_dex_files, &patcher);
+        oat_writer->Initialize(driver, writer.get(), cur_dex_files);
+        oat_writer->PrepareLayout(&patcher);
         size_t rodata_size = oat_writer->GetOatHeader().GetExecutableOffset();
         size_t text_size = oat_writer->GetOatSize() - rodata_size;
         elf_writer->PrepareDynamicSection(rodata_size,
diff --git a/compiler/image_writer.cc b/compiler/image_writer.cc
index 66938b2..51ef440 100644
--- a/compiler/image_writer.cc
+++ b/compiler/image_writer.cc
@@ -51,6 +51,7 @@
 #include "lock_word.h"
 #include "mirror/array-inl.h"
 #include "mirror/class-inl.h"
+#include "mirror/class_ext.h"
 #include "mirror/class_loader.h"
 #include "mirror/dex_cache.h"
 #include "mirror/dex_cache-inl.h"
@@ -114,7 +115,7 @@
 static void CheckNoDexObjectsCallback(Object* obj, void* arg ATTRIBUTE_UNUSED)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   Class* klass = obj->GetClass();
-  CHECK_NE(PrettyClass(klass), "com.android.dex.Dex");
+  CHECK_NE(Class::PrettyClass(klass), "com.android.dex.Dex");
 }
 
 static void CheckNoDexObjects() {
@@ -433,9 +434,9 @@
   Thread* const self = Thread::Current();
   ReaderMutexLock mu(self, *class_linker->DexLock());
   for (const ClassLinker::DexCacheData& data : class_linker->GetDexCachesData()) {
-    mirror::DexCache* dex_cache =
-        down_cast<mirror::DexCache*>(self->DecodeJObject(data.weak_root));
-    if (dex_cache == nullptr || IsInBootImage(dex_cache)) {
+    ObjPtr<mirror::DexCache> dex_cache =
+        ObjPtr<mirror::DexCache>::DownCast(self->DecodeJObject(data.weak_root));
+    if (dex_cache == nullptr || IsInBootImage(dex_cache.Ptr())) {
       continue;
     }
     const DexFile* dex_file = dex_cache->GetDexFile();
@@ -461,10 +462,18 @@
                                dex_cache);
     DCHECK_EQ(dex_file->NumStringIds() != 0u, dex_cache->GetStrings() != nullptr);
     AddDexCacheArrayRelocation(dex_cache->GetStrings(), start + layout.StringsOffset(), dex_cache);
+
+    if (dex_cache->GetResolvedMethodTypes() != nullptr) {
+      AddDexCacheArrayRelocation(dex_cache->GetResolvedMethodTypes(),
+                                 start + layout.MethodTypesOffset(),
+                                 dex_cache);
+    }
   }
 }
 
-void ImageWriter::AddDexCacheArrayRelocation(void* array, size_t offset, DexCache* dex_cache) {
+void ImageWriter::AddDexCacheArrayRelocation(void* array,
+                                             size_t offset,
+                                             ObjPtr<mirror::DexCache> dex_cache) {
   if (array != nullptr) {
     DCHECK(!IsInBootImage(array));
     size_t oat_index = GetOatIndexForDexCache(dex_cache);
@@ -481,7 +490,7 @@
       if (method != nullptr && !method->IsRuntimeMethod()) {
         mirror::Class* klass = method->GetDeclaringClass();
         CHECK(klass == nullptr || KeepClass(klass))
-            << PrettyClass(klass) << " should be a kept class";
+            << Class::PrettyClass(klass) << " should be a kept class";
       }
     }
   }
@@ -690,7 +699,7 @@
 
 class ComputeLazyFieldsForClassesVisitor : public ClassVisitor {
  public:
-  bool operator()(Class* c) OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) {
+  bool operator()(ObjPtr<Class> c) OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) {
     StackHandleScope<1> hs(Thread::Current());
     mirror::Class::ComputeName(hs.NewHandle(c));
     return true;
@@ -749,7 +758,8 @@
   if (klass->GetStatus() == mirror::Class::kStatusError) {
     result = true;
   } else {
-    CHECK(klass->GetVerifyError() == nullptr) << PrettyClass(klass);
+    ObjPtr<mirror::ClassExt> ext(klass->GetExtData());
+    CHECK(ext.IsNull() || ext->GetVerifyError() == nullptr) << klass->PrettyClass();
   }
   if (!result) {
     // Check interfaces since these wont be visited through VisitReferences.)
@@ -831,9 +841,9 @@
  public:
   explicit NonImageClassesVisitor(ImageWriter* image_writer) : image_writer_(image_writer) {}
 
-  bool operator()(Class* klass) OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) {
-    if (!image_writer_->KeepClass(klass)) {
-      classes_to_prune_.insert(klass);
+  bool operator()(ObjPtr<Class> klass) OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) {
+    if (!image_writer_->KeepClass(klass.Ptr())) {
+      classes_to_prune_.insert(klass.Ptr());
     }
     return true;
   }
@@ -878,7 +888,7 @@
     if (self->IsJWeakCleared(data.weak_root)) {
       continue;
     }
-    mirror::DexCache* dex_cache = self->DecodeJObject(data.weak_root)->AsDexCache();
+    ObjPtr<mirror::DexCache> dex_cache = self->DecodeJObject(data.weak_root)->AsDexCache();
     for (size_t i = 0; i < dex_cache->NumResolvedTypes(); i++) {
       Class* klass = dex_cache->GetResolvedType(i);
       if (klass != nullptr && !KeepClass(klass)) {
@@ -902,7 +912,7 @@
       } else {
         // Check that the class is still in the classes table.
         DCHECK(class_linker->ClassInClassTable(declaring_class)) << "Class "
-            << PrettyClass(declaring_class) << " not in class linker table";
+            << Class::PrettyClass(declaring_class) << " not in class linker table";
       }
     }
     ArtField** resolved_fields = dex_cache->GetResolvedFields();
@@ -939,7 +949,7 @@
       image_writer->DumpImageClasses();
       std::string temp;
       CHECK(image_writer->KeepClass(klass)) << klass->GetDescriptor(&temp)
-                                            << " " << PrettyDescriptor(klass);
+                                            << " " << klass->PrettyDescriptor();
     }
   }
 }
@@ -955,21 +965,21 @@
 mirror::String* ImageWriter::FindInternedString(mirror::String* string) {
   Thread* const self = Thread::Current();
   for (const ImageInfo& image_info : image_infos_) {
-    mirror::String* const found = image_info.intern_table_->LookupStrong(self, string);
+    ObjPtr<mirror::String> const found = image_info.intern_table_->LookupStrong(self, string);
     DCHECK(image_info.intern_table_->LookupWeak(self, string) == nullptr)
         << string->ToModifiedUtf8();
     if (found != nullptr) {
-      return found;
+      return found.Ptr();
     }
   }
   if (compile_app_image_) {
     Runtime* const runtime = Runtime::Current();
-    mirror::String* found = runtime->GetInternTable()->LookupStrong(self, string);
+    ObjPtr<mirror::String> found = runtime->GetInternTable()->LookupStrong(self, string);
     // If we found it in the runtime intern table it could either be in the boot image or interned
     // during app image compilation. If it was in the boot image return that, otherwise return null
     // since it belongs to another image space.
-    if (found != nullptr && runtime->GetHeap()->ObjectIsInBootImageSpace(found)) {
-      return found;
+    if (found != nullptr && runtime->GetHeap()->ObjectIsInBootImageSpace(found.Ptr())) {
+      return found.Ptr();
     }
     DCHECK(runtime->GetInternTable()->LookupWeak(self, string) == nullptr)
         << string->ToModifiedUtf8();
@@ -1005,13 +1015,13 @@
     ReaderMutexLock mu(self, *class_linker->DexLock());
     // Count number of dex caches not in the boot image.
     for (const ClassLinker::DexCacheData& data : class_linker->GetDexCachesData()) {
-      mirror::DexCache* dex_cache =
-          down_cast<mirror::DexCache*>(self->DecodeJObject(data.weak_root));
+      ObjPtr<mirror::DexCache> dex_cache =
+          ObjPtr<mirror::DexCache>::DownCast(self->DecodeJObject(data.weak_root));
       if (dex_cache == nullptr) {
         continue;
       }
       const DexFile* dex_file = dex_cache->GetDexFile();
-      if (!IsInBootImage(dex_cache)) {
+      if (!IsInBootImage(dex_cache.Ptr())) {
         dex_cache_count += image_dex_files.find(dex_file) != image_dex_files.end() ? 1u : 0u;
       }
     }
@@ -1024,13 +1034,13 @@
     size_t non_image_dex_caches = 0;
     // Re-count number of non image dex caches.
     for (const ClassLinker::DexCacheData& data : class_linker->GetDexCachesData()) {
-      mirror::DexCache* dex_cache =
-          down_cast<mirror::DexCache*>(self->DecodeJObject(data.weak_root));
+      ObjPtr<mirror::DexCache> dex_cache =
+          ObjPtr<mirror::DexCache>::DownCast(self->DecodeJObject(data.weak_root));
       if (dex_cache == nullptr) {
         continue;
       }
       const DexFile* dex_file = dex_cache->GetDexFile();
-      if (!IsInBootImage(dex_cache)) {
+      if (!IsInBootImage(dex_cache.Ptr())) {
         non_image_dex_caches += image_dex_files.find(dex_file) != image_dex_files.end() ? 1u : 0u;
       }
     }
@@ -1038,14 +1048,15 @@
         << "The number of non-image dex caches changed.";
     size_t i = 0;
     for (const ClassLinker::DexCacheData& data : class_linker->GetDexCachesData()) {
-      mirror::DexCache* dex_cache =
-          down_cast<mirror::DexCache*>(self->DecodeJObject(data.weak_root));
+      ObjPtr<mirror::DexCache> dex_cache =
+          ObjPtr<mirror::DexCache>::DownCast(self->DecodeJObject(data.weak_root));
       if (dex_cache == nullptr) {
         continue;
       }
       const DexFile* dex_file = dex_cache->GetDexFile();
-      if (!IsInBootImage(dex_cache) && image_dex_files.find(dex_file) != image_dex_files.end()) {
-        dex_caches->Set<false>(i, dex_cache);
+      if (!IsInBootImage(dex_cache.Ptr()) &&
+          image_dex_files.find(dex_file) != image_dex_files.end()) {
+        dex_caches->Set<false>(i, dex_cache.Ptr());
         ++i;
       }
     }
@@ -1079,7 +1090,8 @@
       mirror::String* interned = FindInternedString(obj->AsString());
       if (interned == nullptr) {
         // Not in another image space, insert to our table.
-        interned = GetImageInfo(oat_index).intern_table_->InternStrongImageString(obj->AsString());
+        interned =
+            GetImageInfo(oat_index).intern_table_->InternStrongImageString(obj->AsString()).Ptr();
         DCHECK_EQ(interned, obj);
       }
     } else if (obj->IsDexCache()) {
@@ -1091,7 +1103,7 @@
       DCHECK_NE(as_klass->GetStatus(), mirror::Class::kStatusError);
       if (compile_app_image_) {
         // Extra sanity, no boot loader classes should be left!
-        CHECK(!IsBootClassLoaderClass(as_klass)) << PrettyClass(as_klass);
+        CHECK(!IsBootClassLoaderClass(as_klass)) << as_klass->PrettyClass();
       }
       LengthPrefixedArray<ArtField>* fields[] = {
           as_klass->GetSFieldsPtr(), as_klass->GetIFieldsPtr(),
@@ -1127,7 +1139,7 @@
             ArtField* field = &cur_fields->At(i);
             auto it2 = native_object_relocations_.find(field);
             CHECK(it2 == native_object_relocations_.end()) << "Field at index=" << i
-                << " already assigned " << PrettyField(field) << " static=" << field->IsStatic();
+                << " already assigned " << field->PrettyField() << " static=" << field->IsStatic();
             DCHECK(!IsInBootImage(field));
             native_object_relocations_.emplace(
                 field,
@@ -1259,7 +1271,7 @@
                                      size_t oat_index) {
   DCHECK(!IsInBootImage(method));
   CHECK(!NativeRelocationAssigned(method)) << "Method " << method << " already assigned "
-      << PrettyMethod(method);
+      << ArtMethod::PrettyMethod(method);
   if (method->IsRuntimeMethod()) {
     TryAssignConflictTableOffset(method->GetImtConflictTable(target_ptr_size_), oat_index);
   }
@@ -1273,7 +1285,7 @@
   ImageWriter* writer = reinterpret_cast<ImageWriter*>(arg);
   DCHECK(writer != nullptr);
   if (!Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(obj)) {
-    CHECK(writer->IsImageBinSlotAssigned(obj)) << PrettyTypeOf(obj) << " " << obj;
+    CHECK(writer->IsImageBinSlotAssigned(obj)) << mirror::Object::PrettyTypeOf(obj) << " " << obj;
   }
 }
 
@@ -1320,7 +1332,7 @@
     root->Assign(VisitReference(root->AsMirrorPtr()));
   }
 
-  ALWAYS_INLINE void operator() (mirror::Object* obj,
+  ALWAYS_INLINE void operator() (ObjPtr<mirror::Object> obj,
                                  MemberOffset offset,
                                  bool is_static ATTRIBUTE_UNUSED) const
       REQUIRES_SHARED(Locks::mutator_lock_) {
@@ -1329,8 +1341,8 @@
     obj->SetFieldObject</*kTransactionActive*/false>(offset, VisitReference(ref));
   }
 
-  ALWAYS_INLINE void operator() (mirror::Class* klass ATTRIBUTE_UNUSED,
-                                 mirror::Reference* ref) const
+  ALWAYS_INLINE void operator() (ObjPtr<mirror::Class> klass ATTRIBUTE_UNUSED,
+                                 ObjPtr<mirror::Reference> ref) const
       REQUIRES_SHARED(Locks::mutator_lock_) {
     ref->SetReferent</*kTransactionActive*/false>(
         VisitReference(ref->GetReferent<kWithoutReadBarrier>()));
@@ -1386,7 +1398,7 @@
 
 void ImageWriter::CalculateNewObjectOffsets() {
   Thread* const self = Thread::Current();
-  StackHandleScopeCollection handles(self);
+  VariableSizedHandleScope handles(self);
   std::vector<Handle<ObjectArray<Object>>> image_roots;
   for (size_t i = 0, size = oat_filenames_.size(); i != size; ++i) {
     image_roots.push_back(handles.NewHandle(CreateImageRoots(i)));
@@ -1439,7 +1451,7 @@
     for (size_t i = 0, count = dex_file->NumStringIds(); i < count; ++i) {
       uint32_t utf16_length;
       const char* utf8_data = dex_file->StringDataAndUtf16LengthByIdx(i, &utf16_length);
-      mirror::String* string = intern_table->LookupStrong(self, utf16_length, utf8_data);
+      mirror::String* string = intern_table->LookupStrong(self, utf16_length, utf8_data).Ptr();
       TryAssignBinSlot(work_stack, string, oat_index);
     }
   }
@@ -1489,10 +1501,15 @@
     // Calculate how big the intern table will be after being serialized.
     InternTable* const intern_table = image_info.intern_table_.get();
     CHECK_EQ(intern_table->WeakSize(), 0u) << " should have strong interned all the strings";
-    image_info.intern_table_bytes_ = intern_table->WriteToMemory(nullptr);
+    if (intern_table->StrongSize() != 0u) {
+      image_info.intern_table_bytes_ = intern_table->WriteToMemory(nullptr);
+    }
     // Calculate the size of the class table.
     ReaderMutexLock mu(self, *Locks::classlinker_classes_lock_);
-    image_info.class_table_bytes_ += image_info.class_table_->WriteToMemory(nullptr);
+    DCHECK_EQ(image_info.class_table_->NumZygoteClasses(), 0u);
+    if (image_info.class_table_->NumNonZygoteClasses() != 0u) {
+      image_info.class_table_bytes_ += image_info.class_table_->WriteToMemory(nullptr);
+    }
   }
 
   // Calculate bin slot offsets.
@@ -1676,7 +1693,8 @@
 
 ArtMethod* ImageWriter::GetImageMethodAddress(ArtMethod* method) {
   auto it = native_object_relocations_.find(method);
-  CHECK(it != native_object_relocations_.end()) << PrettyMethod(method) << " @ " << method;
+  CHECK(it != native_object_relocations_.end()) << ArtMethod::PrettyMethod(method) << " @ "
+                                                << method;
   size_t oat_index = GetOatIndex(method->GetDexCache());
   ImageInfo& image_info = GetImageInfo(oat_index);
   CHECK_GE(it->second.offset, image_info.image_end_) << "ArtMethods should be after Objects";
@@ -1863,7 +1881,7 @@
 void ImageWriter::FixupPointerArray(mirror::Object* dst, mirror::PointerArray* arr,
                                     mirror::Class* klass, Bin array_type) {
   CHECK(klass->IsArrayClass());
-  CHECK(arr->IsIntArray() || arr->IsLongArray()) << PrettyClass(klass) << " " << arr;
+  CHECK(arr->IsIntArray() || arr->IsLongArray()) << klass->PrettyClass() << " " << arr;
   // Fixup int and long pointers for the ArtMethod or ArtField arrays.
   const size_t num_elements = arr->GetLength();
   dst->SetClass(GetImageAddress(arr->GetClass()));
@@ -1875,15 +1893,15 @@
       if (UNLIKELY(it == native_object_relocations_.end())) {
         if (it->second.IsArtMethodRelocation()) {
           auto* method = reinterpret_cast<ArtMethod*>(elem);
-          LOG(FATAL) << "No relocation entry for ArtMethod " << PrettyMethod(method) << " @ "
-              << method << " idx=" << i << "/" << num_elements << " with declaring class "
-              << PrettyClass(method->GetDeclaringClass());
+          LOG(FATAL) << "No relocation entry for ArtMethod " << method->PrettyMethod() << " @ "
+                     << method << " idx=" << i << "/" << num_elements << " with declaring class "
+                     << Class::PrettyClass(method->GetDeclaringClass());
         } else {
           CHECK_EQ(array_type, kBinArtField);
           auto* field = reinterpret_cast<ArtField*>(elem);
-          LOG(FATAL) << "No relocation entry for ArtField " << PrettyField(field) << " @ "
+          LOG(FATAL) << "No relocation entry for ArtField " << field->PrettyField() << " @ "
               << field << " idx=" << i << "/" << num_elements << " with declaring class "
-              << PrettyClass(field->GetDeclaringClass());
+              << Class::PrettyClass(field->GetDeclaringClass());
         }
         UNREACHABLE();
       } else {
@@ -1938,18 +1956,19 @@
   void VisitRoot(mirror::CompressedReference<mirror::Object>* root ATTRIBUTE_UNUSED) const {}
 
 
-  void operator()(Object* obj, MemberOffset offset, bool is_static ATTRIBUTE_UNUSED) const
+  void operator()(ObjPtr<Object> obj, MemberOffset offset, bool is_static ATTRIBUTE_UNUSED) const
       REQUIRES(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
-    Object* ref = obj->GetFieldObject<Object, kVerifyNone>(offset);
+    ObjPtr<Object> ref = obj->GetFieldObject<Object, kVerifyNone>(offset);
     // Use SetFieldObjectWithoutWriteBarrier to avoid card marking since we are writing to the
     // image.
     copy_->SetFieldObjectWithoutWriteBarrier<false, true, kVerifyNone>(
         offset,
-        image_writer_->GetImageAddress(ref));
+        image_writer_->GetImageAddress(ref.Ptr()));
   }
 
   // java.lang.ref.Reference visitor.
-  void operator()(mirror::Class* klass ATTRIBUTE_UNUSED, mirror::Reference* ref) const
+  void operator()(ObjPtr<mirror::Class> klass ATTRIBUTE_UNUSED,
+                  ObjPtr<mirror::Reference> ref) const
       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_) {
     copy_->SetFieldObjectWithoutWriteBarrier<false, true, kVerifyNone>(
         mirror::Reference::ReferentOffset(),
@@ -1966,14 +1985,14 @@
   FixupClassVisitor(ImageWriter* image_writer, Object* copy) : FixupVisitor(image_writer, copy) {
   }
 
-  void operator()(Object* obj, MemberOffset offset, bool is_static ATTRIBUTE_UNUSED) const
+  void operator()(ObjPtr<Object> obj, MemberOffset offset, bool is_static ATTRIBUTE_UNUSED) const
       REQUIRES(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
     DCHECK(obj->IsClass());
     FixupVisitor::operator()(obj, offset, /*is_static*/false);
   }
 
-  void operator()(mirror::Class* klass ATTRIBUTE_UNUSED,
-                  mirror::Reference* ref ATTRIBUTE_UNUSED) const
+  void operator()(ObjPtr<mirror::Class> klass ATTRIBUTE_UNUSED,
+                  ObjPtr<mirror::Reference> ref ATTRIBUTE_UNUSED) const
       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_) {
     LOG(FATAL) << "Reference not expected here.";
   }
@@ -1998,7 +2017,7 @@
 
 template <>
 std::string PrettyPrint(ArtMethod* method) REQUIRES_SHARED(Locks::mutator_lock_) {
-  return PrettyMethod(method);
+  return ArtMethod::PrettyMethod(method);
 }
 
 template <typename T>
@@ -2042,7 +2061,7 @@
 void ImageWriter::FixupClass(mirror::Class* orig, mirror::Class* copy) {
   orig->FixupNativePointers(copy, target_ptr_size_, NativeLocationVisitor(this));
   FixupClassVisitor visitor(this, copy);
-  static_cast<mirror::Object*>(orig)->VisitReferences(visitor, visitor);
+  ObjPtr<mirror::Object>(orig)->VisitReferences(visitor, visitor);
 
   // Remove the clinitThreadId. This is required for image determinism.
   copy->SetClinitThreadId(static_cast<pid_t>(0));
@@ -2051,13 +2070,8 @@
 void ImageWriter::FixupObject(Object* orig, Object* copy) {
   DCHECK(orig != nullptr);
   DCHECK(copy != nullptr);
-  if (kUseBakerOrBrooksReadBarrier) {
-    orig->AssertReadBarrierPointer();
-    if (kUseBrooksReadBarrier) {
-      // Note the address 'copy' isn't the same as the image address of 'orig'.
-      copy->SetReadBarrierPointer(GetImageAddress(orig));
-      DCHECK_EQ(copy->GetReadBarrierPointer(), GetImageAddress(orig));
-    }
+  if (kUseBakerReadBarrier) {
+    orig->AssertReadBarrierState();
   }
   auto* klass = orig->GetClass();
   if (klass->IsIntArrayClass() || klass->IsLongArrayClass()) {
@@ -2161,6 +2175,14 @@
       mirror::DexCache::SetElementPtrSize(copy_fields, i, copy, target_ptr_size_);
     }
   }
+  mirror::MethodTypeDexCacheType* orig_method_types = orig_dex_cache->GetResolvedMethodTypes();
+  if (orig_method_types != nullptr) {
+    copy_dex_cache->SetFieldPtrWithSize<false>(mirror::DexCache::ResolvedMethodTypesOffset(),
+                                               NativeLocationInImage(orig_method_types),
+                                               PointerSize::k64);
+    orig_dex_cache->FixupResolvedMethodTypes(NativeCopyLocation(orig_method_types, orig_dex_cache),
+                                             ImageAddressVisitor(this));
+  }
 
   // Remove the DexFile pointers. They will be fixed up when the runtime loads the oat file. Leaving
   // compiler pointers in here will make the output non-deterministic.
@@ -2205,11 +2227,11 @@
 const uint8_t* ImageWriter::GetQuickCode(ArtMethod* method,
                                          const ImageInfo& image_info,
                                          bool* quick_is_interpreted) {
-  DCHECK(!method->IsResolutionMethod()) << PrettyMethod(method);
-  DCHECK_NE(method, Runtime::Current()->GetImtConflictMethod()) << PrettyMethod(method);
-  DCHECK(!method->IsImtUnimplementedMethod()) << PrettyMethod(method);
-  DCHECK(method->IsInvokable()) << PrettyMethod(method);
-  DCHECK(!IsInBootImage(method)) << PrettyMethod(method);
+  DCHECK(!method->IsResolutionMethod()) << method->PrettyMethod();
+  DCHECK_NE(method, Runtime::Current()->GetImtConflictMethod()) << method->PrettyMethod();
+  DCHECK(!method->IsImtUnimplementedMethod()) << method->PrettyMethod();
+  DCHECK(method->IsInvokable()) << method->PrettyMethod();
+  DCHECK(!IsInBootImage(method)) << method->PrettyMethod();
 
   // Use original code if it exists. Otherwise, set the code pointer to the resolution
   // trampoline.
@@ -2287,7 +2309,7 @@
           break;
         }
       }
-      CHECK(found_one) << "Expected to find callee save method but got " << PrettyMethod(orig);
+      CHECK(found_one) << "Expected to find callee save method but got " << orig->PrettyMethod();
       CHECK(copy->IsRuntimeMethod());
     }
   } else {
@@ -2384,12 +2406,10 @@
   return it->second;
 }
 
-size_t ImageWriter::GetOatIndexForDexCache(mirror::DexCache* dex_cache) const {
-  if (dex_cache == nullptr) {
-    return GetDefaultOatIndex();
-  } else {
-    return GetOatIndexForDexFile(dex_cache->GetDexFile());
-  }
+size_t ImageWriter::GetOatIndexForDexCache(ObjPtr<mirror::DexCache> dex_cache) const {
+  return (dex_cache == nullptr)
+      ? GetDefaultOatIndex()
+      : GetOatIndexForDexFile(dex_cache->GetDexFile());
 }
 
 void ImageWriter::UpdateOatFileLayout(size_t oat_index,
diff --git a/compiler/image_writer.h b/compiler/image_writer.h
index acd1681..c9cf4cb 100644
--- a/compiler/image_writer.h
+++ b/compiler/image_writer.h
@@ -132,7 +132,7 @@
   size_t GetOatIndexForDexFile(const DexFile* dex_file) const;
 
   // Get the index of the oat file containing the dex file served by the dex cache.
-  size_t GetOatIndexForDexCache(mirror::DexCache* dex_cache) const
+  size_t GetOatIndexForDexCache(ObjPtr<mirror::DexCache> dex_cache) const
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Update the oat layout for the given oat file.
@@ -334,7 +334,7 @@
       REQUIRES_SHARED(Locks::mutator_lock_);
   BinSlot GetImageBinSlot(mirror::Object* object) const REQUIRES_SHARED(Locks::mutator_lock_);
 
-  void AddDexCacheArrayRelocation(void* array, size_t offset, mirror::DexCache* dex_cache)
+  void AddDexCacheArrayRelocation(void* array, size_t offset, ObjPtr<mirror::DexCache> dex_cache)
       REQUIRES_SHARED(Locks::mutator_lock_);
   void AddMethodPointerArray(mirror::PointerArray* arr) REQUIRES_SHARED(Locks::mutator_lock_);
 
diff --git a/compiler/intrinsics_list.h b/compiler/intrinsics_list.h
index 5877f57..555baf6 100644
--- a/compiler/intrinsics_list.h
+++ b/compiler/intrinsics_list.h
@@ -1,7 +1,7 @@
 /*
- * Copyright (C, "", "", "") 2015 The Android Open Source Project
+ * Copyright (C) 2015 The Android Open Source Project
  *
- * Licensed under the Apache License, Version 2.0 (the "License", "", "", "");
+ * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
@@ -108,8 +108,10 @@
   V(StringCompareTo, kVirtual, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow, "Ljava/lang/String;", "compareTo", "(Ljava/lang/String;)I") \
   V(StringEquals, kVirtual, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow, "Ljava/lang/String;", "equals", "(Ljava/lang/Object;)Z") \
   V(StringGetCharsNoCheck, kVirtual, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow, "Ljava/lang/String;", "getCharsNoCheck", "(II[CI)V") \
-  V(StringIndexOf, kVirtual, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow, "Ljava/lang/String;", "indexOf", "(I)I") \
-  V(StringIndexOfAfter, kVirtual, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow, "Ljava/lang/String;", "indexOf", "(II)I") \
+  V(StringIndexOf, kVirtual, kNeedsEnvironmentOrCache, kReadSideEffects, kNoThrow, "Ljava/lang/String;", "indexOf", "(I)I") \
+  V(StringIndexOfAfter, kVirtual, kNeedsEnvironmentOrCache, kReadSideEffects, kNoThrow, "Ljava/lang/String;", "indexOf", "(II)I") \
+  V(StringStringIndexOf, kVirtual, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow, "Ljava/lang/String;", "indexOf", "(Ljava/lang/String;)I") \
+  V(StringStringIndexOfAfter, kVirtual, kNeedsEnvironmentOrCache, kReadSideEffects, kCanThrow, "Ljava/lang/String;", "indexOf", "(Ljava/lang/String;I)I") \
   V(StringIsEmpty, kVirtual, kNeedsEnvironmentOrCache, kReadSideEffects, kNoThrow, "Ljava/lang/String;", "isEmpty", "()Z") \
   V(StringLength, kVirtual, kNeedsEnvironmentOrCache, kReadSideEffects, kNoThrow, "Ljava/lang/String;", "length", "()I") \
   V(StringNewStringFromBytes, kStatic, kNeedsEnvironmentOrCache, kAllSideEffects, kCanThrow, "Ljava/lang/StringFactory;", "newStringFromBytes", "([BIII)Ljava/lang/String;") \
diff --git a/compiler/jit/jit_compiler.cc b/compiler/jit/jit_compiler.cc
index 4ef2db8..9dfb434 100644
--- a/compiler/jit/jit_compiler.cc
+++ b/compiler/jit/jit_compiler.cc
@@ -124,30 +124,30 @@
     if (option.starts_with("--instruction-set-variant=")) {
       StringPiece str = option.substr(strlen("--instruction-set-variant=")).data();
       VLOG(compiler) << "JIT instruction set variant " << str;
-      instruction_set_features_.reset(InstructionSetFeatures::FromVariant(
-          instruction_set, str.as_string(), &error_msg));
+      instruction_set_features_ = InstructionSetFeatures::FromVariant(
+          instruction_set, str.as_string(), &error_msg);
       if (instruction_set_features_ == nullptr) {
         LOG(WARNING) << "Error parsing " << option << " message=" << error_msg;
       }
     } else if (option.starts_with("--instruction-set-features=")) {
       StringPiece str = option.substr(strlen("--instruction-set-features=")).data();
       VLOG(compiler) << "JIT instruction set features " << str;
-      if (instruction_set_features_.get() == nullptr) {
-        instruction_set_features_.reset(InstructionSetFeatures::FromVariant(
-            instruction_set, "default", &error_msg));
+      if (instruction_set_features_ == nullptr) {
+        instruction_set_features_ = InstructionSetFeatures::FromVariant(
+            instruction_set, "default", &error_msg);
         if (instruction_set_features_ == nullptr) {
           LOG(WARNING) << "Error parsing " << option << " message=" << error_msg;
         }
       }
-      instruction_set_features_.reset(
-          instruction_set_features_->AddFeaturesFromString(str.as_string(), &error_msg));
+      instruction_set_features_ =
+          instruction_set_features_->AddFeaturesFromString(str.as_string(), &error_msg);
       if (instruction_set_features_ == nullptr) {
         LOG(WARNING) << "Error parsing " << option << " message=" << error_msg;
       }
     }
   }
   if (instruction_set_features_ == nullptr) {
-    instruction_set_features_.reset(InstructionSetFeatures::FromCppDefines());
+    instruction_set_features_ = InstructionSetFeatures::FromCppDefines();
   }
   cumulative_logger_.reset(new CumulativeLogger("jit times"));
   compiler_driver_.reset(new CompilerDriver(
@@ -171,19 +171,10 @@
 
   size_t thread_count = compiler_driver_->GetThreadCount();
   if (compiler_options_->GetGenerateDebugInfo()) {
-#ifdef ART_TARGET_ANDROID
-    const char* prefix = "/data/misc/trace";
-#else
-    const char* prefix = "/tmp";
-#endif
     DCHECK_EQ(thread_count, 1u)
         << "Generating debug info only works with one compiler thread";
-    std::string perf_filename = std::string(prefix) + "/perf-" + std::to_string(getpid()) + ".map";
-    perf_file_.reset(OS::CreateEmptyFileWriteOnly(perf_filename.c_str()));
-    if (perf_file_ == nullptr) {
-      LOG(ERROR) << "Could not create perf file at " << perf_filename <<
-                    " Are you on a user build? Perf only works on userdebug/eng builds";
-    }
+    jit_logger_.reset(new JitLogger());
+    jit_logger_->OpenLog();
   }
 
   size_t inline_depth_limit = compiler_driver_->GetCompilerOptions().GetInlineDepthLimit();
@@ -192,9 +183,8 @@
 }
 
 JitCompiler::~JitCompiler() {
-  if (perf_file_ != nullptr) {
-    UNUSED(perf_file_->Flush());
-    UNUSED(perf_file_->Close());
+  if (compiler_options_->GetGenerateDebugInfo()) {
+    jit_logger_->CloseLog();
   }
 }
 
@@ -208,7 +198,7 @@
   // Ensure the class is initialized.
   Handle<mirror::Class> h_class(hs.NewHandle(method->GetDeclaringClass()));
   if (!runtime->GetClassLinker()->EnsureInitialized(self, h_class, true, true)) {
-    VLOG(jit) << "JIT failed to initialize " << PrettyMethod(method);
+    VLOG(jit) << "JIT failed to initialize " << method->PrettyMethod();
     return false;
   }
 
@@ -218,19 +208,8 @@
     TimingLogger::ScopedTiming t2("Compiling", &logger);
     JitCodeCache* const code_cache = runtime->GetJit()->GetCodeCache();
     success = compiler_driver_->GetCompiler()->JitCompile(self, code_cache, method, osr);
-    if (success && (perf_file_ != nullptr)) {
-      const void* ptr = method->GetEntryPointFromQuickCompiledCode();
-      std::ostringstream stream;
-      stream << std::hex
-             << reinterpret_cast<uintptr_t>(ptr)
-             << " "
-             << code_cache->GetMemorySizeOfCodePointer(ptr)
-             << " "
-             << PrettyMethod(method)
-             << std::endl;
-      std::string str = stream.str();
-      bool res = perf_file_->WriteFully(str.c_str(), str.size());
-      CHECK(res);
+    if (success && (jit_logger_ != nullptr)) {
+      jit_logger_->WriteLog(code_cache, method);
     }
   }
 
diff --git a/compiler/jit/jit_compiler.h b/compiler/jit/jit_compiler.h
index ea2747c..f0f24d3 100644
--- a/compiler/jit/jit_compiler.h
+++ b/compiler/jit/jit_compiler.h
@@ -19,6 +19,7 @@
 
 #include "base/mutex.h"
 #include "compiled_method.h"
+#include "jit_logger.h"
 #include "driver/compiler_driver.h"
 #include "driver/compiler_options.h"
 
@@ -50,7 +51,7 @@
   std::unique_ptr<CumulativeLogger> cumulative_logger_;
   std::unique_ptr<CompilerDriver> compiler_driver_;
   std::unique_ptr<const InstructionSetFeatures> instruction_set_features_;
-  std::unique_ptr<File> perf_file_;
+  std::unique_ptr<JitLogger> jit_logger_;
 
   JitCompiler();
 
diff --git a/compiler/jit/jit_logger.cc b/compiler/jit/jit_logger.cc
new file mode 100644
index 0000000..9ce3b0c
--- /dev/null
+++ b/compiler/jit/jit_logger.cc
@@ -0,0 +1,312 @@
+/*
+ * Copyright 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "jit_logger.h"
+
+#include "arch/instruction_set.h"
+#include "art_method-inl.h"
+#include "base/time_utils.h"
+#include "base/unix_file/fd_file.h"
+#include "driver/compiler_driver.h"
+#include "jit/jit.h"
+#include "jit/jit_code_cache.h"
+
+namespace art {
+namespace jit {
+
+#ifdef ART_TARGET_ANDROID
+static const char* kLogPrefix = "/data/misc/trace";
+#else
+static const char* kLogPrefix = "/tmp";
+#endif
+
+// File format of perf-PID.map:
+// +---------------------+
+// |ADDR SIZE symbolname1|
+// |ADDR SIZE symbolname2|
+// |...                  |
+// +---------------------+
+void JitLogger::OpenPerfMapLog() {
+  std::string pid_str = std::to_string(getpid());
+  std::string perf_filename = std::string(kLogPrefix) + "/perf-" + pid_str + ".map";
+  perf_file_.reset(OS::CreateEmptyFileWriteOnly(perf_filename.c_str()));
+  if (perf_file_ == nullptr) {
+    LOG(ERROR) << "Could not create perf file at " << perf_filename <<
+      " Are you on a user build? Perf only works on userdebug/eng builds";
+  }
+}
+
+void JitLogger::WritePerfMapLog(JitCodeCache* code_cache, ArtMethod* method) {
+  if (perf_file_ != nullptr) {
+    const void* ptr = method->GetEntryPointFromQuickCompiledCode();
+    size_t code_size = code_cache->GetMemorySizeOfCodePointer(ptr);
+    std::string method_name = method->PrettyMethod();
+
+    std::ostringstream stream;
+    stream << std::hex
+           << reinterpret_cast<uintptr_t>(ptr)
+           << " "
+           << code_size
+           << " "
+           << method_name
+           << std::endl;
+    std::string str = stream.str();
+    bool res = perf_file_->WriteFully(str.c_str(), str.size());
+    if (!res) {
+      LOG(WARNING) << "Failed to write jitted method info in log: write failure.";
+    }
+  } else {
+    LOG(WARNING) << "Failed to write jitted method info in log: log file doesn't exist.";
+  }
+}
+
+void JitLogger::ClosePerfMapLog() {
+  if (perf_file_ != nullptr) {
+    UNUSED(perf_file_->Flush());
+    UNUSED(perf_file_->Close());
+  }
+}
+
+//  File format of jit-PID.jump:
+//
+//  +--------------------------------+
+//  |  PerfJitHeader                 |
+//  +--------------------------------+
+//  |  PerfJitCodeLoad {             | .
+//  |    struct PerfJitBase;         |  .
+//  |    uint32_t process_id_;       |   .
+//  |    uint32_t thread_id_;        |   .
+//  |    uint64_t vma_;              |   .
+//  |    uint64_t code_address_;     |   .
+//  |    uint64_t code_size_;        |   .
+//  |    uint64_t code_id_;          |   .
+//  |  }                             |   .
+//  +-                              -+   .
+//  |  method_name'\0'               |   +--> one jitted method
+//  +-                              -+   .
+//  |  jitted code binary            |   .
+//  |  ...                           |   .
+//  +--------------------------------+   .
+//  |  PerfJitCodeDebugInfo     {    |   .
+//  |    struct PerfJitBase;         |   .
+//  |    uint64_t address_;          |   .
+//  |    uint64_t entry_count_;      |   .
+//  |    struct PerfJitDebugEntry;   |  .
+//  |  }                             | .
+//  +--------------------------------+
+//  |  PerfJitCodeLoad               |
+//     ...
+//
+struct PerfJitHeader {
+  uint32_t magic_;            // Characters "JiTD"
+  uint32_t version_;          // Header version
+  uint32_t size_;             // Total size of header
+  uint32_t elf_mach_target_;  // Elf mach target
+  uint32_t reserved_;         // Reserved, currently not used
+  uint32_t process_id_;       // Process ID of the JIT compiler
+  uint64_t time_stamp_;       // Timestamp when the header is generated
+  uint64_t flags_;            // Currently the flags are only used for choosing clock for timestamp,
+                              // we set it to 0 to tell perf that we use CLOCK_MONOTONIC clock.
+  static const uint32_t kMagic = 0x4A695444;  // "JiTD"
+  static const uint32_t kVersion = 1;
+};
+
+// Each record starts with such basic information: event type, total size, and timestamp.
+struct PerfJitBase {
+  enum PerfJitEvent {
+    // A jitted code load event.
+    // In ART JIT, it is used to log a new method is jit compiled and committed to jit-code-cache.
+    // Note that such kLoad event supports code cache GC in ART JIT.
+    // For every kLoad event recorded in jit-PID.dump and every perf sample recorded in perf.data,
+    // each event/sample has time stamp. In case code cache GC happens in ART JIT, and a new
+    // jitted method is committed to the same address of a previously deleted method,
+    // the time stamp information can help profiler to tell whether this sample belongs to the
+    // era of the first jitted method, or does it belong to the period of the second jitted method.
+    // JitCodeCache doesn't have to record any event on 'code delete'.
+    kLoad = 0,
+
+    // A jitted code move event, i,e. a jitted code moved from one address to another address.
+    // It helps profiler to map samples to the right symbol even when the code is moved.
+    // In ART JIT, this event can help log such behavior:
+    // A jitted method is recorded in previous kLoad event, but due to some reason,
+    // it is moved to another address in jit-code-cache.
+    kMove = 1,
+
+    // Logs debug line/column information.
+    kDebugInfo = 2,
+
+    // Logs JIT VM end of life event.
+    kClose = 3
+  };
+  uint32_t event_;       // Must be one of the events defined in PerfJitEvent.
+  uint32_t size_;        // Total size of this event record.
+                         // For example, for kLoad event, size of the event record is:
+                         // sizeof(PerfJitCodeLoad) + method_name.size() + compiled code size.
+  uint64_t time_stamp_;  // Timestamp for the event.
+};
+
+// Logs a jitted code load event (kLoad).
+// In ART JIT, it is used to log a new method is jit compiled and commited to jit-code-cache.
+struct PerfJitCodeLoad : PerfJitBase {
+  uint32_t process_id_;    // Process ID who performs the jit code load.
+                           // In ART JIT, it is the pid of the JIT compiler.
+  uint32_t thread_id_;     // Thread ID who performs the jit code load.
+                           // In ART JIT, it is the tid of the JIT compiler.
+  uint64_t vma_;           // Address of the code section. In ART JIT, because code_address_
+                           // uses absolute address, this field is 0.
+  uint64_t code_address_;  // Address where is jitted code is loaded.
+  uint64_t code_size_;     // Size of the jitted code.
+  uint64_t code_id_;       // Unique ID for each jitted code.
+};
+
+// This structure is for source line/column mapping.
+// Currently this feature is not implemented in ART JIT yet.
+struct PerfJitDebugEntry {
+  uint64_t address_;      // Code address which maps to the line/column in source.
+  uint32_t line_number_;  // Source line number starting at 1.
+  uint32_t column_;       // Column discriminator, default 0.
+  const char name_[0];    // Followed by null-terminated name or \0xff\0 if same as previous.
+};
+
+// Logs debug line information (kDebugInfo).
+// This structure is for source line/column mapping.
+// Currently this feature is not implemented in ART JIT yet.
+struct PerfJitCodeDebugInfo : PerfJitBase {
+  uint64_t address_;              // Starting code address which the debug info describes.
+  uint64_t entry_count_;          // How many instances of PerfJitDebugEntry.
+  PerfJitDebugEntry entries_[0];  // Followed by entry_count_ instances of PerfJitDebugEntry.
+};
+
+static uint32_t GetElfMach() {
+#if defined(__arm__)
+  static const uint32_t kElfMachARM = 0x28;
+  return kElfMachARM;
+#elif defined(__aarch64__)
+  static const uint32_t kElfMachARM64 = 0xB7;
+  return kElfMachARM64;
+#elif defined(__i386__)
+  static const uint32_t kElfMachIA32 = 0x3;
+  return kElfMachIA32;
+#elif defined(__x86_64__)
+  static const uint32_t kElfMachX64 = 0x3E;
+  return kElfMachX64;
+#else
+  UNIMPLEMENTED(WARNING) << "Unsupported architecture in JitLogger";
+  return 0;
+#endif
+}
+
+void JitLogger::OpenMarkerFile() {
+  int fd = jit_dump_file_->Fd();
+  // The 'perf inject' tool requires that the jit-PID.dump file
+  // must have a mmap(PROT_READ|PROT_EXEC) record in perf.data.
+  marker_address_ = mmap(nullptr, kPageSize, PROT_READ | PROT_EXEC, MAP_PRIVATE, fd, 0);
+  if (marker_address_ == MAP_FAILED) {
+    LOG(WARNING) << "Failed to create record in perf.data. JITed code profiling will not work.";
+    return;
+  }
+}
+
+void JitLogger::CloseMarkerFile() {
+  if (marker_address_ != nullptr) {
+    munmap(marker_address_, kPageSize);
+  }
+}
+
+void JitLogger::WriteJitDumpDebugInfo() {
+  // In the future, we can add java source file line/column mapping here.
+}
+
+void JitLogger::WriteJitDumpHeader() {
+  PerfJitHeader header;
+
+  std::memset(&header, 0, sizeof(header));
+  header.magic_ = PerfJitHeader::kMagic;
+  header.version_ = PerfJitHeader::kVersion;
+  header.size_ = sizeof(header);
+  header.elf_mach_target_ = GetElfMach();
+  header.process_id_ = static_cast<uint32_t>(getpid());
+  header.time_stamp_ = art::NanoTime();  // CLOCK_MONOTONIC clock is required.
+  header.flags_ = 0;
+
+  bool res = jit_dump_file_->WriteFully(reinterpret_cast<const char*>(&header), sizeof(header));
+  if (!res) {
+    LOG(WARNING) << "Failed to write profiling log. The 'perf inject' tool will not work.";
+  }
+}
+
+void JitLogger::OpenJitDumpLog() {
+  std::string pid_str = std::to_string(getpid());
+  std::string jitdump_filename = std::string(kLogPrefix) + "/jit-" + pid_str + ".dump";
+
+  jit_dump_file_.reset(OS::CreateEmptyFile(jitdump_filename.c_str()));
+  if (jit_dump_file_ == nullptr) {
+    LOG(ERROR) << "Could not create jit dump file at " << jitdump_filename <<
+      " Are you on a user build? Perf only works on userdebug/eng builds";
+    return;
+  }
+
+  OpenMarkerFile();
+
+  // Continue to write jit-PID.dump file even above OpenMarkerFile() fails.
+  // Even if that means 'perf inject' tool cannot work, developers can still use other tools
+  // to map the samples in perf.data to the information (symbol,address,code) recorded
+  // in the jit-PID.dump file, and still proceed the jitted code analysis.
+  WriteJitDumpHeader();
+}
+
+void JitLogger::WriteJitDumpLog(JitCodeCache* code_cache, ArtMethod* method) {
+  if (jit_dump_file_ != nullptr) {
+    const void* code = method->GetEntryPointFromQuickCompiledCode();
+    size_t code_size = code_cache->GetMemorySizeOfCodePointer(code);
+    std::string method_name = method->PrettyMethod();
+
+    PerfJitCodeLoad jit_code;
+    std::memset(&jit_code, 0, sizeof(jit_code));
+    jit_code.event_ = PerfJitCodeLoad::kLoad;
+    jit_code.size_ = sizeof(jit_code) + method_name.size() + 1 + code_size;
+    jit_code.time_stamp_ = art::NanoTime();    // CLOCK_MONOTONIC clock is required.
+    jit_code.process_id_ = static_cast<uint32_t>(getpid());
+    jit_code.thread_id_ = static_cast<uint32_t>(art::GetTid());
+    jit_code.vma_ = 0x0;
+    jit_code.code_address_ = reinterpret_cast<uint64_t>(code);
+    jit_code.code_size_ = code_size;
+    jit_code.code_id_ = code_index_++;
+
+    // Write one complete jitted method info, including:
+    // - PerfJitCodeLoad structure
+    // - Method name
+    // - Complete generated code of this method
+    //
+    // Use UNUSED() here to avoid compiler warnings.
+    UNUSED(jit_dump_file_->WriteFully(reinterpret_cast<const char*>(&jit_code), sizeof(jit_code)));
+    UNUSED(jit_dump_file_->WriteFully(method_name.c_str(), method_name.size() + 1));
+    UNUSED(jit_dump_file_->WriteFully(code, code_size));
+
+    WriteJitDumpDebugInfo();
+  }
+}
+
+void JitLogger::CloseJitDumpLog() {
+  if (jit_dump_file_ != nullptr) {
+    CloseMarkerFile();
+    UNUSED(jit_dump_file_->Flush());
+    UNUSED(jit_dump_file_->Close());
+  }
+}
+
+}  // namespace jit
+}  // namespace art
diff --git a/compiler/jit/jit_logger.h b/compiler/jit/jit_logger.h
new file mode 100644
index 0000000..0f8cfe4
--- /dev/null
+++ b/compiler/jit/jit_logger.h
@@ -0,0 +1,137 @@
+/*
+ * Copyright 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_JIT_JIT_LOGGER_H_
+#define ART_COMPILER_JIT_JIT_LOGGER_H_
+
+#include "base/mutex.h"
+#include "compiled_method.h"
+#include "driver/compiler_driver.h"
+#include "driver/compiler_options.h"
+
+namespace art {
+
+class ArtMethod;
+
+namespace jit {
+
+//
+// JitLogger supports two approaches of perf profiling.
+//
+// (1) perf-map:
+//     The perf-map mechanism generates perf-PID.map file,
+//     which provides simple "address, size, method_name" information to perf,
+//     and allows perf to map samples in jit-code-cache to jitted method symbols.
+//
+//     Command line Example:
+//       $ perf record dalvikvm -Xcompiler-option --generate-debug-info -cp <classpath> Test
+//       $ perf report
+//     NOTE:
+//       - Make sure that the perf-PID.map file is available for 'perf report' tool to access,
+//         so that jitted method can be displayed.
+//
+//
+// (2) perf-inject:
+//     The perf-inject mechansim generates jit-PID.dump file,
+//     which provides rich informations about a jitted method.
+//     It allows perf or other profiling tools to do advanced analysis on jitted code,
+//     for example instruction level profiling.
+//
+//     Command line Example:
+//       $ perf record -k mono dalvikvm -Xcompiler-option --generate-debug-info -cp <classpath> Test
+//       $ perf inject -i perf.data -o perf.data.jitted
+//       $ perf report -i perf.data.jitted
+//       $ perf annotate -i perf.data.jitted
+//     NOTE:
+//       REQUIREMENTS
+//       - The 'perf record -k mono' option requires 4.1 (or higher) Linux kernel.
+//       - The 'perf inject' (generating jit ELF files feature) requires perf 4.6 (or higher).
+//       PERF RECORD
+//       - The '-k mono' option tells 'perf record' to use CLOCK_MONOTONIC clock during sampling;
+//         which is required by 'perf inject', to make sure that both perf.data and jit-PID.dump
+//         have unified clock source for timestamps.
+//       PERF INJECT
+//       - The 'perf inject' tool injects information from jit-PID.dump into perf.data file,
+//         and generates small ELF files (jitted-TID-CODEID.so) for each jitted method.
+//       - On Android devices, the jit-PID.dump file is generated in /data/misc/trace/ folder, and
+//         such location is recorded in perf.data file.
+//         The 'perf inject' tool is going to look for jit-PID.dump and generates small ELF files in
+//         this /data/misc/trace/ folder.
+//         Make sure that you have the read/write access to /data/misc/trace/ folder.
+//       - On non-Android devices, the jit-PID.dump file is generated in /tmp/ folder, and
+//         'perf inject' tool operates on this folder.
+//         Make sure that you have the read/write access to /tmp/ folder.
+//       - If you are executing 'perf inject' on non-Android devices (host), but perf.data and
+//         jit-PID.dump files are adb-pulled from Android devices, make sure that there is a
+//         /data/misc/trace/ folder on host, and jit-PID.dump file is copied to this folder.
+//       - Currently 'perf inject' doesn't provide option to change the path for jit-PID.dump and
+//         generated ELF files.
+//       PERF ANNOTATE
+//       - The 'perf annotate' tool displays assembly level profiling report.
+//         Source code can also be displayed if the ELF file has debug symbols.
+//       - Make sure above small ELF files are available for 'perf annotate' tool to access,
+//         so that jitted code can be displayed in assembly view.
+//
+class JitLogger {
+  public:
+    JitLogger() : code_index_(0), marker_address_(nullptr) {}
+
+    void OpenLog() {
+      OpenPerfMapLog();
+      OpenJitDumpLog();
+    }
+
+    void WriteLog(JitCodeCache* code_cache, ArtMethod* method)
+        REQUIRES_SHARED(Locks::mutator_lock_) {
+      WritePerfMapLog(code_cache, method);
+      WriteJitDumpLog(code_cache, method);
+    }
+
+    void CloseLog() {
+      ClosePerfMapLog();
+      CloseJitDumpLog();
+    }
+
+  private:
+    // For perf-map profiling
+    void OpenPerfMapLog();
+    void WritePerfMapLog(JitCodeCache* code_cache, ArtMethod* method)
+        REQUIRES_SHARED(Locks::mutator_lock_);
+    void ClosePerfMapLog();
+
+    // For perf-inject profiling
+    void OpenJitDumpLog();
+    void WriteJitDumpLog(JitCodeCache* code_cache, ArtMethod* method)
+        REQUIRES_SHARED(Locks::mutator_lock_);
+    void CloseJitDumpLog();
+
+    void OpenMarkerFile();
+    void CloseMarkerFile();
+    void WriteJitDumpHeader();
+    void WriteJitDumpDebugInfo();
+
+    std::unique_ptr<File> perf_file_;
+    std::unique_ptr<File> jit_dump_file_;
+    uint64_t code_index_;
+    void* marker_address_;
+
+    DISALLOW_COPY_AND_ASSIGN(JitLogger);
+};
+
+}  // namespace jit
+}  // namespace art
+
+#endif  // ART_COMPILER_JIT_JIT_LOGGER_H_
diff --git a/compiler/jni/jni_compiler_test.cc b/compiler/jni/jni_compiler_test.cc
index 6b56fe0..a9044a2 100644
--- a/compiler/jni/jni_compiler_test.cc
+++ b/compiler/jni/jni_compiler_test.cc
@@ -387,8 +387,7 @@
 // Test the normal compiler and normal generic JNI only.
 // The following features are unsupported in @FastNative:
 // 1) JNI stubs (lookup via dlsym) when methods aren't explicitly registered
-// 2) Returning objects from the JNI function
-// 3) synchronized keyword
+// 2) synchronized keyword
 // -- TODO: We can support (1) if we remove the mutator lock assert during stub lookup.
 # define JNI_TEST_NORMAL_ONLY(TestName)          \
   TEST_F(JniCompilerTest, TestName ## NormalCompiler) { \
@@ -401,7 +400,6 @@
     ScopedCheckHandleScope top_handle_scope_check;  \
     SCOPED_TRACE("Normal JNI with generic");     \
     gCurrentJni = static_cast<uint32_t>(JniKind::kNormal); \
-    TEST_DISABLED_FOR_MIPS();                    \
     SetCheckGenericJni(true);                    \
     TestName ## Impl();                          \
   }
@@ -420,7 +418,6 @@
     ScopedCheckHandleScope top_handle_scope_check;  \
     SCOPED_TRACE("@FastNative JNI with generic");  \
     gCurrentJni = static_cast<uint32_t>(JniKind::kFast); \
-    TEST_DISABLED_FOR_MIPS();                    \
     SetCheckGenericJni(true);                    \
     TestName ## Impl();                          \
   }
@@ -523,8 +520,7 @@
 
 // Check that the handle scope at the start of this block is the same as the handle scope at the end of the block.
 struct ScopedCheckHandleScope {
-  ScopedCheckHandleScope() {
-    handle_scope_ = Thread::Current()->GetTopHandleScope();
+  ScopedCheckHandleScope() : handle_scope_(Thread::Current()->GetTopHandleScope()) {
   }
 
   ~ScopedCheckHandleScope() {
@@ -533,9 +529,28 @@
         << "invocations have finished (as before they were invoked).";
   }
 
-  HandleScope* handle_scope_;
+  BaseHandleScope* const handle_scope_;
 };
 
+// Number of references allocated in JNI ShadowFrames on the given thread.
+static size_t NumJniShadowFrameReferences(Thread* self) REQUIRES_SHARED(Locks::mutator_lock_) {
+  return self->GetManagedStack()->NumJniShadowFrameReferences();
+}
+
+// Number of references in handle scope on the given thread.
+static size_t NumHandleReferences(Thread* self) {
+  size_t count = 0;
+  for (BaseHandleScope* cur = self->GetTopHandleScope(); cur != nullptr; cur = cur->GetLink()) {
+    count += cur->NumberOfReferences();
+  }
+  return count;
+}
+
+// Number of references allocated in handle scopes & JNI shadow frames on this thread.
+static size_t NumStackReferences(Thread* self) REQUIRES_SHARED(Locks::mutator_lock_) {
+  return NumHandleReferences(self) + NumJniShadowFrameReferences(self);
+}
+
 static void expectNumStackReferences(size_t val1, size_t val2) {
   // In rare cases when JNI functions call themselves recursively,
   // disable this test because it will have a false negative.
@@ -543,7 +558,7 @@
     /* @CriticalNative doesn't build a HandleScope, so this test is meaningless then. */
     ScopedObjectAccess soa(Thread::Current());
 
-    size_t actual_num = Thread::Current()->NumStackReferences();
+    size_t actual_num = NumStackReferences(Thread::Current());
     // XX: Not too sure what's going on.
     // Sometimes null references get placed and sometimes they don't?
     EXPECT_TRUE(val1 == actual_num || val2 == actual_num)
@@ -829,8 +844,7 @@
   gJava_MyClassNatives_fooIOO_calls[gCurrentJni] = 0;
 }
 
-// TODO: Maybe. @FastNative support for returning Objects?
-JNI_TEST_NORMAL_ONLY(CompileAndRunIntObjectObjectMethod)
+JNI_TEST(CompileAndRunIntObjectObjectMethod)
 
 int gJava_MyClassNatives_fooSII_calls[kJniKindCount] = {};
 jint Java_MyClassNatives_fooSII(JNIEnv* env ATTRIBUTE_UNUSED,
@@ -1050,8 +1064,7 @@
   gJava_MyClassNatives_fooSIOO_calls[gCurrentJni] = 0;
 }
 
-// TODO: Maybe. @FastNative support for returning Objects?
-JNI_TEST_NORMAL_ONLY(CompileAndRunStaticIntObjectObjectMethod)
+JNI_TEST(CompileAndRunStaticIntObjectObjectMethod)
 
 int gJava_MyClassNatives_fooSSIOO_calls[kJniKindCount] = {};
 jobject Java_MyClassNatives_fooSSIOO(JNIEnv*, jclass klass, jint x, jobject y, jobject z) {
@@ -1219,8 +1232,7 @@
   EXPECT_TRUE(env_->IsSameObject(result, jobj_));
 }
 
-// TODO: Maybe. @FastNative support for returning objects?
-JNI_TEST_NORMAL_ONLY(ReturnGlobalRef)
+JNI_TEST(ReturnGlobalRef)
 
 jint local_ref_test(JNIEnv* env, jobject thisObj, jint x) {
   // Add 10 local references
@@ -1360,8 +1372,7 @@
                                     CurrentJniStringSuffix() + "() with CallStaticObjectMethodV");
 }
 
-// TODO: Maybe support returning objects for @FastNative?
-JNI_TEST_NORMAL_ONLY(UpcallReturnTypeChecking_Instance)
+JNI_TEST(UpcallReturnTypeChecking_Instance)
 
 void JniCompilerTest::UpcallReturnTypeChecking_StaticImpl() {
   SetUpForTest(true, "staticMethodThatShouldReturnClass", "()Ljava/lang/Class;",
@@ -1388,8 +1399,7 @@
                                     CurrentJniStringSuffix() + "() with CallObjectMethodV");
 }
 
-// TODO: Maybe support returning objects for @FastNative?
-JNI_TEST_NORMAL_ONLY(UpcallReturnTypeChecking_Static)
+JNI_TEST(UpcallReturnTypeChecking_Static)
 
 // This should take jclass, but we're imitating a bug pattern.
 void Java_MyClassNatives_instanceMethodThatShouldTakeClass(JNIEnv*, jobject, jclass) {
@@ -2192,8 +2202,7 @@
                "()V",
                CURRENT_JNI_WRAPPER(Java_MyClassNatives_normalNative));
 
-  ScopedObjectAccess soa(Thread::Current());
-  ArtMethod* method = soa.DecodeMethod(jmethod_);
+  ArtMethod* method = jni::DecodeArtMethod(jmethod_);
   ASSERT_TRUE(method != nullptr);
 
   EXPECT_FALSE(method->IsAnnotatedWithCriticalNative());
@@ -2215,8 +2224,7 @@
                "()V",
                CURRENT_JNI_WRAPPER(Java_MyClassNatives_fastNative));
 
-  ScopedObjectAccess soa(Thread::Current());
-  ArtMethod* method = soa.DecodeMethod(jmethod_);
+  ArtMethod* method = jni::DecodeArtMethod(jmethod_);
   ASSERT_TRUE(method != nullptr);
 
   EXPECT_FALSE(method->IsAnnotatedWithCriticalNative());
@@ -2245,8 +2253,7 @@
   UpdateCurrentJni(JniKind::kCritical);
   ASSERT_TRUE(IsCurrentJniCritical());
 
-  ScopedObjectAccess soa(Thread::Current());
-  ArtMethod* method = soa.DecodeMethod(jmethod_);
+  ArtMethod* method = jni::DecodeArtMethod(jmethod_);
   ASSERT_TRUE(method != nullptr);
 
   EXPECT_TRUE(method->IsAnnotatedWithCriticalNative());
diff --git a/compiler/jni/quick/jni_compiler.cc b/compiler/jni/quick/jni_compiler.cc
index 7e58d78..3bd290d 100644
--- a/compiler/jni/quick/jni_compiler.cc
+++ b/compiler/jni/quick/jni_compiler.cc
@@ -70,6 +70,47 @@
   return JNIMacroAssembler<kPointerSize>::Create(arena, isa, features);
 }
 
+enum class JniEntrypoint {
+  kStart,
+  kEnd
+};
+
+template <PointerSize kPointerSize>
+static ThreadOffset<kPointerSize> GetJniEntrypointThreadOffset(JniEntrypoint which,
+                                                               bool reference_return,
+                                                               bool is_synchronized,
+                                                               bool is_fast_native) {
+  if (which == JniEntrypoint::kStart) {  // JniMethodStart
+    ThreadOffset<kPointerSize> jni_start =
+        is_synchronized
+            ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodStartSynchronized)
+            : (is_fast_native
+                   ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodFastStart)
+                   : QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodStart));
+
+    return jni_start;
+  } else {  // JniMethodEnd
+    ThreadOffset<kPointerSize> jni_end(-1);
+    if (reference_return) {
+      // Pass result.
+      jni_end = is_synchronized
+                    ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEndWithReferenceSynchronized)
+                    : (is_fast_native
+                           ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodFastEndWithReference)
+                           : QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEndWithReference));
+    } else {
+      jni_end = is_synchronized
+                    ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEndSynchronized)
+                    : (is_fast_native
+                           ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodFastEnd)
+                           : QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEnd));
+    }
+
+    return jni_end;
+  }
+}
+
+
 // Generate the JNI bridge for the given method, general contract:
 // - Arguments are in the managed runtime format, either on stack or in
 //   registers, a reference to the method object is supplied as part of this
@@ -96,24 +137,24 @@
   bool is_critical_native = (optimization_flags == Compiler::kCriticalNative);
 
   VLOG(jni) << "JniCompile: Method :: "
-              << art::PrettyMethod(method_idx, dex_file, /* with signature */ true)
+              << dex_file.PrettyMethod(method_idx, /* with signature */ true)
               << " :: access_flags = " << std::hex << access_flags << std::dec;
 
   if (UNLIKELY(is_fast_native)) {
     VLOG(jni) << "JniCompile: Fast native method detected :: "
-              << art::PrettyMethod(method_idx, dex_file, /* with signature */ true);
+              << dex_file.PrettyMethod(method_idx, /* with signature */ true);
   }
 
   if (UNLIKELY(is_critical_native)) {
     VLOG(jni) << "JniCompile: Critical native method detected :: "
-              << art::PrettyMethod(method_idx, dex_file, /* with signature */ true);
+              << dex_file.PrettyMethod(method_idx, /* with signature */ true);
   }
 
   if (kIsDebugBuild) {
     // Don't allow both @FastNative and @CriticalNative. They are mutually exclusive.
     if (UNLIKELY(is_fast_native && is_critical_native)) {
       LOG(FATAL) << "JniCompile: Method cannot be both @CriticalNative and @FastNative"
-                 << art::PrettyMethod(method_idx, dex_file, /* with_signature */ true);
+                 << dex_file.PrettyMethod(method_idx, /* with_signature */ true);
     }
 
     // @CriticalNative - extra checks:
@@ -124,15 +165,15 @@
       CHECK(is_static)
           << "@CriticalNative functions cannot be virtual since that would"
           << "require passing a reference parameter (this), which is illegal "
-          << art::PrettyMethod(method_idx, dex_file, /* with_signature */ true);
+          << dex_file.PrettyMethod(method_idx, /* with_signature */ true);
       CHECK(!is_synchronized)
           << "@CriticalNative functions cannot be synchronized since that would"
           << "require passing a (class and/or this) reference parameter, which is illegal "
-          << art::PrettyMethod(method_idx, dex_file, /* with_signature */ true);
+          << dex_file.PrettyMethod(method_idx, /* with_signature */ true);
       for (size_t i = 0; i < strlen(shorty); ++i) {
         CHECK_NE(Primitive::kPrimNot, Primitive::GetType(shorty[i]))
             << "@CriticalNative methods' shorty types must not have illegal references "
-            << art::PrettyMethod(method_idx, dex_file, /* with_signature */ true);
+            << dex_file.PrettyMethod(method_idx, /* with_signature */ true);
       }
     }
   }
@@ -276,10 +317,32 @@
   __ IncreaseFrameSize(main_out_arg_size);
 
   // Call the read barrier for the declaring class loaded from the method for a static call.
+  // Skip this for @CriticalNative because we didn't build a HandleScope to begin with.
   // Note that we always have outgoing param space available for at least two params.
   if (kUseReadBarrier && is_static && !is_critical_native) {
-    // XX: Why is this necessary only for the jclass? Why not for every single object ref?
-    // Skip this for @CriticalNative because we didn't build a HandleScope to begin with.
+    const bool kReadBarrierFastPath =
+        (instruction_set != kMips) && (instruction_set != kMips64);
+    std::unique_ptr<JNIMacroLabel> skip_cold_path_label;
+    if (kReadBarrierFastPath) {
+      skip_cold_path_label = __ CreateLabel();
+      // Fast path for supported targets.
+      //
+      // Check if gc_is_marking is set -- if it's not, we don't need
+      // a read barrier so skip it.
+      __ LoadFromThread(main_jni_conv->InterproceduralScratchRegister(),
+                        Thread::IsGcMarkingOffset<kPointerSize>(),
+                        Thread::IsGcMarkingSize());
+      // Jump over the slow path if gc is marking is false.
+      __ Jump(skip_cold_path_label.get(),
+              JNIMacroUnaryCondition::kZero,
+              main_jni_conv->InterproceduralScratchRegister());
+    }
+
+    // Construct slow path for read barrier:
+    //
+    // Call into the runtime's ReadBarrierJni and have it fix up
+    // the object address if it was moved.
+
     ThreadOffset<kPointerSize> read_barrier = QUICK_ENTRYPOINT_OFFSET(kPointerSize,
                                                                       pReadBarrierJni);
     main_jni_conv->ResetIterator(FrameOffset(main_out_arg_size));
@@ -310,6 +373,10 @@
       __ CallFromThread(read_barrier, main_jni_conv->InterproceduralScratchRegister());
     }
     main_jni_conv->ResetIterator(FrameOffset(main_out_arg_size));  // Reset.
+
+    if (kReadBarrierFastPath) {
+      __ Bind(skip_cold_path_label.get());
+    }
   }
 
   // 6. Call into appropriate JniMethodStart passing Thread* so that transition out of Runnable
@@ -319,13 +386,11 @@
   FrameOffset locked_object_handle_scope_offset(0xBEEFDEAD);
   if (LIKELY(!is_critical_native)) {
     // Skip this for @CriticalNative methods. They do not call JniMethodStart.
-    ThreadOffset<kPointerSize> jni_start =
-        is_synchronized
-            ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodStartSynchronized)
-            : (is_fast_native
-                   ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodFastStart)
-                   : QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodStart));
-
+    ThreadOffset<kPointerSize> jni_start(
+        GetJniEntrypointThreadOffset<kPointerSize>(JniEntrypoint::kStart,
+                                                   reference_return,
+                                                   is_synchronized,
+                                                   is_fast_native).SizeValue());
     main_jni_conv->ResetIterator(FrameOffset(main_out_arg_size));
     locked_object_handle_scope_offset = FrameOffset(0);
     if (is_synchronized) {
@@ -517,20 +582,15 @@
 
   if (LIKELY(!is_critical_native)) {
     // 12. Call JniMethodEnd
-    ThreadOffset<kPointerSize> jni_end(-1);
+    ThreadOffset<kPointerSize> jni_end(
+        GetJniEntrypointThreadOffset<kPointerSize>(JniEntrypoint::kEnd,
+                                                   reference_return,
+                                                   is_synchronized,
+                                                   is_fast_native).SizeValue());
     if (reference_return) {
       // Pass result.
-      jni_end = is_synchronized
-                    ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEndWithReferenceSynchronized)
-                    : QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEndWithReference);
       SetNativeParameter(jni_asm.get(), end_jni_conv.get(), end_jni_conv->ReturnRegister());
       end_jni_conv->Next();
-    } else {
-      jni_end = is_synchronized
-                    ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEndSynchronized)
-                    : (is_fast_native
-                           ? QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodFastEnd)
-                           : QUICK_ENTRYPOINT_OFFSET(kPointerSize, pJniMethodEnd));
     }
     // Pass saved local reference state.
     if (end_jni_conv->IsCurrentParamOnStack()) {
diff --git a/compiler/linker/arm64/relative_patcher_arm64.cc b/compiler/linker/arm64/relative_patcher_arm64.cc
index 3b77880..4a9de7f 100644
--- a/compiler/linker/arm64/relative_patcher_arm64.cc
+++ b/compiler/linker/arm64/relative_patcher_arm64.cc
@@ -214,7 +214,7 @@
         DCHECK(patch.GetType() == LinkerPatch::Type::kStringRelative ||
                patch.GetType() == LinkerPatch::Type::kTypeRelative) << patch.GetType();
       } else {
-        // With the read barrier (non-baker) enabled, it could be kDexCacheArray in the
+        // With the read barrier (non-Baker) enabled, it could be kDexCacheArray in the
         // HLoadString::LoadKind::kDexCachePcRelative case of VisitLoadString().
         DCHECK(patch.GetType() == LinkerPatch::Type::kStringRelative ||
                patch.GetType() == LinkerPatch::Type::kTypeRelative ||
diff --git a/compiler/oat_test.cc b/compiler/oat_test.cc
index b4c60d1..102637f 100644
--- a/compiler/oat_test.cc
+++ b/compiler/oat_test.cc
@@ -65,14 +65,14 @@
                                                             method->GetDexMethodIndex()));
 
     if (compiled_method == nullptr) {
-      EXPECT_TRUE(oat_method.GetQuickCode() == nullptr) << PrettyMethod(method) << " "
+      EXPECT_TRUE(oat_method.GetQuickCode() == nullptr) << method->PrettyMethod() << " "
                                                         << oat_method.GetQuickCode();
       EXPECT_EQ(oat_method.GetFrameSizeInBytes(), 0U);
       EXPECT_EQ(oat_method.GetCoreSpillMask(), 0U);
       EXPECT_EQ(oat_method.GetFpSpillMask(), 0U);
     } else {
       const void* quick_oat_code = oat_method.GetQuickCode();
-      EXPECT_TRUE(quick_oat_code != nullptr) << PrettyMethod(method);
+      EXPECT_TRUE(quick_oat_code != nullptr) << method->PrettyMethod();
       EXPECT_EQ(oat_method.GetFrameSizeInBytes(), compiled_method->GetFrameSizeInBytes());
       EXPECT_EQ(oat_method.GetCoreSpillMask(), compiled_method->GetCoreSpillMask());
       EXPECT_EQ(oat_method.GetFpSpillMask(), compiled_method->GetFpSpillMask());
@@ -82,7 +82,7 @@
       EXPECT_FALSE(quick_code.empty());
       size_t code_size = quick_code.size() * sizeof(quick_code[0]);
       EXPECT_EQ(0, memcmp(quick_oat_code, &quick_code[0], code_size))
-          << PrettyMethod(method) << " " << code_size;
+          << method->PrettyMethod() << " " << code_size;
       CHECK_EQ(0, memcmp(quick_oat_code, &quick_code[0], code_size));
     }
   }
@@ -92,7 +92,7 @@
                      const std::vector<std::string>& compiler_options,
                      /*out*/std::string* error_msg) {
     ASSERT_TRUE(error_msg != nullptr);
-    insn_features_.reset(InstructionSetFeatures::FromVariant(insn_set, "default", error_msg));
+    insn_features_ = InstructionSetFeatures::FromVariant(insn_set, "default", error_msg);
     ASSERT_TRUE(insn_features_ != nullptr) << error_msg;
     compiler_options_.reset(new CompilerOptions);
     for (const std::string& option : compiler_options) {
@@ -203,7 +203,8 @@
     }
     linker::MultiOatRelativePatcher patcher(compiler_driver_->GetInstructionSet(),
                                             instruction_set_features_.get());
-    oat_writer.PrepareLayout(compiler_driver_.get(), nullptr, dex_files, &patcher);
+    oat_writer.Initialize(compiler_driver_.get(), nullptr, dex_files);
+    oat_writer.PrepareLayout(&patcher);
     size_t rodata_size = oat_writer.GetOatHeader().GetExecutableOffset();
     size_t text_size = oat_writer.GetOatSize() - rodata_size;
     elf_writer->PrepareDynamicSection(rodata_size,
@@ -376,7 +377,8 @@
   if (kCompile) {
     TimingLogger timings2("OatTest::WriteRead", false, false);
     compiler_driver_->SetDexFilesForOatFile(class_linker->GetBootClassPath());
-    compiler_driver_->CompileAll(class_loader, class_linker->GetBootClassPath(), &timings2);
+    compiler_driver_->CompileAll(
+        class_loader, class_linker->GetBootClassPath(), /* verifier_deps */ nullptr, &timings2);
   }
 
   ScratchFile tmp_oat, tmp_vdex(tmp_oat, ".vdex");
@@ -390,7 +392,8 @@
   ASSERT_TRUE(success);
 
   if (kCompile) {  // OatWriter strips the code, regenerate to compare
-    compiler_driver_->CompileAll(class_loader, class_linker->GetBootClassPath(), &timings);
+    compiler_driver_->CompileAll(
+        class_loader, class_linker->GetBootClassPath(), /* verifier_deps */ nullptr, &timings);
   }
   std::unique_ptr<OatFile> oat_file(OatFile::Open(tmp_oat.GetFilename(),
                                                   tmp_oat.GetFilename(),
@@ -462,7 +465,7 @@
   EXPECT_EQ(72U, sizeof(OatHeader));
   EXPECT_EQ(4U, sizeof(OatMethodOffsets));
   EXPECT_EQ(20U, sizeof(OatQuickMethodHeader));
-  EXPECT_EQ(163 * static_cast<size_t>(GetInstructionSetPointerSize(kRuntimeISA)),
+  EXPECT_EQ(164 * static_cast<size_t>(GetInstructionSetPointerSize(kRuntimeISA)),
             sizeof(QuickEntryPoints));
 }
 
@@ -514,7 +517,7 @@
                                   soa.Decode<mirror::ClassLoader>(class_loader).Ptr());
   }
   compiler_driver_->SetDexFilesForOatFile(dex_files);
-  compiler_driver_->CompileAll(class_loader, dex_files, &timings);
+  compiler_driver_->CompileAll(class_loader, dex_files, /* verifier_deps */ nullptr, &timings);
 
   ScratchFile tmp_oat, tmp_vdex(tmp_oat, ".vdex");
   SafeMap<std::string, std::string> key_value_store;
diff --git a/compiler/oat_writer.cc b/compiler/oat_writer.cc
index 44c26ed..f9173f5 100644
--- a/compiler/oat_writer.cc
+++ b/compiler/oat_writer.cc
@@ -299,6 +299,7 @@
     vdex_size_(0u),
     vdex_dex_files_offset_(0u),
     vdex_verifier_deps_offset_(0u),
+    vdex_quickening_info_offset_(0u),
     oat_size_(0u),
     bss_start_(0u),
     bss_size_(0u),
@@ -314,6 +315,8 @@
     size_dex_file_(0),
     size_verifier_deps_(0),
     size_verifier_deps_alignment_(0),
+    size_quickening_info_(0),
+    size_quickening_info_alignment_(0),
     size_interpreter_to_interpreter_bridge_(0),
     size_interpreter_to_compiled_code_bridge_(0),
     size_jni_dlsym_lookup_(0),
@@ -519,15 +522,9 @@
   return true;
 }
 
-void OatWriter::PrepareLayout(const CompilerDriver* compiler,
-                              ImageWriter* image_writer,
-                              const std::vector<const DexFile*>& dex_files,
-                              linker::MultiOatRelativePatcher* relative_patcher) {
+void OatWriter::PrepareLayout(linker::MultiOatRelativePatcher* relative_patcher) {
   CHECK(write_state_ == WriteState::kPrepareLayout);
 
-  compiler_driver_ = compiler;
-  image_writer_ = image_writer;
-  dex_files_ = &dex_files;
   relative_patcher_ = relative_patcher;
   SetMultiOatRelativePatcherAdjustment();
 
@@ -706,9 +703,10 @@
 
 class OatWriter::InitCodeMethodVisitor : public OatDexMethodVisitor {
  public:
-  InitCodeMethodVisitor(OatWriter* writer, size_t offset)
+  InitCodeMethodVisitor(OatWriter* writer, size_t offset, size_t quickening_info_offset)
     : OatDexMethodVisitor(writer, offset),
-      debuggable_(writer->GetCompilerDriver()->GetCompilerOptions().GetDebuggable()) {
+      debuggable_(writer->GetCompilerDriver()->GetCompilerOptions().GetDebuggable()),
+      current_quickening_info_offset_(quickening_info_offset) {
     writer_->absolute_patch_locations_.reserve(
         writer_->compiler_driver_->GetNonRelativeLinkerPatchCount());
   }
@@ -726,6 +724,9 @@
     OatClass* oat_class = &writer_->oat_classes_[oat_class_index_];
     CompiledMethod* compiled_method = oat_class->GetCompiledMethod(class_def_method_index);
 
+    if (it.GetMethodCodeItem() != nullptr) {
+      current_quickening_info_offset_ += sizeof(uint32_t);
+    }
     if (compiled_method != nullptr) {
       // Derived from CompiledMethod.
       uint32_t quick_code_offset = 0;
@@ -759,7 +760,7 @@
         if (writer_->relative_patcher_->GetOffset(method_ref) != 0u) {
           // TODO: Should this be a hard failure?
           LOG(WARNING) << "Multiple definitions of "
-              << PrettyMethod(method_ref.dex_method_index, *method_ref.dex_file)
+              << method_ref.dex_file->PrettyMethod(method_ref.dex_method_index)
               << " offsets " << writer_->relative_patcher_->GetOffset(method_ref)
               << " " << quick_code_offset;
         } else {
@@ -771,15 +772,28 @@
       DCHECK_LT(method_offsets_index_, oat_class->method_headers_.size());
       OatQuickMethodHeader* method_header = &oat_class->method_headers_[method_offsets_index_];
       uint32_t vmap_table_offset = method_header->vmap_table_offset_;
-      // If we don't have quick code, then we must have a vmap, as that is how the dex2dex
-      // compiler records its transformations.
-      DCHECK(!quick_code.empty() || vmap_table_offset != 0);
       // The code offset was 0 when the mapping/vmap table offset was set, so it's set
       // to 0-offset and we need to adjust it by code_offset.
       uint32_t code_offset = quick_code_offset - thumb_offset;
-      if (vmap_table_offset != 0u && code_offset != 0u) {
-        vmap_table_offset += code_offset;
-        DCHECK_LT(vmap_table_offset, code_offset) << "Overflow in oat offsets";
+      if (!compiled_method->GetQuickCode().empty()) {
+        // If the code is compiled, we write the offset of the stack map relative
+        // to the code,
+        if (vmap_table_offset != 0u) {
+          vmap_table_offset += code_offset;
+          DCHECK_LT(vmap_table_offset, code_offset);
+        }
+      } else {
+        if (kIsVdexEnabled) {
+          // We write the offset in the .vdex file.
+          DCHECK_EQ(vmap_table_offset, 0u);
+          vmap_table_offset = current_quickening_info_offset_;
+          ArrayRef<const uint8_t> map = compiled_method->GetVmapTable();
+          current_quickening_info_offset_ += map.size() * sizeof(map.front());
+        } else {
+          // We write the offset of the quickening info relative to the code.
+          vmap_table_offset += code_offset;
+          DCHECK_LT(vmap_table_offset, code_offset);
+        }
       }
       uint32_t frame_size_in_bytes = compiled_method->GetFrameSizeInBytes();
       uint32_t core_spill_mask = compiled_method->GetCoreSpillMask();
@@ -878,6 +892,9 @@
 
   // Cache of compiler's --debuggable option.
   const bool debuggable_;
+
+  // Offset in the vdex file for the quickening info.
+  uint32_t current_quickening_info_offset_;
 };
 
 class OatWriter::InitMapMethodVisitor : public OatDexMethodVisitor {
@@ -893,21 +910,25 @@
 
     if (compiled_method != nullptr) {
       DCHECK_LT(method_offsets_index_, oat_class->method_offsets_.size());
-      DCHECK_EQ(oat_class->method_headers_[method_offsets_index_].vmap_table_offset_, 0u);
+      // If vdex is enabled, we only emit the stack map of compiled code. The quickening info will
+      // be in the vdex file.
+      if (!compiled_method->GetQuickCode().empty() || !kIsVdexEnabled) {
+        DCHECK_EQ(oat_class->method_headers_[method_offsets_index_].vmap_table_offset_, 0u);
 
-      ArrayRef<const uint8_t> map = compiled_method->GetVmapTable();
-      uint32_t map_size = map.size() * sizeof(map[0]);
-      if (map_size != 0u) {
-        size_t offset = dedupe_map_.GetOrCreate(
-            map.data(),
-            [this, map_size]() {
-              uint32_t new_offset = offset_;
-              offset_ += map_size;
-              return new_offset;
-            });
-        // Code offset is not initialized yet, so set the map offset to 0u-offset.
-        DCHECK_EQ(oat_class->method_offsets_[method_offsets_index_].code_offset_, 0u);
-        oat_class->method_headers_[method_offsets_index_].vmap_table_offset_ = 0u - offset;
+        ArrayRef<const uint8_t> map = compiled_method->GetVmapTable();
+        uint32_t map_size = map.size() * sizeof(map[0]);
+        if (map_size != 0u) {
+          size_t offset = dedupe_map_.GetOrCreate(
+              map.data(),
+              [this, map_size]() {
+                uint32_t new_offset = offset_;
+                offset_ += map_size;
+                return new_offset;
+              });
+          // Code offset is not initialized yet, so set the map offset to 0u-offset.
+          DCHECK_EQ(oat_class->method_offsets_[method_offsets_index_].code_offset_, 0u);
+          oat_class->method_headers_[method_offsets_index_].vmap_table_offset_ = 0u - offset;
+        }
       }
       ++method_offsets_index_;
     }
@@ -967,7 +988,7 @@
           invoke_type);
       if (method == nullptr) {
         LOG(FATAL_WITHOUT_ABORT) << "Unexpected failure to resolve a method: "
-            << PrettyMethod(it.GetMemberIndex(), *dex_file_, true);
+            << dex_file_->PrettyMethod(it.GetMemberIndex(), true);
         soa.Self()->AssertPendingException();
         mirror::Throwable* exc = soa.Self()->GetException();
         std::string dump = exc->Dump();
@@ -1073,7 +1094,7 @@
                              GetInstructionSetAlignment(compiled_method->GetInstructionSet()));
         DCHECK_EQ(method_offsets.code_offset_,
                   offset_ + sizeof(OatQuickMethodHeader) + compiled_method->CodeDelta())
-            << PrettyMethod(it.GetMemberIndex(), *dex_file_);
+            << dex_file_->PrettyMethod(it.GetMemberIndex());
         const OatQuickMethodHeader& method_header =
             oat_class->method_headers_[method_offsets_index_];
         if (!out->WriteFully(&method_header, sizeof(method_header))) {
@@ -1185,7 +1206,7 @@
 
   void ReportWriteFailure(const char* what, const ClassDataItemIterator& it) {
     PLOG(ERROR) << "Failed to write " << what << " for "
-        << PrettyMethod(it.GetMemberIndex(), *dex_file_) << " to " << out_->GetLocation();
+        << dex_file_->PrettyMethod(it.GetMemberIndex()) << " to " << out_->GetLocation();
   }
 
   ArtMethod* GetTargetMethod(const LinkerPatch& patch)
@@ -1370,13 +1391,16 @@
       DCHECK((compiled_method->GetVmapTable().size() == 0u && map_offset == 0u) ||
              (compiled_method->GetVmapTable().size() != 0u && map_offset != 0u))
           << compiled_method->GetVmapTable().size() << " " << map_offset << " "
-          << PrettyMethod(it.GetMemberIndex(), *dex_file_);
+          << dex_file_->PrettyMethod(it.GetMemberIndex());
 
-      if (map_offset != 0u) {
+      // If vdex is enabled, only emit the map for compiled code. The quickening info
+      // is emitted in the vdex already.
+      if (map_offset != 0u &&
+          !(kIsVdexEnabled && compiled_method->GetQuickCode().empty())) {
         // Transform map_offset to actual oat data offset.
         map_offset = (code_offset - compiled_method->CodeDelta()) - map_offset;
         DCHECK_NE(map_offset, 0u);
-        DCHECK_LE(map_offset, offset_) << PrettyMethod(it.GetMemberIndex(), *dex_file_);
+        DCHECK_LE(map_offset, offset_) << dex_file_->PrettyMethod(it.GetMemberIndex());
 
         ArrayRef<const uint8_t> map = compiled_method->GetVmapTable();
         size_t map_size = map.size() * sizeof(map[0]);
@@ -1401,7 +1425,7 @@
 
   void ReportWriteFailure(const ClassDataItemIterator& it) {
     PLOG(ERROR) << "Failed to write map for "
-        << PrettyMethod(it.GetMemberIndex(), *dex_file_) << " to " << out_->GetLocation();
+        << dex_file_->PrettyMethod(it.GetMemberIndex()) << " to " << out_->GetLocation();
   }
 };
 
@@ -1539,21 +1563,18 @@
 }
 
 size_t OatWriter::InitOatCodeDexFiles(size_t offset) {
-  #define VISIT(VisitorType)                          \
-    do {                                              \
-      VisitorType visitor(this, offset);              \
-      bool success = VisitDexMethods(&visitor);       \
-      DCHECK(success);                                \
-      offset = visitor.GetOffset();                   \
-    } while (false)
+  InitCodeMethodVisitor code_visitor(this, offset, vdex_quickening_info_offset_);
+  bool success = VisitDexMethods(&code_visitor);
+  DCHECK(success);
+  offset = code_visitor.GetOffset();
 
-  VISIT(InitCodeMethodVisitor);
   if (HasImage()) {
-    VISIT(InitImageMethodVisitor);
+    InitImageMethodVisitor image_visitor(this, offset);
+    success = VisitDexMethods(&image_visitor);
+    DCHECK(success);
+    offset = image_visitor.GetOffset();
   }
 
-  #undef VISIT
-
   return offset;
 }
 
@@ -1626,6 +1647,90 @@
   return true;
 }
 
+class OatWriter::WriteQuickeningInfoMethodVisitor : public DexMethodVisitor {
+ public:
+  WriteQuickeningInfoMethodVisitor(OatWriter* writer, OutputStream* out, uint32_t offset)
+    : DexMethodVisitor(writer, offset),
+      out_(out),
+      written_bytes_(0u) {}
+
+  bool VisitMethod(size_t class_def_method_index ATTRIBUTE_UNUSED,
+                   const ClassDataItemIterator& it) {
+    if (it.GetMethodCodeItem() == nullptr) {
+      // No CodeItem. Native or abstract method.
+      return true;
+    }
+
+    uint32_t method_idx = it.GetMemberIndex();
+    CompiledMethod* compiled_method =
+        writer_->compiler_driver_->GetCompiledMethod(MethodReference(dex_file_, method_idx));
+
+    uint32_t length = 0;
+    const uint8_t* data = nullptr;
+    // VMap only contains quickening info if this method is not compiled.
+    if (compiled_method != nullptr && compiled_method->GetQuickCode().empty()) {
+      ArrayRef<const uint8_t> map = compiled_method->GetVmapTable();
+      data = map.data();
+      length = map.size() * sizeof(map.front());
+    }
+
+    if (!out_->WriteFully(&length, sizeof(length)) ||
+        !out_->WriteFully(data, length)) {
+      PLOG(ERROR) << "Failed to write quickening info for "
+          << dex_file_->PrettyMethod(it.GetMemberIndex()) << " to " << out_->GetLocation();
+      return false;
+    }
+    offset_ += sizeof(length) + length;
+    written_bytes_ += sizeof(length) + length;
+    return true;
+  }
+
+  size_t GetNumberOfWrittenBytes() const {
+    return written_bytes_;
+  }
+
+ private:
+  OutputStream* const out_;
+  size_t written_bytes_;
+};
+
+bool OatWriter::WriteQuickeningInfo(OutputStream* vdex_out) {
+  if (!kIsVdexEnabled) {
+    return true;
+  }
+
+  size_t initial_offset = vdex_size_;
+  size_t start_offset = RoundUp(initial_offset, 4u);
+
+  vdex_size_ = start_offset;
+  vdex_quickening_info_offset_ = vdex_size_;
+  size_quickening_info_alignment_ = start_offset - initial_offset;
+
+  off_t actual_offset = vdex_out->Seek(start_offset, kSeekSet);
+  if (actual_offset != static_cast<off_t>(start_offset)) {
+    PLOG(ERROR) << "Failed to seek to quickening info section. Actual: " << actual_offset
+                << " Expected: " << start_offset
+                << " Output: " << vdex_out->GetLocation();
+    return false;
+  }
+
+  WriteQuickeningInfoMethodVisitor visitor(this, vdex_out, start_offset);
+  if (!VisitDexMethods(&visitor)) {
+    PLOG(ERROR) << "Failed to write the vdex quickening info. File: " << vdex_out->GetLocation();
+    return false;
+  }
+
+  if (!vdex_out->Flush()) {
+    PLOG(ERROR) << "Failed to flush stream after writing quickening info."
+                << " File: " << vdex_out->GetLocation();
+    return false;
+  }
+
+  size_quickening_info_ = visitor.GetNumberOfWrittenBytes();
+  vdex_size_ += size_quickening_info_;
+  return true;
+}
+
 bool OatWriter::WriteVerifierDeps(OutputStream* vdex_out, verifier::VerifierDeps* verifier_deps) {
   if (!kIsVdexEnabled) {
     return true;
@@ -1654,7 +1759,7 @@
   }
 
   std::vector<uint8_t> buffer;
-  verifier_deps->Encode(&buffer);
+  verifier_deps->Encode(*dex_files_, &buffer);
 
   if (!vdex_out->WriteFully(buffer.data(), buffer.size())) {
     PLOG(ERROR) << "Failed to write verifier deps."
@@ -1717,6 +1822,8 @@
     DO_STAT(size_dex_file_);
     DO_STAT(size_verifier_deps_);
     DO_STAT(size_verifier_deps_alignment_);
+    DO_STAT(size_quickening_info_);
+    DO_STAT(size_quickening_info_alignment_);
     DO_STAT(size_interpreter_to_interpreter_bridge_);
     DO_STAT(size_interpreter_to_compiled_code_bridge_);
     DO_STAT(size_jni_dlsym_lookup_);
@@ -2434,9 +2541,11 @@
   DCHECK_NE(vdex_verifier_deps_offset_, 0u);
 
   size_t dex_section_size = vdex_verifier_deps_offset_ - vdex_dex_files_offset_;
-  size_t verifier_deps_section_size = vdex_size_ - vdex_verifier_deps_offset_;
+  size_t verifier_deps_section_size = vdex_quickening_info_offset_ - vdex_verifier_deps_offset_;
+  size_t quickening_info_section_size = vdex_size_ - vdex_quickening_info_offset_;
 
-  VdexFile::Header vdex_header(dex_section_size, verifier_deps_section_size);
+  VdexFile::Header vdex_header(
+      dex_section_size, verifier_deps_section_size, quickening_info_section_size);
   if (!vdex_out->WriteFully(&vdex_header, sizeof(VdexFile::Header))) {
     PLOG(ERROR) << "Failed to write vdex header. File: " << vdex_out->GetLocation();
     return false;
diff --git a/compiler/oat_writer.h b/compiler/oat_writer.h
index 1cc193b..3d08ad3 100644
--- a/compiler/oat_writer.h
+++ b/compiler/oat_writer.h
@@ -118,6 +118,10 @@
   //   - AddRawDexFileSource().
   // Then the user must call in order
   //   - WriteAndOpenDexFiles()
+  //   - Initialize()
+  //   - WriteVerifierDeps()
+  //   - WriteQuickeningInfo()
+  //   - WriteVdexHeader()
   //   - PrepareLayout(),
   //   - WriteRodata(),
   //   - WriteCode(),
@@ -154,14 +158,20 @@
                             bool verify,
                             /*out*/ std::unique_ptr<MemMap>* opened_dex_files_map,
                             /*out*/ std::vector<std::unique_ptr<const DexFile>>* opened_dex_files);
+  bool WriteQuickeningInfo(OutputStream* vdex_out);
   bool WriteVerifierDeps(OutputStream* vdex_out, verifier::VerifierDeps* verifier_deps);
   bool WriteVdexHeader(OutputStream* vdex_out);
+  // Initialize the writer with the given parameters.
+  void Initialize(const CompilerDriver* compiler,
+                  ImageWriter* image_writer,
+                  const std::vector<const DexFile*>& dex_files) {
+    compiler_driver_ = compiler;
+    image_writer_ = image_writer;
+    dex_files_ = &dex_files;
+  }
 
   // Prepare layout of remaining data.
-  void PrepareLayout(const CompilerDriver* compiler,
-                     ImageWriter* image_writer,
-                     const std::vector<const DexFile*>& dex_files,
-                     linker::MultiOatRelativePatcher* relative_patcher);
+  void PrepareLayout(linker::MultiOatRelativePatcher* relative_patcher);
   // Write the rest of .rodata section (ClassOffsets[], OatClass[], maps).
   bool WriteRodata(OutputStream* out);
   // Write the code to the .text section.
@@ -239,6 +249,7 @@
   class InitImageMethodVisitor;
   class WriteCodeMethodVisitor;
   class WriteMapMethodVisitor;
+  class WriteQuickeningInfoMethodVisitor;
 
   // Visit all the methods in all the compiled dex files in their definition order
   // with a given DexMethodVisitor.
@@ -325,6 +336,9 @@
   // Offset of section holding VerifierDeps inside Vdex.
   size_t vdex_verifier_deps_offset_;
 
+  // Offset of section holding quickening info inside Vdex.
+  size_t vdex_quickening_info_offset_;
+
   // Size required for Oat data structures.
   size_t oat_size_;
 
@@ -368,6 +382,8 @@
   uint32_t size_dex_file_;
   uint32_t size_verifier_deps_;
   uint32_t size_verifier_deps_alignment_;
+  uint32_t size_quickening_info_;
+  uint32_t size_quickening_info_alignment_;
   uint32_t size_interpreter_to_interpreter_bridge_;
   uint32_t size_interpreter_to_compiled_code_bridge_;
   uint32_t size_jni_dlsym_lookup_;
diff --git a/compiler/optimizing/bounds_check_elimination.cc b/compiler/optimizing/bounds_check_elimination.cc
index 994d394..7dc094b 100644
--- a/compiler/optimizing/bounds_check_elimination.cc
+++ b/compiler/optimizing/bounds_check_elimination.cc
@@ -548,7 +548,21 @@
   void VisitBasicBlock(HBasicBlock* block) OVERRIDE {
     DCHECK(!IsAddedBlock(block));
     first_index_bounds_check_map_.clear();
-    HGraphVisitor::VisitBasicBlock(block);
+    // Visit phis and instructions using a safe iterator. The iteration protects
+    // against deleting the current instruction during iteration. However, it
+    // must advance next_ if that instruction is deleted during iteration.
+    for (HInstruction* instruction = block->GetFirstPhi(); instruction != nullptr;) {
+      DCHECK(instruction->IsInBlock());
+      next_ = instruction->GetNext();
+      instruction->Accept(this);
+      instruction = next_;
+    }
+    for (HInstruction* instruction = block->GetFirstInstruction(); instruction != nullptr;) {
+      DCHECK(instruction->IsInBlock());
+      next_ = instruction->GetNext();
+      instruction->Accept(this);
+      instruction = next_;
+    }
     // We should never deoptimize from an osr method, otherwise we might wrongly optimize
     // code dominated by the deoptimization.
     if (!GetGraph()->IsCompilingOsr()) {
@@ -1361,6 +1375,11 @@
         ValueBound other_value = ValueBound::AsValueBound(other_index);
         int32_t other_c = other_value.GetConstant();
         if (array_length == other_array_length && base == other_value.GetInstruction()) {
+          // Ensure every candidate could be picked for code generation.
+          bool b1 = false, b2 = false;
+          if (!induction_range_.CanGenerateRange(other_bounds_check, other_index, &b1, &b2)) {
+            continue;
+          }
           // Does the current basic block dominate all back edges? If not,
           // add this candidate later only if it falls into the range.
           if (!loop->DominatesAllBackEdges(user->GetBlock())) {
@@ -1793,7 +1812,12 @@
   }
 
   /** Helper method to replace an instruction with another instruction. */
-  static void ReplaceInstruction(HInstruction* instruction, HInstruction* replacement) {
+  void ReplaceInstruction(HInstruction* instruction, HInstruction* replacement) {
+    // Safe iteration.
+    if (instruction == next_) {
+      next_ = next_->GetNext();
+    }
+    // Replace and remove.
     instruction->ReplaceWith(replacement);
     instruction->GetBlock()->RemoveInstruction(instruction);
   }
@@ -1826,6 +1850,9 @@
   // Range analysis based on induction variables.
   InductionVarRange induction_range_;
 
+  // Safe iteration.
+  HInstruction* next_;
+
   DISALLOW_COPY_AND_ASSIGN(BCEVisitor);
 };
 
@@ -1840,8 +1867,8 @@
   // that value dominated by that instruction fits in that range. Range of that
   // value can be narrowed further down in the dominator tree.
   BCEVisitor visitor(graph_, side_effects_, induction_analysis_);
-  for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
-    HBasicBlock* current = it.Current();
+  for (size_t i = 0, size = graph_->GetReversePostOrder().size(); i != size; ++i) {
+    HBasicBlock* current = graph_->GetReversePostOrder()[i];
     if (visitor.IsAddedBlock(current)) {
       // Skip added blocks. Their effects are already taken care of.
       continue;
@@ -1850,8 +1877,11 @@
     // Skip forward to the current block in case new basic blocks were inserted
     // (which always appear earlier in reverse post order) to avoid visiting the
     // same basic block twice.
-    for ( ; !it.Done() && it.Current() != current; it.Advance()) {
-    }
+    size_t new_size = graph_->GetReversePostOrder().size();
+    DCHECK_GE(new_size, size);
+    i += new_size - size;
+    DCHECK_EQ(current, graph_->GetReversePostOrder()[i]);
+    size = new_size;
   }
 
   // Perform cleanup.
diff --git a/compiler/optimizing/builder.cc b/compiler/optimizing/builder.cc
index 86742e6..2927e1f 100644
--- a/compiler/optimizing/builder.cc
+++ b/compiler/optimizing/builder.cc
@@ -51,7 +51,7 @@
 
   if (compiler_options.IsHugeMethod(code_item_.insns_size_in_code_units_)) {
     VLOG(compiler) << "Skip compilation of huge method "
-                   << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_)
+                   << dex_file_->PrettyMethod(dex_compilation_unit_->GetDexMethodIndex())
                    << ": " << code_item_.insns_size_in_code_units_ << " code units";
     MaybeRecordStat(MethodCompilationStat::kNotCompiledHugeMethod);
     return true;
@@ -61,7 +61,7 @@
   if (compiler_options.IsLargeMethod(code_item_.insns_size_in_code_units_)
       && (number_of_branches == 0)) {
     VLOG(compiler) << "Skip compilation of large method with no branch "
-                   << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_)
+                   << dex_file_->PrettyMethod(dex_compilation_unit_->GetDexMethodIndex())
                    << ": " << code_item_.insns_size_in_code_units_ << " code units";
     MaybeRecordStat(MethodCompilationStat::kNotCompiledLargeMethodNoBranches);
     return true;
diff --git a/compiler/optimizing/builder.h b/compiler/optimizing/builder.h
index 580ef72..f896f11 100644
--- a/compiler/optimizing/builder.h
+++ b/compiler/optimizing/builder.h
@@ -43,7 +43,7 @@
                 OptimizingCompilerStats* compiler_stats,
                 const uint8_t* interpreter_metadata,
                 Handle<mirror::DexCache> dex_cache,
-                StackHandleScopeCollection* handles)
+                VariableSizedHandleScope* handles)
       : graph_(graph),
         dex_file_(dex_file),
         code_item_(code_item),
@@ -68,7 +68,7 @@
   // Only for unit testing.
   HGraphBuilder(HGraph* graph,
                 const DexFile::CodeItem& code_item,
-                StackHandleScopeCollection* handles,
+                VariableSizedHandleScope* handles,
                 Primitive::Type return_type = Primitive::kPrimInt)
       : graph_(graph),
         dex_file_(nullptr),
diff --git a/compiler/optimizing/bytecode_utils.h b/compiler/optimizing/bytecode_utils.h
index 6dfffce..133afa4 100644
--- a/compiler/optimizing/bytecode_utils.h
+++ b/compiler/optimizing/bytecode_utils.h
@@ -26,7 +26,8 @@
 
 class CodeItemIterator : public ValueObject {
  public:
-  CodeItemIterator(const DexFile::CodeItem& code_item, uint32_t start_dex_pc = 0u)
+  explicit CodeItemIterator(const DexFile::CodeItem& code_item) : CodeItemIterator(code_item, 0u) {}
+  CodeItemIterator(const DexFile::CodeItem& code_item, uint32_t start_dex_pc)
       : code_ptr_(code_item.insns_ + start_dex_pc),
         code_end_(code_item.insns_ + code_item.insns_size_in_code_units_),
         dex_pc_(start_dex_pc) {}
diff --git a/compiler/optimizing/code_generator.cc b/compiler/optimizing/code_generator.cc
index 0f8cdbb..8b450e1 100644
--- a/compiler/optimizing/code_generator.cc
+++ b/compiler/optimizing/code_generator.cc
@@ -671,9 +671,9 @@
     return;
   }
   ArenaVector<HSuspendCheck*> loop_headers(graph.GetArena()->Adapter(kArenaAllocMisc));
-  for (HReversePostOrderIterator it(graph); !it.Done(); it.Advance()) {
-    if (it.Current()->IsLoopHeader()) {
-      HSuspendCheck* suspend_check = it.Current()->GetLoopInformation()->GetSuspendCheck();
+  for (HBasicBlock* block : graph.GetReversePostOrder()) {
+    if (block->IsLoopHeader()) {
+      HSuspendCheck* suspend_check = block->GetLoopInformation()->GetSuspendCheck();
       if (!suspend_check->GetEnvironment()->IsFromInlinedInvoke()) {
         loop_headers.push_back(suspend_check);
       }
diff --git a/compiler/optimizing/code_generator.h b/compiler/optimizing/code_generator.h
index 49f4f18..bf246ad 100644
--- a/compiler/optimizing/code_generator.h
+++ b/compiler/optimizing/code_generator.h
@@ -30,6 +30,7 @@
 #include "memory_region.h"
 #include "nodes.h"
 #include "optimizing_compiler_stats.h"
+#include "read_barrier_option.h"
 #include "stack_map_stream.h"
 #include "utils/label.h"
 
@@ -50,6 +51,9 @@
 // Maximum value for a primitive long.
 static int64_t constexpr kPrimLongMax = INT64_C(0x7fffffffffffffff);
 
+static constexpr ReadBarrierOption kCompilerReadBarrierOption =
+    kEmitCompilerReadBarrier ? kWithReadBarrier : kWithoutReadBarrier;
+
 class Assembler;
 class CodeGenerator;
 class CompilerDriver;
@@ -560,8 +564,6 @@
                                                                     kArenaAllocCodeGenerator)),
         blocked_fpu_registers_(graph->GetArena()->AllocArray<bool>(number_of_fpu_registers,
                                                                    kArenaAllocCodeGenerator)),
-        blocked_register_pairs_(graph->GetArena()->AllocArray<bool>(number_of_register_pairs,
-                                                                    kArenaAllocCodeGenerator)),
         number_of_core_registers_(number_of_core_registers),
         number_of_fpu_registers_(number_of_fpu_registers),
         number_of_register_pairs_(number_of_register_pairs),
@@ -649,7 +651,6 @@
   // arrays.
   bool* const blocked_core_registers_;
   bool* const blocked_fpu_registers_;
-  bool* const blocked_register_pairs_;
   size_t number_of_core_registers_;
   size_t number_of_fpu_registers_;
   size_t number_of_register_pairs_;
diff --git a/compiler/optimizing/code_generator_arm.cc b/compiler/optimizing/code_generator_arm.cc
index 9870876..7c3a2c6 100644
--- a/compiler/optimizing/code_generator_arm.cc
+++ b/compiler/optimizing/code_generator_arm.cc
@@ -107,7 +107,7 @@
     size_t number_of_d_regs = (last - first + 1) / 2;
 
     if (number_of_d_regs == 1) {
-       __ StoreDToOffset(d_reg, SP, stack_offset);
+      __ StoreDToOffset(d_reg, SP, stack_offset);
     } else if (number_of_d_regs > 1) {
       __ add(IP, SP, ShifterOperand(stack_offset));
       __ vstmiad(IP, d_reg, number_of_d_regs);
@@ -429,34 +429,50 @@
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
+    HLoadString* load = instruction_->AsLoadString();
+    const uint32_t string_index = load->GetStringIndex();
+    Register out = locations->Out().AsRegister<Register>();
+    Register temp = locations->GetTemp(0).AsRegister<Register>();
+    constexpr bool call_saves_everything_except_r0 = (!kUseReadBarrier || kUseBakerReadBarrier);
 
     CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, locations);
 
     InvokeRuntimeCallingConvention calling_convention;
-    HLoadString* load = instruction_->AsLoadString();
-    const uint32_t string_index = load->GetStringIndex();
+    // In the unlucky case that the `temp` is R0, we preserve the address in `out` across
+    // the kSaveEverything call (or use `out` for the address after non-kSaveEverything call).
+    bool temp_is_r0 = (temp == calling_convention.GetRegisterAt(0));
+    Register entry_address = temp_is_r0 ? out : temp;
+    DCHECK_NE(entry_address, calling_convention.GetRegisterAt(0));
+    if (call_saves_everything_except_r0 && temp_is_r0) {
+      __ mov(entry_address, ShifterOperand(temp));
+    }
+
     __ LoadImmediate(calling_convention.GetRegisterAt(0), string_index);
     arm_codegen->InvokeRuntime(kQuickResolveString, instruction_, instruction_->GetDexPc(), this);
     CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
+
+    // Store the resolved String to the .bss entry.
+    if (call_saves_everything_except_r0) {
+      // The string entry address was preserved in `entry_address` thanks to kSaveEverything.
+      __ str(R0, Address(entry_address));
+    } else {
+      // For non-Baker read barrier, we need to re-calculate the address of the string entry.
+      CodeGeneratorARM::PcRelativePatchInfo* labels =
+          arm_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index);
+      __ BindTrackedLabel(&labels->movw_label);
+      __ movw(entry_address, /* placeholder */ 0u);
+      __ BindTrackedLabel(&labels->movt_label);
+      __ movt(entry_address, /* placeholder */ 0u);
+      __ BindTrackedLabel(&labels->add_pc_label);
+      __ add(entry_address, entry_address, ShifterOperand(PC));
+      __ str(R0, Address(entry_address));
+    }
+
     arm_codegen->Move32(locations->Out(), Location::RegisterLocation(R0));
-
     RestoreLiveRegisters(codegen, locations);
 
-    // Store the resolved String to the BSS entry.
-    // TODO: Change art_quick_resolve_string to kSaveEverything and use a temporary for the
-    // .bss entry address in the fast path, so that we can avoid another calculation here.
-    CodeGeneratorARM::PcRelativePatchInfo* labels =
-        arm_codegen->NewPcRelativeStringPatch(load->GetDexFile(), string_index);
-    __ BindTrackedLabel(&labels->movw_label);
-    __ movw(IP, /* placeholder */ 0u);
-    __ BindTrackedLabel(&labels->movt_label);
-    __ movt(IP, /* placeholder */ 0u);
-    __ BindTrackedLabel(&labels->add_pc_label);
-    __ add(IP, IP, ShifterOperand(PC));
-    __ str(locations->Out().AsRegister<Register>(), Address(IP));
-
     __ b(GetExitLabel());
   }
 
@@ -473,8 +489,6 @@
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
-    Location object_class = instruction_->IsCheckCast() ? locations->GetTemp(0)
-                                                        : locations->Out();
     DCHECK(instruction_->IsCheckCast()
            || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
 
@@ -488,26 +502,26 @@
     // We're moving two locations to locations that could overlap, so we need a parallel
     // move resolver.
     InvokeRuntimeCallingConvention calling_convention;
-    codegen->EmitParallelMoves(
-        locations->InAt(1),
-        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
-        Primitive::kPrimNot,
-        object_class,
-        Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
-        Primitive::kPrimNot);
-
+    codegen->EmitParallelMoves(locations->InAt(0),
+                               Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+                               Primitive::kPrimNot,
+                               locations->InAt(1),
+                               Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+                               Primitive::kPrimNot);
     if (instruction_->IsInstanceOf()) {
       arm_codegen->InvokeRuntime(kQuickInstanceofNonTrivial,
                                  instruction_,
                                  instruction_->GetDexPc(),
                                  this);
-      CheckEntrypointTypes<
-          kQuickInstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*>();
+      CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
       arm_codegen->Move32(locations->Out(), Location::RegisterLocation(R0));
     } else {
       DCHECK(instruction_->IsCheckCast());
-      arm_codegen->InvokeRuntime(kQuickCheckCast, instruction_, instruction_->GetDexPc(), this);
-      CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>();
+      arm_codegen->InvokeRuntime(kQuickCheckInstanceOf,
+                                 instruction_,
+                                 instruction_->GetDexPc(),
+                                 this);
+      CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
     }
 
     if (!is_fatal_) {
@@ -585,11 +599,23 @@
   DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARM);
 };
 
-// Slow path marking an object during a read barrier.
+// Slow path marking an object reference `ref` during a read
+// barrier. The field `obj.field` in the object `obj` holding this
+// reference does not get updated by this slow path after marking (see
+// ReadBarrierMarkAndUpdateFieldSlowPathARM below for that).
+//
+// This means that after the execution of this slow path, `ref` will
+// always be up-to-date, but `obj.field` may not; i.e., after the
+// flip, `ref` will be a to-space reference, but `obj.field` will
+// probably still be a from-space reference (unless it gets updated by
+// another thread, or if another thread installed another object
+// reference (different from `ref`) in `obj.field`).
 class ReadBarrierMarkSlowPathARM : public SlowPathCodeARM {
  public:
-  ReadBarrierMarkSlowPathARM(HInstruction* instruction, Location obj)
-      : SlowPathCodeARM(instruction), obj_(obj) {
+  ReadBarrierMarkSlowPathARM(HInstruction* instruction,
+                             Location ref,
+                             Location entrypoint = Location::NoLocation())
+      : SlowPathCodeARM(instruction), ref_(ref), entrypoint_(entrypoint) {
     DCHECK(kEmitCompilerReadBarrier);
   }
 
@@ -597,9 +623,9 @@
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
-    Register reg = obj_.AsRegister<Register>();
+    Register ref_reg = ref_.AsRegister<Register>();
     DCHECK(locations->CanCall());
-    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg));
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
     DCHECK(instruction_->IsInstanceFieldGet() ||
            instruction_->IsStaticFieldGet() ||
            instruction_->IsArrayGet() ||
@@ -612,44 +638,230 @@
            (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
         << "Unexpected instruction in read barrier marking slow path: "
         << instruction_->DebugName();
+    // The read barrier instrumentation of object ArrayGet
+    // instructions does not support the HIntermediateAddress
+    // instruction.
+    DCHECK(!(instruction_->IsArrayGet() &&
+             instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
 
     __ Bind(GetEntryLabel());
     // No need to save live registers; it's taken care of by the
     // entrypoint. Also, there is no need to update the stack mask,
     // as this runtime call will not trigger a garbage collection.
     CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
-    DCHECK_NE(reg, SP);
-    DCHECK_NE(reg, LR);
-    DCHECK_NE(reg, PC);
+    DCHECK_NE(ref_reg, SP);
+    DCHECK_NE(ref_reg, LR);
+    DCHECK_NE(ref_reg, PC);
     // IP is used internally by the ReadBarrierMarkRegX entry point
     // as a temporary, it cannot be the entry point's input/output.
-    DCHECK_NE(reg, IP);
-    DCHECK(0 <= reg && reg < kNumberOfCoreRegisters) << reg;
+    DCHECK_NE(ref_reg, IP);
+    DCHECK(0 <= ref_reg && ref_reg < kNumberOfCoreRegisters) << ref_reg;
     // "Compact" slow path, saving two moves.
     //
     // Instead of using the standard runtime calling convention (input
     // and output in R0):
     //
-    //   R0 <- obj
+    //   R0 <- ref
     //   R0 <- ReadBarrierMark(R0)
-    //   obj <- R0
+    //   ref <- R0
     //
-    // we just use rX (the register holding `obj`) as input and output
+    // we just use rX (the register containing `ref`) as input and output
+    // of a dedicated entrypoint:
+    //
+    //   rX <- ReadBarrierMarkRegX(rX)
+    //
+    if (entrypoint_.IsValid()) {
+      arm_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this);
+      __ blx(entrypoint_.AsRegister<Register>());
+    } else {
+      int32_t entry_point_offset =
+          CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg);
+      // This runtime call does not require a stack map.
+      arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+    }
+    __ b(GetExitLabel());
+  }
+
+ private:
+  // The location (register) of the marked object reference.
+  const Location ref_;
+
+  // The location of the entrypoint if already loaded.
+  const Location entrypoint_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM);
+};
+
+// Slow path marking an object reference `ref` during a read barrier,
+// and if needed, atomically updating the field `obj.field` in the
+// object `obj` holding this reference after marking (contrary to
+// ReadBarrierMarkSlowPathARM above, which never tries to update
+// `obj.field`).
+//
+// This means that after the execution of this slow path, both `ref`
+// and `obj.field` will be up-to-date; i.e., after the flip, both will
+// hold the same to-space reference (unless another thread installed
+// another object reference (different from `ref`) in `obj.field`).
+class ReadBarrierMarkAndUpdateFieldSlowPathARM : public SlowPathCodeARM {
+ public:
+  ReadBarrierMarkAndUpdateFieldSlowPathARM(HInstruction* instruction,
+                                           Location ref,
+                                           Register obj,
+                                           Location field_offset,
+                                           Register temp1,
+                                           Register temp2)
+      : SlowPathCodeARM(instruction),
+        ref_(ref),
+        obj_(obj),
+        field_offset_(field_offset),
+        temp1_(temp1),
+        temp2_(temp2) {
+    DCHECK(kEmitCompilerReadBarrier);
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkAndUpdateFieldSlowPathARM"; }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    Register ref_reg = ref_.AsRegister<Register>();
+    DCHECK(locations->CanCall());
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
+    // This slow path is only used by the UnsafeCASObject intrinsic.
+    DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
+        << "Unexpected instruction in read barrier marking and field updating slow path: "
+        << instruction_->DebugName();
+    DCHECK(instruction_->GetLocations()->Intrinsified());
+    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);
+    DCHECK(field_offset_.IsRegisterPair()) << field_offset_;
+
+    __ Bind(GetEntryLabel());
+
+    // Save the old reference.
+    // Note that we cannot use IP to save the old reference, as IP is
+    // used internally by the ReadBarrierMarkRegX entry point, and we
+    // need the old reference after the call to that entry point.
+    DCHECK_NE(temp1_, IP);
+    __ Mov(temp1_, ref_reg);
+
+    // No need to save live registers; it's taken care of by the
+    // entrypoint. Also, there is no need to update the stack mask,
+    // as this runtime call will not trigger a garbage collection.
+    CodeGeneratorARM* arm_codegen = down_cast<CodeGeneratorARM*>(codegen);
+    DCHECK_NE(ref_reg, SP);
+    DCHECK_NE(ref_reg, LR);
+    DCHECK_NE(ref_reg, PC);
+    // IP is used internally by the ReadBarrierMarkRegX entry point
+    // as a temporary, it cannot be the entry point's input/output.
+    DCHECK_NE(ref_reg, IP);
+    DCHECK(0 <= ref_reg && ref_reg < kNumberOfCoreRegisters) << ref_reg;
+    // "Compact" slow path, saving two moves.
+    //
+    // Instead of using the standard runtime calling convention (input
+    // and output in R0):
+    //
+    //   R0 <- ref
+    //   R0 <- ReadBarrierMark(R0)
+    //   ref <- R0
+    //
+    // we just use rX (the register containing `ref`) as input and output
     // of a dedicated entrypoint:
     //
     //   rX <- ReadBarrierMarkRegX(rX)
     //
     int32_t entry_point_offset =
-        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(reg);
+        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(ref_reg);
     // This runtime call does not require a stack map.
     arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+
+    // If the new reference is different from the old reference,
+    // update the field in the holder (`*(obj_ + field_offset_)`).
+    //
+    // Note that this field could also hold a different object, if
+    // another thread had concurrently changed it. In that case, the
+    // LDREX/SUBS/ITNE sequence of instructions in the compare-and-set
+    // (CAS) operation below would abort the CAS, leaving the field
+    // as-is.
+    Label done;
+    __ cmp(temp1_, ShifterOperand(ref_reg));
+    __ b(&done, EQ);
+
+    // Update the the holder's field atomically.  This may fail if
+    // mutator updates before us, but it's OK.  This is achieved
+    // using a strong compare-and-set (CAS) operation with relaxed
+    // memory synchronization ordering, where the expected value is
+    // the old reference and the desired value is the new reference.
+
+    // Convenience aliases.
+    Register base = obj_;
+    // The UnsafeCASObject intrinsic uses a register pair as field
+    // offset ("long offset"), of which only the low part contains
+    // data.
+    Register offset = field_offset_.AsRegisterPairLow<Register>();
+    Register expected = temp1_;
+    Register value = ref_reg;
+    Register tmp_ptr = IP;       // Pointer to actual memory.
+    Register tmp = temp2_;       // Value in memory.
+
+    __ add(tmp_ptr, base, ShifterOperand(offset));
+
+    if (kPoisonHeapReferences) {
+      __ PoisonHeapReference(expected);
+      if (value == expected) {
+        // Do not poison `value`, as it is the same register as
+        // `expected`, which has just been poisoned.
+      } else {
+        __ PoisonHeapReference(value);
+      }
+    }
+
+    // do {
+    //   tmp = [r_ptr] - expected;
+    // } while (tmp == 0 && failure([r_ptr] <- r_new_value));
+
+    Label loop_head, exit_loop;
+    __ Bind(&loop_head);
+
+    __ ldrex(tmp, tmp_ptr);
+
+    __ subs(tmp, tmp, ShifterOperand(expected));
+
+    __ it(NE);
+    __ clrex(NE);
+
+    __ b(&exit_loop, NE);
+
+    __ strex(tmp, value, tmp_ptr);
+    __ cmp(tmp, ShifterOperand(1));
+    __ b(&loop_head, EQ);
+
+    __ Bind(&exit_loop);
+
+    if (kPoisonHeapReferences) {
+      __ UnpoisonHeapReference(expected);
+      if (value == expected) {
+        // Do not unpoison `value`, as it is the same register as
+        // `expected`, which has just been unpoisoned.
+      } else {
+        __ UnpoisonHeapReference(value);
+      }
+    }
+
+    __ Bind(&done);
     __ b(GetExitLabel());
   }
 
  private:
-  const Location obj_;
+  // The location (register) of the marked object reference.
+  const Location ref_;
+  // The register containing the object holding the marked object reference field.
+  const Register obj_;
+  // The location of the offset of the marked reference field within `obj_`.
+  Location field_offset_;
 
-  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM);
+  const Register temp1_;
+  const Register temp2_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathARM);
 };
 
 // Slow path generating a read barrier for a heap reference.
@@ -695,6 +907,11 @@
            (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())
         << "Unexpected instruction in read barrier for heap reference slow path: "
         << instruction_->DebugName();
+    // The read barrier instrumentation of object ArrayGet
+    // instructions does not support the HIntermediateAddress
+    // instruction.
+    DCHECK(!(instruction_->IsArrayGet() &&
+             instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
 
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, locations);
@@ -1031,9 +1248,6 @@
 }
 
 void CodeGeneratorARM::SetupBlockedRegisters() const {
-  // Don't allocate the dalvik style register pair passing.
-  blocked_register_pairs_[R1_R2] = true;
-
   // Stack register, LR and PC are always reserved.
   blocked_core_registers_[SP] = true;
   blocked_core_registers_[LR] = true;
@@ -1053,19 +1267,6 @@
       blocked_fpu_registers_[kFpuCalleeSaves[i]] = true;
     }
   }
-
-  UpdateBlockedPairRegisters();
-}
-
-void CodeGeneratorARM::UpdateBlockedPairRegisters() const {
-  for (int i = 0; i < kNumberOfRegisterPairs; i++) {
-    ArmManagedRegister current =
-        ArmManagedRegister::FromRegisterPair(static_cast<RegisterPair>(i));
-    if (blocked_core_registers_[current.AsRegisterPairLow()]
-        || blocked_core_registers_[current.AsRegisterPairHigh()]) {
-      blocked_register_pairs_[i] = true;
-    }
-  }
 }
 
 InstructionCodeGeneratorARM::InstructionCodeGeneratorARM(HGraph* graph, CodeGeneratorARM* codegen)
@@ -1129,7 +1330,13 @@
   int adjust = GetFrameSize() - FrameEntrySpillSize();
   __ AddConstant(SP, -adjust);
   __ cfi().AdjustCFAOffset(adjust);
-  __ StoreToOffset(kStoreWord, kMethodRegisterArgument, SP, 0);
+
+  // Save the current method if we need it. Note that we do not
+  // do this in HCurrentMethod, as the instruction might have been removed
+  // in the SSA graph.
+  if (RequiresCurrentMethod()) {
+    __ StoreToOffset(kStoreWord, kMethodRegisterArgument, SP, 0);
+  }
 }
 
 void CodeGeneratorARM::GenerateFrameExit() {
@@ -4652,8 +4859,6 @@
                                         instruction->IsStringCharAt();
   HInstruction* array_instr = instruction->GetArray();
   bool has_intermediate_address = array_instr->IsIntermediateAddress();
-  // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
-  DCHECK(!(has_intermediate_address && kEmitCompilerReadBarrier));
 
   switch (type) {
     case Primitive::kPrimBoolean:
@@ -4661,16 +4866,21 @@
     case Primitive::kPrimShort:
     case Primitive::kPrimChar:
     case Primitive::kPrimInt: {
+      Register length;
+      if (maybe_compressed_char_at) {
+        length = locations->GetTemp(0).AsRegister<Register>();
+        uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
+        __ LoadFromOffset(kLoadWord, length, obj, count_offset);
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+      }
       if (index.IsConstant()) {
         int32_t const_index = index.GetConstant()->AsIntConstant()->GetValue();
         if (maybe_compressed_char_at) {
-          Register length = IP;
           Label uncompressed_load, done;
-          uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
-          __ LoadFromOffset(kLoadWord, length, obj, count_offset);
-          codegen_->MaybeRecordImplicitNullCheck(instruction);
-          __ cmp(length, ShifterOperand(0));
-          __ b(&uncompressed_load, GE);
+          __ Lsrs(length, length, 1u);  // LSRS has a 16-bit encoding, TST (immediate) does not.
+          static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
+                        "Expecting 0=compressed, 1=uncompressed");
+          __ b(&uncompressed_load, CS);
           __ LoadFromOffset(kLoadUnsignedByte,
                             out_loc.AsRegister<Register>(),
                             obj,
@@ -4705,12 +4915,10 @@
         }
         if (maybe_compressed_char_at) {
           Label uncompressed_load, done;
-          uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
-          Register length = locations->GetTemp(0).AsRegister<Register>();
-          __ LoadFromOffset(kLoadWord, length, obj, count_offset);
-          codegen_->MaybeRecordImplicitNullCheck(instruction);
-          __ cmp(length, ShifterOperand(0));
-          __ b(&uncompressed_load, GE);
+          __ Lsrs(length, length, 1u);  // LSRS has a 16-bit encoding, TST (immediate) does not.
+          static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
+                        "Expecting 0=compressed, 1=uncompressed");
+          __ b(&uncompressed_load, CS);
           __ ldrb(out_loc.AsRegister<Register>(),
                   Address(temp, index.AsRegister<Register>(), Shift::LSL, 0));
           __ b(&done);
@@ -4726,6 +4934,11 @@
     }
 
     case Primitive::kPrimNot: {
+      // The read barrier instrumentation of object ArrayGet
+      // instructions does not support the HIntermediateAddress
+      // instruction.
+      DCHECK(!(has_intermediate_address && kEmitCompilerReadBarrier));
+
       static_assert(
           sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
           "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
@@ -4866,8 +5079,6 @@
   Location value_loc = locations->InAt(2);
   HInstruction* array_instr = instruction->GetArray();
   bool has_intermediate_address = array_instr->IsIntermediateAddress();
-  // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
-  DCHECK(!(has_intermediate_address && kEmitCompilerReadBarrier));
 
   switch (value_type) {
     case Primitive::kPrimBoolean:
@@ -5112,13 +5323,11 @@
   codegen_->MaybeRecordImplicitNullCheck(instruction);
   // Mask out compression flag from String's array length.
   if (mirror::kUseStringCompression && instruction->IsStringLength()) {
-    __ bic(out, out, ShifterOperand(1u << 31));
+    __ Lsr(out, out, 1u);
   }
 }
 
 void LocationsBuilderARM::VisitIntermediateAddress(HIntermediateAddress* instruction) {
-  // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
-  DCHECK(!kEmitCompilerReadBarrier);
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
 
@@ -5133,9 +5342,6 @@
   Location first = locations->InAt(0);
   Location second = locations->InAt(1);
 
-  // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
-  DCHECK(!kEmitCompilerReadBarrier);
-
   if (second.IsRegister()) {
     __ add(out.AsRegister<Register>(),
            first.AsRegister<Register>(),
@@ -5544,7 +5750,9 @@
   Location out_loc = locations->Out();
   Register out = out_loc.AsRegister<Register>();
 
-  const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
+  const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
+      ? kWithoutReadBarrier
+      : kCompilerReadBarrierOption;
   bool generate_null_check = false;
   switch (cls->GetLoadKind()) {
     case HLoadClass::LoadKind::kReferrersClass: {
@@ -5556,17 +5764,17 @@
                               out_loc,
                               current_method,
                               ArtMethod::DeclaringClassOffset().Int32Value(),
-                              requires_read_barrier);
+                              read_barrier_option);
       break;
     }
     case HLoadClass::LoadKind::kBootImageLinkTimeAddress: {
-      DCHECK(!requires_read_barrier);
+      DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
       __ LoadLiteral(out, codegen_->DeduplicateBootImageTypeLiteral(cls->GetDexFile(),
                                                                     cls->GetTypeIndex()));
       break;
     }
     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
-      DCHECK(!requires_read_barrier);
+      DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
       CodeGeneratorARM::PcRelativePatchInfo* labels =
           codegen_->NewPcRelativeTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
       __ BindTrackedLabel(&labels->movw_label);
@@ -5578,7 +5786,7 @@
       break;
     }
     case HLoadClass::LoadKind::kBootImageAddress: {
-      DCHECK(!requires_read_barrier);
+      DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
       DCHECK_NE(cls->GetAddress(), 0u);
       uint32_t address = dchecked_integral_cast<uint32_t>(cls->GetAddress());
       __ LoadLiteral(out, codegen_->DeduplicateBootImageAddressLiteral(address));
@@ -5598,7 +5806,7 @@
       uint32_t offset = address & MaxInt<uint32_t>(offset_bits);
       __ LoadLiteral(out, codegen_->DeduplicateDexCacheAddressLiteral(base_address));
       // /* GcRoot<mirror::Class> */ out = *(base_address + offset)
-      GenerateGcRootFieldLoad(cls, out_loc, out, offset, requires_read_barrier);
+      GenerateGcRootFieldLoad(cls, out_loc, out, offset, read_barrier_option);
       generate_null_check = !cls->IsInDexCache();
       break;
     }
@@ -5607,7 +5815,7 @@
       HArmDexCacheArraysBase* base = cls->InputAt(0)->AsArmDexCacheArraysBase();
       int32_t offset = cls->GetDexCacheElementOffset() - base->GetElementOffset();
       // /* GcRoot<mirror::Class> */ out = *(dex_cache_arrays_base + offset)
-      GenerateGcRootFieldLoad(cls, out_loc, base_reg, offset, requires_read_barrier);
+      GenerateGcRootFieldLoad(cls, out_loc, base_reg, offset, read_barrier_option);
       generate_null_check = !cls->IsInDexCache();
       break;
     }
@@ -5621,7 +5829,7 @@
                         ArtMethod::DexCacheResolvedTypesOffset(kArmPointerSize).Int32Value());
       // /* GcRoot<mirror::Class> */ out = out[type_index]
       size_t offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex());
-      GenerateGcRootFieldLoad(cls, out_loc, out, offset, requires_read_barrier);
+      GenerateGcRootFieldLoad(cls, out_loc, out, offset, read_barrier_option);
       generate_null_check = !cls->IsInDexCache();
     }
   }
@@ -5682,9 +5890,6 @@
       break;
     case HLoadString::LoadKind::kBootImageAddress:
       break;
-    case HLoadString::LoadKind::kDexCacheAddress:
-      DCHECK(Runtime::Current()->UseJitCompilation());
-      break;
     case HLoadString::LoadKind::kBssEntry:
       DCHECK(!Runtime::Current()->UseJitCompilation());
       break;
@@ -5704,10 +5909,25 @@
 
   HLoadString::LoadKind load_kind = load->GetLoadKind();
   if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) {
-    locations->SetInAt(0, Location::RequiresRegister());
     locations->SetOut(Location::RegisterLocation(R0));
   } else {
     locations->SetOut(Location::RequiresRegister());
+    if (load_kind == HLoadString::LoadKind::kBssEntry) {
+      if (!kUseReadBarrier || kUseBakerReadBarrier) {
+        // Rely on the pResolveString and/or marking to save everything, including temps.
+        // Note that IP may theoretically be clobbered by saving/restoring the live register
+        // (only one thanks to the custom calling convention), so we request a different temp.
+        locations->AddTemp(Location::RequiresRegister());
+        RegisterSet caller_saves = RegisterSet::Empty();
+        InvokeRuntimeCallingConvention calling_convention;
+        caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+        // TODO: Add GetReturnLocation() to the calling convention so that we can DCHECK()
+        // that the the kPrimNot result register is the same as the first argument register.
+        locations->SetCustomSlowPathCallerSaves(caller_saves);
+      } else {
+        // For non-Baker read barrier we have a temp-clobbering call.
+      }
+    }
   }
 }
 
@@ -5743,15 +5963,16 @@
     }
     case HLoadString::LoadKind::kBssEntry: {
       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
+      Register temp = locations->GetTemp(0).AsRegister<Register>();
       CodeGeneratorARM::PcRelativePatchInfo* labels =
           codegen_->NewPcRelativeStringPatch(load->GetDexFile(), load->GetStringIndex());
       __ BindTrackedLabel(&labels->movw_label);
-      __ movw(out, /* placeholder */ 0u);
+      __ movw(temp, /* placeholder */ 0u);
       __ BindTrackedLabel(&labels->movt_label);
-      __ movt(out, /* placeholder */ 0u);
+      __ movt(temp, /* placeholder */ 0u);
       __ BindTrackedLabel(&labels->add_pc_label);
-      __ add(out, out, ShifterOperand(PC));
-      GenerateGcRootFieldLoad(load, out_loc, out, 0);
+      __ add(temp, temp, ShifterOperand(PC));
+      GenerateGcRootFieldLoad(load, out_loc, temp, /* offset */ 0, kCompilerReadBarrierOption);
       SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM(load);
       codegen_->AddSlowPath(slow_path);
       __ CompareAndBranchIfZero(out, slow_path->GetEntryLabel());
@@ -5765,6 +5986,7 @@
   // TODO: Consider re-adding the compiler code to do string dex cache lookup again.
   DCHECK(load_kind == HLoadString::LoadKind::kDexCacheViaMethod);
   InvokeRuntimeCallingConvention calling_convention;
+  DCHECK_EQ(calling_convention.GetRegisterAt(0), out);
   __ LoadImmediate(calling_convention.GetRegisterAt(0), load->GetStringIndex());
   codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
   CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
@@ -5806,12 +6028,26 @@
   CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
 }
 
-static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
-  return kEmitCompilerReadBarrier &&
-      (kUseBakerReadBarrier ||
-       type_check_kind == TypeCheckKind::kAbstractClassCheck ||
-       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
-       type_check_kind == TypeCheckKind::kArrayObjectCheck);
+// Temp is used for read barrier.
+static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
+  if (kEmitCompilerReadBarrier &&
+       (kUseBakerReadBarrier ||
+          type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+          type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+          type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+    return 1;
+  }
+  return 0;
+}
+
+// Interface case has 3 temps, one for holding the number of interfaces, one for the current
+// interface pointer, one for loading the current interface.
+// The other checks have one temp for loading the object's class.
+static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
+  if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
+    return 3;
+  }
+  return 1 + NumberOfInstanceOfTemps(type_check_kind);
 }
 
 void LocationsBuilderARM::VisitInstanceOf(HInstanceOf* instruction) {
@@ -5843,11 +6079,7 @@
   // The "out" register is used as a temporary, so it overlaps with the inputs.
   // Note that TypeCheckSlowPathARM uses this register too.
   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
-  // When read barriers are enabled, we need a temporary register for
-  // some cases.
-  if (TypeCheckNeedsATemporary(type_check_kind)) {
-    locations->AddTemp(Location::RequiresRegister());
-  }
+  locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
 }
 
 void InstructionCodeGeneratorARM::VisitInstanceOf(HInstanceOf* instruction) {
@@ -5858,9 +6090,9 @@
   Register cls = locations->InAt(1).AsRegister<Register>();
   Location out_loc = locations->Out();
   Register out = out_loc.AsRegister<Register>();
-  Location maybe_temp_loc = TypeCheckNeedsATemporary(type_check_kind) ?
-      locations->GetTemp(0) :
-      Location::NoLocation();
+  const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
+  DCHECK_LE(num_temps, 1u);
+  Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
@@ -5874,11 +6106,15 @@
     __ CompareAndBranchIfZero(obj, &zero);
   }
 
-  // /* HeapReference<Class> */ out = obj->klass_
-  GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc);
-
   switch (type_check_kind) {
     case TypeCheckKind::kExactCheck: {
+      // /* HeapReference<Class> */ out = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp_loc,
+                                        kCompilerReadBarrierOption);
       __ cmp(out, ShifterOperand(cls));
       // Classes must be equal for the instanceof to succeed.
       __ b(&zero, NE);
@@ -5888,12 +6124,23 @@
     }
 
     case TypeCheckKind::kAbstractClassCheck: {
+      // /* HeapReference<Class> */ out = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp_loc,
+                                        kCompilerReadBarrierOption);
       // If the class is abstract, we eagerly fetch the super class of the
       // object to avoid doing a comparison we know will fail.
       Label loop;
       __ Bind(&loop);
       // /* HeapReference<Class> */ out = out->super_class_
-      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       out_loc,
+                                       super_offset,
+                                       maybe_temp_loc,
+                                       kCompilerReadBarrierOption);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ CompareAndBranchIfZero(out, &done);
       __ cmp(out, ShifterOperand(cls));
@@ -5906,13 +6153,24 @@
     }
 
     case TypeCheckKind::kClassHierarchyCheck: {
+      // /* HeapReference<Class> */ out = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp_loc,
+                                        kCompilerReadBarrierOption);
       // Walk over the class hierarchy to find a match.
       Label loop, success;
       __ Bind(&loop);
       __ cmp(out, ShifterOperand(cls));
       __ b(&success, EQ);
       // /* HeapReference<Class> */ out = out->super_class_
-      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       out_loc,
+                                       super_offset,
+                                       maybe_temp_loc,
+                                       kCompilerReadBarrierOption);
       __ CompareAndBranchIfNonZero(out, &loop);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ b(&done);
@@ -5925,13 +6183,24 @@
     }
 
     case TypeCheckKind::kArrayObjectCheck: {
+      // /* HeapReference<Class> */ out = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp_loc,
+                                        kCompilerReadBarrierOption);
       // Do an exact check.
       Label exact_check;
       __ cmp(out, ShifterOperand(cls));
       __ b(&exact_check, EQ);
       // Otherwise, we need to check that the object's class is a non-primitive array.
       // /* HeapReference<Class> */ out = out->component_type_
-      GenerateReferenceLoadOneRegister(instruction, out_loc, component_offset, maybe_temp_loc);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       out_loc,
+                                       component_offset,
+                                       maybe_temp_loc,
+                                       kCompilerReadBarrierOption);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ CompareAndBranchIfZero(out, &done);
       __ LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset);
@@ -5944,6 +6213,14 @@
     }
 
     case TypeCheckKind::kArrayCheck: {
+      // No read barrier since the slow path will retry upon failure.
+      // /* HeapReference<Class> */ out = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp_loc,
+                                        kWithoutReadBarrier);
       __ cmp(out, ShifterOperand(cls));
       DCHECK(locations->OnlyCallsOnSlowPath());
       slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM(instruction,
@@ -6027,13 +6304,7 @@
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
-  // Note that TypeCheckSlowPathARM uses this "temp" register too.
-  locations->AddTemp(Location::RequiresRegister());
-  // When read barriers are enabled, we need an additional temporary
-  // register for some cases.
-  if (TypeCheckNeedsATemporary(type_check_kind)) {
-    locations->AddTemp(Location::RequiresRegister());
-  }
+  locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
 }
 
 void InstructionCodeGeneratorARM::VisitCheckCast(HCheckCast* instruction) {
@@ -6044,20 +6315,31 @@
   Register cls = locations->InAt(1).AsRegister<Register>();
   Location temp_loc = locations->GetTemp(0);
   Register temp = temp_loc.AsRegister<Register>();
-  Location maybe_temp2_loc = TypeCheckNeedsATemporary(type_check_kind) ?
-      locations->GetTemp(1) :
-      Location::NoLocation();
-  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-  uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
-  uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
-  uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+  const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
+  DCHECK_LE(num_temps, 3u);
+  Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation();
+  Location maybe_temp3_loc = (num_temps >= 3) ? locations->GetTemp(2) : Location::NoLocation();
+  const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+  const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+  const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+  const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+  const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
+  const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
+  const uint32_t object_array_data_offset =
+      mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
 
-  bool is_type_check_slow_path_fatal =
-      (type_check_kind == TypeCheckKind::kExactCheck ||
-       type_check_kind == TypeCheckKind::kAbstractClassCheck ||
-       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
-       type_check_kind == TypeCheckKind::kArrayObjectCheck) &&
-      !instruction->CanThrowIntoCatchBlock();
+  // Always false for read barriers since we may need to go to the entrypoint for non-fatal cases
+  // from false negatives. The false negatives may come from avoiding read barriers below. Avoiding
+  // read barriers is done for performance and code size reasons.
+  bool is_type_check_slow_path_fatal = false;
+  if (!kEmitCompilerReadBarrier) {
+    is_type_check_slow_path_fatal =
+        (type_check_kind == TypeCheckKind::kExactCheck ||
+         type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+         type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+         type_check_kind == TypeCheckKind::kArrayObjectCheck) &&
+        !instruction->CanThrowIntoCatchBlock();
+  }
   SlowPathCodeARM* type_check_slow_path =
       new (GetGraph()->GetArena()) TypeCheckSlowPathARM(instruction,
                                                         is_type_check_slow_path_fatal);
@@ -6069,12 +6351,17 @@
     __ CompareAndBranchIfZero(obj, &done);
   }
 
-  // /* HeapReference<Class> */ temp = obj->klass_
-  GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
-
   switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
     case TypeCheckKind::kArrayCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
+
       __ cmp(temp, ShifterOperand(cls));
       // Jump to slow path for throwing the exception or doing a
       // more involved array check.
@@ -6083,34 +6370,44 @@
     }
 
     case TypeCheckKind::kAbstractClassCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
+
       // If the class is abstract, we eagerly fetch the super class of the
       // object to avoid doing a comparison we know will fail.
-      Label loop, compare_classes;
+      Label loop;
       __ Bind(&loop);
       // /* HeapReference<Class> */ temp = temp->super_class_
-      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       temp_loc,
+                                       super_offset,
+                                       maybe_temp2_loc,
+                                       kWithoutReadBarrier);
 
-      // If the class reference currently in `temp` is not null, jump
-      // to the `compare_classes` label to compare it with the checked
-      // class.
-      __ CompareAndBranchIfNonZero(temp, &compare_classes);
-      // Otherwise, jump to the slow path to throw the exception.
-      //
-      // But before, move back the object's class into `temp` before
-      // going into the slow path, as it has been overwritten in the
-      // meantime.
-      // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(
-          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
-      __ b(type_check_slow_path->GetEntryLabel());
+      // If the class reference currently in `temp` is null, jump to the slow path to throw the
+      // exception.
+      __ CompareAndBranchIfZero(temp, type_check_slow_path->GetEntryLabel());
 
-      __ Bind(&compare_classes);
+      // Otherwise, compare the classes.
       __ cmp(temp, ShifterOperand(cls));
       __ b(&loop, NE);
       break;
     }
 
     case TypeCheckKind::kClassHierarchyCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
+
       // Walk over the class hierarchy to find a match.
       Label loop;
       __ Bind(&loop);
@@ -6118,65 +6415,52 @@
       __ b(&done, EQ);
 
       // /* HeapReference<Class> */ temp = temp->super_class_
-      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       temp_loc,
+                                       super_offset,
+                                       maybe_temp2_loc,
+                                       kWithoutReadBarrier);
 
-      // If the class reference currently in `temp` is not null, jump
-      // back at the beginning of the loop.
-      __ CompareAndBranchIfNonZero(temp, &loop);
-      // Otherwise, jump to the slow path to throw the exception.
-      //
-      // But before, move back the object's class into `temp` before
-      // going into the slow path, as it has been overwritten in the
-      // meantime.
-      // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(
-          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
-      __ b(type_check_slow_path->GetEntryLabel());
+      // If the class reference currently in `temp` is null, jump to the slow path to throw the
+      // exception.
+      __ CompareAndBranchIfZero(temp, type_check_slow_path->GetEntryLabel());
+      // Otherwise, jump to the beginning of the loop.
+      __ b(&loop);
       break;
     }
 
     case TypeCheckKind::kArrayObjectCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
+
       // Do an exact check.
-      Label check_non_primitive_component_type;
       __ cmp(temp, ShifterOperand(cls));
       __ b(&done, EQ);
 
       // Otherwise, we need to check that the object's class is a non-primitive array.
       // /* HeapReference<Class> */ temp = temp->component_type_
-      GenerateReferenceLoadOneRegister(instruction, temp_loc, component_offset, maybe_temp2_loc);
-
-      // If the component type is not null (i.e. the object is indeed
-      // an array), jump to label `check_non_primitive_component_type`
-      // to further check that this component type is not a primitive
-      // type.
-      __ CompareAndBranchIfNonZero(temp, &check_non_primitive_component_type);
-      // Otherwise, jump to the slow path to throw the exception.
-      //
-      // But before, move back the object's class into `temp` before
-      // going into the slow path, as it has been overwritten in the
-      // meantime.
-      // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(
-          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
-      __ b(type_check_slow_path->GetEntryLabel());
-
-      __ Bind(&check_non_primitive_component_type);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       temp_loc,
+                                       component_offset,
+                                       maybe_temp2_loc,
+                                       kWithoutReadBarrier);
+      // If the component type is null, jump to the slow path to throw the exception.
+      __ CompareAndBranchIfZero(temp, type_check_slow_path->GetEntryLabel());
+      // Otherwise,the object is indeed an array, jump to label `check_non_primitive_component_type`
+      // to further check that this component type is not a primitive type.
       __ LoadFromOffset(kLoadUnsignedHalfword, temp, temp, primitive_offset);
       static_assert(Primitive::kPrimNot == 0, "Expected 0 for art::Primitive::kPrimNot");
-      __ CompareAndBranchIfZero(temp, &done);
-      // Same comment as above regarding `temp` and the slow path.
-      // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(
-          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
-      __ b(type_check_slow_path->GetEntryLabel());
+      __ CompareAndBranchIfNonZero(temp, type_check_slow_path->GetEntryLabel());
       break;
     }
 
     case TypeCheckKind::kUnresolvedCheck:
-    case TypeCheckKind::kInterfaceCheck:
-      // We always go into the type check slow path for the unresolved
-      // and interface check cases.
-      //
+      // We always go into the type check slow path for the unresolved check case.
       // We cannot directly call the CheckCast runtime entry point
       // without resorting to a type checking slow path here (i.e. by
       // calling InvokeRuntime directly), as it would require to
@@ -6184,15 +6468,47 @@
       // instruction (following the runtime calling convention), which
       // might be cluttered by the potential first read barrier
       // emission at the beginning of this method.
-      //
-      // TODO: Introduce a new runtime entry point taking the object
-      // to test (instead of its class) as argument, and let it deal
-      // with the read barrier issues. This will let us refactor this
-      // case of the `switch` code as it was previously (with a direct
-      // call to the runtime not using a type checking slow path).
-      // This should also be beneficial for the other cases above.
+
       __ b(type_check_slow_path->GetEntryLabel());
       break;
+
+    case TypeCheckKind::kInterfaceCheck: {
+      // Avoid read barriers to improve performance of the fast path. We can not get false
+      // positives by doing this.
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
+
+      // /* HeapReference<Class> */ temp = temp->iftable_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        temp_loc,
+                                        iftable_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
+      // Iftable is never null.
+      __ ldr(maybe_temp2_loc.AsRegister<Register>(), Address(temp, array_length_offset));
+      // Loop through the iftable and check if any class matches.
+      Label start_loop;
+      __ Bind(&start_loop);
+      __ CompareAndBranchIfZero(maybe_temp2_loc.AsRegister<Register>(),
+                                type_check_slow_path->GetEntryLabel());
+      __ ldr(maybe_temp3_loc.AsRegister<Register>(), Address(temp, object_array_data_offset));
+      __ MaybeUnpoisonHeapReference(maybe_temp3_loc.AsRegister<Register>());
+      // Go to next interface.
+      __ add(temp, temp, ShifterOperand(2 * kHeapReferenceSize));
+      __ sub(maybe_temp2_loc.AsRegister<Register>(),
+             maybe_temp2_loc.AsRegister<Register>(),
+             ShifterOperand(2));
+      // Compare the classes and continue the loop if they do not match.
+      __ cmp(cls, ShifterOperand(maybe_temp3_loc.AsRegister<Register>()));
+      __ b(&start_loop, NE);
+      break;
+    }
   }
   __ Bind(&done);
 
@@ -6466,12 +6782,15 @@
   }
 }
 
-void InstructionCodeGeneratorARM::GenerateReferenceLoadOneRegister(HInstruction* instruction,
-                                                                   Location out,
-                                                                   uint32_t offset,
-                                                                   Location maybe_temp) {
+void InstructionCodeGeneratorARM::GenerateReferenceLoadOneRegister(
+    HInstruction* instruction,
+    Location out,
+    uint32_t offset,
+    Location maybe_temp,
+    ReadBarrierOption read_barrier_option) {
   Register out_reg = out.AsRegister<Register>();
-  if (kEmitCompilerReadBarrier) {
+  if (read_barrier_option == kWithReadBarrier) {
+    CHECK(kEmitCompilerReadBarrier);
     DCHECK(maybe_temp.IsRegister()) << maybe_temp;
     if (kUseBakerReadBarrier) {
       // Load with fast path based Baker's read barrier.
@@ -6496,14 +6815,17 @@
   }
 }
 
-void InstructionCodeGeneratorARM::GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
-                                                                    Location out,
-                                                                    Location obj,
-                                                                    uint32_t offset,
-                                                                    Location maybe_temp) {
+void InstructionCodeGeneratorARM::GenerateReferenceLoadTwoRegisters(
+    HInstruction* instruction,
+    Location out,
+    Location obj,
+    uint32_t offset,
+    Location maybe_temp,
+    ReadBarrierOption read_barrier_option) {
   Register out_reg = out.AsRegister<Register>();
   Register obj_reg = obj.AsRegister<Register>();
-  if (kEmitCompilerReadBarrier) {
+  if (read_barrier_option == kWithReadBarrier) {
+    CHECK(kEmitCompilerReadBarrier);
     if (kUseBakerReadBarrier) {
       DCHECK(maybe_temp.IsRegister()) << maybe_temp;
       // Load with fast path based Baker's read barrier.
@@ -6528,17 +6850,18 @@
                                                           Location root,
                                                           Register obj,
                                                           uint32_t offset,
-                                                          bool requires_read_barrier) {
+                                                          ReadBarrierOption read_barrier_option) {
   Register root_reg = root.AsRegister<Register>();
-  if (requires_read_barrier) {
+  if (read_barrier_option == kWithReadBarrier) {
     DCHECK(kEmitCompilerReadBarrier);
     if (kUseBakerReadBarrier) {
       // Fast path implementation of art::ReadBarrier::BarrierForRoot when
       // Baker's read barrier are used:
       //
       //   root = obj.field;
-      //   if (Thread::Current()->GetIsGcMarking()) {
-      //     root = ReadBarrier::Mark(root)
+      //   temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+      //   if (temp != null) {
+      //     root = temp(root)
       //   }
 
       // /* GcRoot<mirror::Object> */ root = *(obj + offset)
@@ -6552,14 +6875,23 @@
                     "have different sizes.");
 
       // Slow path marking the GC root `root`.
+      Location temp = Location::RegisterLocation(LR);
       SlowPathCodeARM* slow_path =
-          new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, root);
+          new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(
+              instruction,
+              root,
+              /*entrypoint*/ temp);
       codegen_->AddSlowPath(slow_path);
 
-      // IP = Thread::Current()->GetIsGcMarking()
-      __ LoadFromOffset(
-          kLoadWord, IP, TR, Thread::IsGcMarkingOffset<kArmPointerSize>().Int32Value());
-      __ CompareAndBranchIfNonZero(IP, slow_path->GetEntryLabel());
+      // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+      const int32_t entry_point_offset =
+          CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(root.reg());
+      // Loading the entrypoint does not require a load acquire since it is only changed when
+      // threads are suspended or running a checkpoint.
+      __ LoadFromOffset(kLoadWord, temp.AsRegister<Register>(), TR, entry_point_offset);
+      // The entrypoint is null when the GC is not marking, this prevents one load compared to
+      // checking GetIsGcMarking.
+      __ CompareAndBranchIfNonZero(temp.AsRegister<Register>(), slow_path->GetEntryLabel());
       __ Bind(slow_path->GetExitLabel());
     } else {
       // GC root loaded through a slow path for read barriers other
@@ -6621,7 +6953,9 @@
                                                                  Location index,
                                                                  ScaleFactor scale_factor,
                                                                  Location temp,
-                                                                 bool needs_null_check) {
+                                                                 bool needs_null_check,
+                                                                 bool always_update_field,
+                                                                 Register* temp2) {
   DCHECK(kEmitCompilerReadBarrier);
   DCHECK(kUseBakerReadBarrier);
 
@@ -6635,7 +6969,7 @@
   //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
   //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
   //   HeapReference<Object> ref = *src;  // Original reference load.
-  //   bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+  //   bool is_gray = (rb_state == ReadBarrier::GrayState());
   //   if (is_gray) {
   //     ref = ReadBarrier::Mark(ref);  // Performed by runtime entrypoint slow path.
   //   }
@@ -6666,8 +7000,9 @@
 
   // The actual reference load.
   if (index.IsValid()) {
-    // Load types involving an "index": ArrayGet and
-    // UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
+    // Load types involving an "index": ArrayGet,
+    // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject
+    // intrinsics.
     // /* HeapReference<Object> */ ref = *(obj + offset + (index << scale_factor))
     if (index.IsConstant()) {
       size_t computed_offset =
@@ -6675,9 +7010,9 @@
       __ LoadFromOffset(kLoadWord, ref_reg, obj, computed_offset);
     } else {
       // Handle the special case of the
-      // UnsafeGetObject/UnsafeGetObjectVolatile intrinsics, which use
-      // a register pair as index ("long offset"), of which only the low
-      // part contains data.
+      // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject
+      // intrinsics, which use a register pair as index ("long
+      // offset"), of which only the low part contains data.
       Register index_reg = index.IsRegisterPair()
           ? index.AsRegisterPairLow<Register>()
           : index.AsRegister<Register>();
@@ -6693,18 +7028,30 @@
   __ MaybeUnpoisonHeapReference(ref_reg);
 
   // Slow path marking the object `ref` when it is gray.
-  SlowPathCodeARM* slow_path =
-      new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, ref);
+  SlowPathCodeARM* slow_path;
+  if (always_update_field) {
+    DCHECK(temp2 != nullptr);
+    // ReadBarrierMarkAndUpdateFieldSlowPathARM only supports address
+    // of the form `obj + field_offset`, where `obj` is a register and
+    // `field_offset` is a register pair (of which only the lower half
+    // is used). Thus `offset` and `scale_factor` above are expected
+    // to be null in this code path.
+    DCHECK_EQ(offset, 0u);
+    DCHECK_EQ(scale_factor, ScaleFactor::TIMES_1);
+    slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkAndUpdateFieldSlowPathARM(
+        instruction, ref, obj, /* field_offset */ index, temp_reg, *temp2);
+  } else {
+    slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM(instruction, ref);
+  }
   AddSlowPath(slow_path);
 
-  // if (rb_state == ReadBarrier::gray_ptr_)
+  // if (rb_state == ReadBarrier::GrayState())
   //   ref = ReadBarrier::Mark(ref);
   // Given the numeric representation, it's enough to check the low bit of the
   // rb_state. We do that by shifting the bit out of the lock word with LSRS
   // which can be a 16-bit instruction unlike the TST immediate.
-  static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
-  static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
-  static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+  static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+  static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
   __ Lsrs(temp_reg, temp_reg, LockWord::kReadBarrierStateShift + 1);
   __ b(slow_path->GetEntryLabel(), CS);  // Carry flag is the last bit shifted out by LSRS.
   __ Bind(slow_path->GetExitLabel());
diff --git a/compiler/optimizing/code_generator_arm.h b/compiler/optimizing/code_generator_arm.h
index ef2e23f..f95dd57 100644
--- a/compiler/optimizing/code_generator_arm.h
+++ b/compiler/optimizing/code_generator_arm.h
@@ -263,7 +263,8 @@
   void GenerateReferenceLoadOneRegister(HInstruction* instruction,
                                         Location out,
                                         uint32_t offset,
-                                        Location maybe_temp);
+                                        Location maybe_temp,
+                                        ReadBarrierOption read_barrier_option);
   // Generate a heap reference load using two different registers
   // `out` and `obj`:
   //
@@ -278,17 +279,18 @@
                                          Location out,
                                          Location obj,
                                          uint32_t offset,
-                                         Location maybe_temp);
+                                         Location maybe_temp,
+                                         ReadBarrierOption read_barrier_option);
   // Generate a GC root reference load:
   //
   //   root <- *(obj + offset)
   //
-  // while honoring read barriers if requires_read_barrier is true.
+  // while honoring read barriers based on read_barrier_option.
   void GenerateGcRootFieldLoad(HInstruction* instruction,
                                Location root,
                                Register obj,
                                uint32_t offset,
-                               bool requires_read_barrier = kEmitCompilerReadBarrier);
+                               ReadBarrierOption read_barrier_option);
   void GenerateTestAndBranch(HInstruction* instruction,
                              size_t condition_input_index,
                              Label* true_target,
@@ -365,9 +367,6 @@
   void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
   void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE;
 
-  // Blocks all register pairs made out of blocked core registers.
-  void UpdateBlockedPairRegisters() const;
-
   ParallelMoveResolverARM* GetMoveResolver() OVERRIDE {
     return &move_resolver_;
   }
@@ -511,6 +510,18 @@
                                              bool needs_null_check);
   // Factored implementation used by GenerateFieldLoadWithBakerReadBarrier
   // and GenerateArrayLoadWithBakerReadBarrier.
+
+  // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier,
+  // GenerateArrayLoadWithBakerReadBarrier and some intrinsics.
+  //
+  // Load the object reference located at the address
+  // `obj + offset + (index << scale_factor)`, held by object `obj`, into
+  // `ref`, and mark it if needed.
+  //
+  // If `always_update_field` is true, the value of the reference is
+  // atomically updated in the holder (`obj`).  This operation
+  // requires an extra temporary register, which must be provided as a
+  // non-null pointer (`temp2`).
   void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
                                                  Location ref,
                                                  Register obj,
@@ -518,7 +529,9 @@
                                                  Location index,
                                                  ScaleFactor scale_factor,
                                                  Location temp,
-                                                 bool needs_null_check);
+                                                 bool needs_null_check,
+                                                 bool always_update_field = false,
+                                                 Register* temp2 = nullptr);
 
   // Generate a read barrier for a heap reference within `instruction`
   // using a slow path.
diff --git a/compiler/optimizing/code_generator_arm64.cc b/compiler/optimizing/code_generator_arm64.cc
index 969d653..6ec9c91 100644
--- a/compiler/optimizing/code_generator_arm64.cc
+++ b/compiler/optimizing/code_generator_arm64.cc
@@ -331,13 +331,20 @@
 
 class LoadStringSlowPathARM64 : public SlowPathCodeARM64 {
  public:
-  explicit LoadStringSlowPathARM64(HLoadString* instruction) : SlowPathCodeARM64(instruction) {}
+  LoadStringSlowPathARM64(HLoadString* instruction, Register temp, vixl::aarch64::Label* adrp_label)
+      : SlowPathCodeARM64(instruction),
+        temp_(temp),
+        adrp_label_(adrp_label) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
     DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
 
+    // temp_ is a scratch register. Make sure it's not used for saving/restoring registers.
+    UseScratchRegisterScope temps(arm64_codegen->GetVIXLAssembler());
+    temps.Exclude(temp_);
+
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, locations);
 
@@ -352,21 +359,21 @@
     RestoreLiveRegisters(codegen, locations);
 
     // Store the resolved String to the BSS entry.
-    UseScratchRegisterScope temps(arm64_codegen->GetVIXLAssembler());
-    Register temp = temps.AcquireX();
     const DexFile& dex_file = instruction_->AsLoadString()->GetDexFile();
-    // TODO: Change art_quick_resolve_string to kSaveEverything and use a temporary
-    // for the ADRP in the fast path, so that we can avoid the ADRP here.
-    vixl::aarch64::Label* adrp_label =
-        arm64_codegen->NewPcRelativeStringPatch(dex_file, string_index);
-    arm64_codegen->EmitAdrpPlaceholder(adrp_label, temp);
+    if (!kUseReadBarrier || kUseBakerReadBarrier) {
+      // The string entry page address was preserved in temp_ thanks to kSaveEverything.
+    } else {
+      // For non-Baker read barrier, we need to re-calculate the address of the string entry page.
+      adrp_label_ = arm64_codegen->NewPcRelativeStringPatch(dex_file, string_index);
+      arm64_codegen->EmitAdrpPlaceholder(adrp_label_, temp_);
+    }
     vixl::aarch64::Label* strp_label =
-        arm64_codegen->NewPcRelativeStringPatch(dex_file, string_index, adrp_label);
+        arm64_codegen->NewPcRelativeStringPatch(dex_file, string_index, adrp_label_);
     {
       SingleEmissionCheckScope guard(arm64_codegen->GetVIXLAssembler());
       __ Bind(strp_label);
       __ str(RegisterFrom(locations->Out(), Primitive::kPrimNot),
-             MemOperand(temp, /* offset placeholder */ 0));
+             MemOperand(temp_, /* offset placeholder */ 0));
     }
 
     __ B(GetExitLabel());
@@ -375,6 +382,9 @@
   const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathARM64"; }
 
  private:
+  const Register temp_;
+  vixl::aarch64::Label* adrp_label_;
+
   DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathARM64);
 };
 
@@ -449,9 +459,7 @@
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
-    Location class_to_check = locations->InAt(1);
-    Location object_class = instruction_->IsCheckCast() ? locations->GetTemp(0)
-                                                        : locations->Out();
+
     DCHECK(instruction_->IsCheckCast()
            || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
@@ -466,21 +474,22 @@
     // We're moving two locations to locations that could overlap, so we need a parallel
     // move resolver.
     InvokeRuntimeCallingConvention calling_convention;
-    codegen->EmitParallelMoves(
-        class_to_check, LocationFrom(calling_convention.GetRegisterAt(0)), Primitive::kPrimNot,
-        object_class, LocationFrom(calling_convention.GetRegisterAt(1)), Primitive::kPrimNot);
-
+    codegen->EmitParallelMoves(locations->InAt(0),
+                               LocationFrom(calling_convention.GetRegisterAt(0)),
+                               Primitive::kPrimNot,
+                               locations->InAt(1),
+                               LocationFrom(calling_convention.GetRegisterAt(1)),
+                               Primitive::kPrimNot);
     if (instruction_->IsInstanceOf()) {
       arm64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this);
-      CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t,
-                           const mirror::Class*, const mirror::Class*>();
+      CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
       Primitive::Type ret_type = instruction_->GetType();
       Location ret_loc = calling_convention.GetReturnLocation(ret_type);
       arm64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type);
     } else {
       DCHECK(instruction_->IsCheckCast());
-      arm64_codegen->InvokeRuntime(kQuickCheckCast, instruction_, dex_pc, this);
-      CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>();
+      arm64_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this);
+      CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
     }
 
     if (!is_fatal_) {
@@ -579,11 +588,27 @@
   }
 }
 
-// Slow path marking an object during a read barrier.
+// Slow path marking an object reference `ref` during a read
+// barrier. The field `obj.field` in the object `obj` holding this
+// reference does not get updated by this slow path after marking (see
+// ReadBarrierMarkAndUpdateFieldSlowPathARM64 below for that).
+//
+// This means that after the execution of this slow path, `ref` will
+// always be up-to-date, but `obj.field` may not; i.e., after the
+// flip, `ref` will be a to-space reference, but `obj.field` will
+// probably still be a from-space reference (unless it gets updated by
+// another thread, or if another thread installed another object
+// reference (different from `ref`) in `obj.field`).
+// If entrypoint is a valid location it is assumed to already be holding the entrypoint. The case
+// where the entrypoint is passed in is for the GcRoot read barrier.
 class ReadBarrierMarkSlowPathARM64 : public SlowPathCodeARM64 {
  public:
-  ReadBarrierMarkSlowPathARM64(HInstruction* instruction, Location obj)
-      : SlowPathCodeARM64(instruction), obj_(obj) {
+  ReadBarrierMarkSlowPathARM64(HInstruction* instruction,
+                               Location ref,
+                               Location entrypoint = Location::NoLocation())
+      : SlowPathCodeARM64(instruction),
+        ref_(ref),
+        entrypoint_(entrypoint) {
     DCHECK(kEmitCompilerReadBarrier);
   }
 
@@ -592,7 +617,8 @@
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
     DCHECK(locations->CanCall());
-    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(obj_.reg()));
+    DCHECK(ref_.IsRegister()) << ref_;
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg();
     DCHECK(instruction_->IsInstanceFieldGet() ||
            instruction_->IsStaticFieldGet() ||
            instruction_->IsArrayGet() ||
@@ -605,44 +631,225 @@
            (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
         << "Unexpected instruction in read barrier marking slow path: "
         << instruction_->DebugName();
+    // The read barrier instrumentation of object ArrayGet
+    // instructions does not support the HIntermediateAddress
+    // instruction.
+    DCHECK(!(instruction_->IsArrayGet() &&
+             instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
 
     __ Bind(GetEntryLabel());
     // No need to save live registers; it's taken care of by the
     // entrypoint. Also, there is no need to update the stack mask,
     // as this runtime call will not trigger a garbage collection.
     CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
-    DCHECK_NE(obj_.reg(), LR);
-    DCHECK_NE(obj_.reg(), WSP);
-    DCHECK_NE(obj_.reg(), WZR);
+    DCHECK_NE(ref_.reg(), LR);
+    DCHECK_NE(ref_.reg(), WSP);
+    DCHECK_NE(ref_.reg(), WZR);
     // IP0 is used internally by the ReadBarrierMarkRegX entry point
     // as a temporary, it cannot be the entry point's input/output.
-    DCHECK_NE(obj_.reg(), IP0);
-    DCHECK(0 <= obj_.reg() && obj_.reg() < kNumberOfWRegisters) << obj_.reg();
+    DCHECK_NE(ref_.reg(), IP0);
+    DCHECK(0 <= ref_.reg() && ref_.reg() < kNumberOfWRegisters) << ref_.reg();
     // "Compact" slow path, saving two moves.
     //
     // Instead of using the standard runtime calling convention (input
     // and output in W0):
     //
-    //   W0 <- obj
+    //   W0 <- ref
     //   W0 <- ReadBarrierMark(W0)
-    //   obj <- W0
+    //   ref <- W0
     //
-    // we just use rX (the register holding `obj`) as input and output
+    // we just use rX (the register containing `ref`) as input and output
+    // of a dedicated entrypoint:
+    //
+    //   rX <- ReadBarrierMarkRegX(rX)
+    //
+    if (entrypoint_.IsValid()) {
+      arm64_codegen->ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction_, this);
+      __ Blr(XRegisterFrom(entrypoint_));
+    } else {
+      // Entrypoint is not already loaded, load from the thread.
+      int32_t entry_point_offset =
+          CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref_.reg());
+      // This runtime call does not require a stack map.
+      arm64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+    }
+    __ B(GetExitLabel());
+  }
+
+ private:
+  // The location (register) of the marked object reference.
+  const Location ref_;
+
+  // The location of the entrypoint if it is already loaded.
+  const Location entrypoint_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM64);
+};
+
+// Slow path marking an object reference `ref` during a read barrier,
+// and if needed, atomically updating the field `obj.field` in the
+// object `obj` holding this reference after marking (contrary to
+// ReadBarrierMarkSlowPathARM64 above, which never tries to update
+// `obj.field`).
+//
+// This means that after the execution of this slow path, both `ref`
+// and `obj.field` will be up-to-date; i.e., after the flip, both will
+// hold the same to-space reference (unless another thread installed
+// another object reference (different from `ref`) in `obj.field`).
+class ReadBarrierMarkAndUpdateFieldSlowPathARM64 : public SlowPathCodeARM64 {
+ public:
+  ReadBarrierMarkAndUpdateFieldSlowPathARM64(HInstruction* instruction,
+                                             Location ref,
+                                             Register obj,
+                                             Location field_offset,
+                                             Register temp)
+      : SlowPathCodeARM64(instruction),
+        ref_(ref),
+        obj_(obj),
+        field_offset_(field_offset),
+        temp_(temp) {
+    DCHECK(kEmitCompilerReadBarrier);
+  }
+
+  const char* GetDescription() const OVERRIDE {
+    return "ReadBarrierMarkAndUpdateFieldSlowPathARM64";
+  }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    Register ref_reg = WRegisterFrom(ref_);
+    DCHECK(locations->CanCall());
+    DCHECK(ref_.IsRegister()) << ref_;
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_.reg())) << ref_.reg();
+    // This slow path is only used by the UnsafeCASObject intrinsic.
+    DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
+        << "Unexpected instruction in read barrier marking and field updating slow path: "
+        << instruction_->DebugName();
+    DCHECK(instruction_->GetLocations()->Intrinsified());
+    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);
+    DCHECK(field_offset_.IsRegister()) << field_offset_;
+
+    __ Bind(GetEntryLabel());
+
+    // Save the old reference.
+    // Note that we cannot use IP to save the old reference, as IP is
+    // used internally by the ReadBarrierMarkRegX entry point, and we
+    // need the old reference after the call to that entry point.
+    DCHECK_NE(LocationFrom(temp_).reg(), IP0);
+    __ Mov(temp_.W(), ref_reg);
+
+    // No need to save live registers; it's taken care of by the
+    // entrypoint. Also, there is no need to update the stack mask,
+    // as this runtime call will not trigger a garbage collection.
+    CodeGeneratorARM64* arm64_codegen = down_cast<CodeGeneratorARM64*>(codegen);
+    DCHECK_NE(ref_.reg(), LR);
+    DCHECK_NE(ref_.reg(), WSP);
+    DCHECK_NE(ref_.reg(), WZR);
+    // IP0 is used internally by the ReadBarrierMarkRegX entry point
+    // as a temporary, it cannot be the entry point's input/output.
+    DCHECK_NE(ref_.reg(), IP0);
+    DCHECK(0 <= ref_.reg() && ref_.reg() < kNumberOfWRegisters) << ref_.reg();
+    // "Compact" slow path, saving two moves.
+    //
+    // Instead of using the standard runtime calling convention (input
+    // and output in W0):
+    //
+    //   W0 <- ref
+    //   W0 <- ReadBarrierMark(W0)
+    //   ref <- W0
+    //
+    // we just use rX (the register containing `ref`) as input and output
     // of a dedicated entrypoint:
     //
     //   rX <- ReadBarrierMarkRegX(rX)
     //
     int32_t entry_point_offset =
-        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(obj_.reg());
+        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(ref_.reg());
     // This runtime call does not require a stack map.
     arm64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+
+    // If the new reference is different from the old reference,
+    // update the field in the holder (`*(obj_ + field_offset_)`).
+    //
+    // Note that this field could also hold a different object, if
+    // another thread had concurrently changed it. In that case, the
+    // LDXR/CMP/BNE sequence of instructions in the compare-and-set
+    // (CAS) operation below would abort the CAS, leaving the field
+    // as-is.
+    vixl::aarch64::Label done;
+    __ Cmp(temp_.W(), ref_reg);
+    __ B(eq, &done);
+
+    // Update the the holder's field atomically.  This may fail if
+    // mutator updates before us, but it's OK.  This is achieved
+    // using a strong compare-and-set (CAS) operation with relaxed
+    // memory synchronization ordering, where the expected value is
+    // the old reference and the desired value is the new reference.
+
+    MacroAssembler* masm = arm64_codegen->GetVIXLAssembler();
+    UseScratchRegisterScope temps(masm);
+
+    // Convenience aliases.
+    Register base = obj_.W();
+    Register offset = XRegisterFrom(field_offset_);
+    Register expected = temp_.W();
+    Register value = ref_reg;
+    Register tmp_ptr = temps.AcquireX();    // Pointer to actual memory.
+    Register tmp_value = temps.AcquireW();  // Value in memory.
+
+    __ Add(tmp_ptr, base.X(), Operand(offset));
+
+    if (kPoisonHeapReferences) {
+      arm64_codegen->GetAssembler()->PoisonHeapReference(expected);
+      if (value.Is(expected)) {
+        // Do not poison `value`, as it is the same register as
+        // `expected`, which has just been poisoned.
+      } else {
+        arm64_codegen->GetAssembler()->PoisonHeapReference(value);
+      }
+    }
+
+    // do {
+    //   tmp_value = [tmp_ptr] - expected;
+    // } while (tmp_value == 0 && failure([tmp_ptr] <- r_new_value));
+
+    vixl::aarch64::Label loop_head, comparison_failed, exit_loop;
+    __ Bind(&loop_head);
+    __ Ldxr(tmp_value, MemOperand(tmp_ptr));
+    __ Cmp(tmp_value, expected);
+    __ B(&comparison_failed, ne);
+    __ Stxr(tmp_value, value, MemOperand(tmp_ptr));
+    __ Cbnz(tmp_value, &loop_head);
+    __ B(&exit_loop);
+    __ Bind(&comparison_failed);
+    __ Clrex();
+    __ Bind(&exit_loop);
+
+    if (kPoisonHeapReferences) {
+      arm64_codegen->GetAssembler()->UnpoisonHeapReference(expected);
+      if (value.Is(expected)) {
+        // Do not unpoison `value`, as it is the same register as
+        // `expected`, which has just been unpoisoned.
+      } else {
+        arm64_codegen->GetAssembler()->UnpoisonHeapReference(value);
+      }
+    }
+
+    __ Bind(&done);
     __ B(GetExitLabel());
   }
 
  private:
-  const Location obj_;
+  // The location (register) of the marked object reference.
+  const Location ref_;
+  // The register containing the object holding the marked object reference field.
+  const Register obj_;
+  // The location of the offset of the marked reference field within `obj_`.
+  Location field_offset_;
 
-  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathARM64);
+  const Register temp_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathARM64);
 };
 
 // Slow path generating a read barrier for a heap reference.
@@ -688,7 +895,9 @@
            (instruction_->IsInvokeVirtual()) && instruction_->GetLocations()->Intrinsified())
         << "Unexpected instruction in read barrier for heap reference slow path: "
         << instruction_->DebugName();
-    // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
+    // The read barrier instrumentation of object ArrayGet
+    // instructions does not support the HIntermediateAddress
+    // instruction.
     DCHECK(!(instruction_->IsArrayGet() &&
              instruction_->AsArrayGet()->GetArray()->IsIntermediateAddress()));
 
@@ -758,7 +967,7 @@
         DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
                (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
             << instruction_->AsInvoke()->GetIntrinsic();
-        DCHECK_EQ(offset_, 0U);
+        DCHECK_EQ(offset_, 0u);
         DCHECK(index_.IsRegister());
       }
     }
@@ -1046,7 +1255,15 @@
     //      ...                       : other preserved fp registers.
     //      ...                       : reserved frame space.
     //      sp[0]                     : current method.
-    __ Str(kArtMethodRegister, MemOperand(sp, -frame_size, PreIndex));
+
+    // Save the current method if we need it. Note that we do not
+    // do this in HCurrentMethod, as the instruction might have been removed
+    // in the SSA graph.
+    if (RequiresCurrentMethod()) {
+      __ Str(kArtMethodRegister, MemOperand(sp, -frame_size, PreIndex));
+    } else {
+      __ Claim(frame_size);
+    }
     GetAssembler()->cfi().AdjustCFAOffset(frame_size);
     GetAssembler()->SpillRegisters(GetFramePreservedCoreRegisters(),
         frame_size - GetCoreSpillSize());
@@ -1996,8 +2213,6 @@
 }
 
 void LocationsBuilderARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) {
-  // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
-  DCHECK(!kEmitCompilerReadBarrier);
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
   locations->SetInAt(0, Location::RequiresRegister());
@@ -2005,10 +2220,7 @@
   locations->SetOut(Location::RequiresRegister());
 }
 
-void InstructionCodeGeneratorARM64::VisitIntermediateAddress(
-    HIntermediateAddress* instruction) {
-  // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
-  DCHECK(!kEmitCompilerReadBarrier);
+void InstructionCodeGeneratorARM64::VisitIntermediateAddress(HIntermediateAddress* instruction) {
   __ Add(OutputRegister(instruction),
          InputRegisterAt(instruction, 0),
          Operand(InputOperandAt(instruction, 1)));
@@ -2108,11 +2320,15 @@
   // Block pools between `Load` and `MaybeRecordImplicitNullCheck`.
   BlockPoolsScope block_pools(masm);
 
+  // The read barrier instrumentation of object ArrayGet instructions
+  // does not support the HIntermediateAddress instruction.
+  DCHECK(!((type == Primitive::kPrimNot) &&
+           instruction->GetArray()->IsIntermediateAddress() &&
+           kEmitCompilerReadBarrier));
+
   if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
     // Object ArrayGet with Baker's read barrier case.
     Register temp = temps.AcquireW();
-    // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
-    DCHECK(!instruction->GetArray()->IsIntermediateAddress());
     // Note that a potential implicit null check is handled in the
     // CodeGeneratorARM64::GenerateArrayLoadWithBakerReadBarrier call.
     codegen_->GenerateArrayLoadWithBakerReadBarrier(
@@ -2124,13 +2340,22 @@
     if (maybe_compressed_char_at) {
       uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
       length = temps.AcquireW();
-      __ Ldr(length, HeapOperand(obj, count_offset));
+      if (instruction->GetArray()->IsIntermediateAddress()) {
+        DCHECK_LT(count_offset, offset);
+        int64_t adjusted_offset = static_cast<int64_t>(count_offset) - static_cast<int64_t>(offset);
+        // Note that `adjusted_offset` is negative, so this will be a LDUR.
+        __ Ldr(length, MemOperand(obj.X(), adjusted_offset));
+      } else {
+        __ Ldr(length, HeapOperand(obj, count_offset));
+      }
       codegen_->MaybeRecordImplicitNullCheck(instruction);
     }
     if (index.IsConstant()) {
       if (maybe_compressed_char_at) {
         vixl::aarch64::Label uncompressed_load, done;
-        __ Tbz(length.W(), kWRegSize - 1, &uncompressed_load);
+        static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
+                      "Expecting 0=compressed, 1=uncompressed");
+        __ Tbnz(length.W(), 0, &uncompressed_load);
         __ Ldrb(Register(OutputCPURegister(instruction)),
                 HeapOperand(obj, offset + Int64ConstantFrom(index)));
         __ B(&done);
@@ -2145,9 +2370,6 @@
     } else {
       Register temp = temps.AcquireSameSizeAs(obj);
       if (instruction->GetArray()->IsIntermediateAddress()) {
-        // The read barrier instrumentation does not support the
-        // HIntermediateAddress instruction yet.
-        DCHECK(!kEmitCompilerReadBarrier);
         // We do not need to compute the intermediate address from the array: the
         // input instruction has done it already. See the comment in
         // `TryExtractArrayAccessAddress()`.
@@ -2161,7 +2383,9 @@
       }
       if (maybe_compressed_char_at) {
         vixl::aarch64::Label uncompressed_load, done;
-        __ Tbz(length.W(), kWRegSize - 1, &uncompressed_load);
+        static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
+                      "Expecting 0=compressed, 1=uncompressed");
+        __ Tbnz(length.W(), 0, &uncompressed_load);
         __ Ldrb(Register(OutputCPURegister(instruction)),
                 HeapOperand(temp, XRegisterFrom(index), LSL, 0));
         __ B(&done);
@@ -2206,7 +2430,7 @@
   codegen_->MaybeRecordImplicitNullCheck(instruction);
   // Mask out compression flag from String's array length.
   if (mirror::kUseStringCompression && instruction->IsStringLength()) {
-    __ And(out.W(), out.W(), Operand(static_cast<int32_t>(INT32_MAX)));
+    __ Lsr(out.W(), out.W(), 1u);
   }
 }
 
@@ -2255,9 +2479,6 @@
       UseScratchRegisterScope temps(masm);
       Register temp = temps.AcquireSameSizeAs(array);
       if (instruction->GetArray()->IsIntermediateAddress()) {
-        // The read barrier instrumentation does not support the
-        // HIntermediateAddress instruction yet.
-        DCHECK(!kEmitCompilerReadBarrier);
         // We do not need to compute the intermediate address from the array: the
         // input instruction has done it already. See the comment in
         // `TryExtractArrayAccessAddress()`.
@@ -3118,12 +3339,26 @@
   HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
 }
 
-static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
-  return kEmitCompilerReadBarrier &&
+// Temp is used for read barrier.
+static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
+  if (kEmitCompilerReadBarrier &&
       (kUseBakerReadBarrier ||
-       type_check_kind == TypeCheckKind::kAbstractClassCheck ||
-       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
-       type_check_kind == TypeCheckKind::kArrayObjectCheck);
+          type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+          type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+          type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+    return 1;
+  }
+  return 0;
+}
+
+// Interface case has 3 temps, one for holding the number of interfaces, one for the current
+// interface pointer, one for loading the current interface.
+// The other checks have one temp for loading the object's class.
+static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
+  if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
+    return 3;
+  }
+  return 1 + NumberOfInstanceOfTemps(type_check_kind);
 }
 
 void LocationsBuilderARM64::VisitInstanceOf(HInstanceOf* instruction) {
@@ -3155,11 +3390,8 @@
   // The "out" register is used as a temporary, so it overlaps with the inputs.
   // Note that TypeCheckSlowPathARM64 uses this register too.
   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
-  // When read barriers are enabled, we need a temporary register for
-  // some cases.
-  if (TypeCheckNeedsATemporary(type_check_kind)) {
-    locations->AddTemp(Location::RequiresRegister());
-  }
+  // Add temps if necessary for read barriers.
+  locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
 }
 
 void InstructionCodeGeneratorARM64::VisitInstanceOf(HInstanceOf* instruction) {
@@ -3170,9 +3402,9 @@
   Register cls = InputRegisterAt(instruction, 1);
   Location out_loc = locations->Out();
   Register out = OutputRegister(instruction);
-  Location maybe_temp_loc = TypeCheckNeedsATemporary(type_check_kind) ?
-      locations->GetTemp(0) :
-      Location::NoLocation();
+  const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
+  DCHECK_LE(num_temps, 1u);
+  Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
@@ -3187,11 +3419,15 @@
     __ Cbz(obj, &zero);
   }
 
-  // /* HeapReference<Class> */ out = obj->klass_
-  GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset, maybe_temp_loc);
-
   switch (type_check_kind) {
     case TypeCheckKind::kExactCheck: {
+      // /* HeapReference<Class> */ out = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp_loc,
+                                        kCompilerReadBarrierOption);
       __ Cmp(out, cls);
       __ Cset(out, eq);
       if (zero.IsLinked()) {
@@ -3201,12 +3437,23 @@
     }
 
     case TypeCheckKind::kAbstractClassCheck: {
+      // /* HeapReference<Class> */ out = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp_loc,
+                                        kCompilerReadBarrierOption);
       // If the class is abstract, we eagerly fetch the super class of the
       // object to avoid doing a comparison we know will fail.
       vixl::aarch64::Label loop, success;
       __ Bind(&loop);
       // /* HeapReference<Class> */ out = out->super_class_
-      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       out_loc,
+                                       super_offset,
+                                       maybe_temp_loc,
+                                       kCompilerReadBarrierOption);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ Cbz(out, &done);
       __ Cmp(out, cls);
@@ -3219,13 +3466,24 @@
     }
 
     case TypeCheckKind::kClassHierarchyCheck: {
+      // /* HeapReference<Class> */ out = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp_loc,
+                                        kCompilerReadBarrierOption);
       // Walk over the class hierarchy to find a match.
       vixl::aarch64::Label loop, success;
       __ Bind(&loop);
       __ Cmp(out, cls);
       __ B(eq, &success);
       // /* HeapReference<Class> */ out = out->super_class_
-      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       out_loc,
+                                       super_offset,
+                                       maybe_temp_loc,
+                                       kCompilerReadBarrierOption);
       __ Cbnz(out, &loop);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ B(&done);
@@ -3238,13 +3496,24 @@
     }
 
     case TypeCheckKind::kArrayObjectCheck: {
+      // /* HeapReference<Class> */ out = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp_loc,
+                                        kCompilerReadBarrierOption);
       // Do an exact check.
       vixl::aarch64::Label exact_check;
       __ Cmp(out, cls);
       __ B(eq, &exact_check);
       // Otherwise, we need to check that the object's class is a non-primitive array.
       // /* HeapReference<Class> */ out = out->component_type_
-      GenerateReferenceLoadOneRegister(instruction, out_loc, component_offset, maybe_temp_loc);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       out_loc,
+                                       component_offset,
+                                       maybe_temp_loc,
+                                       kCompilerReadBarrierOption);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ Cbz(out, &done);
       __ Ldrh(out, HeapOperand(out, primitive_offset));
@@ -3257,6 +3526,14 @@
     }
 
     case TypeCheckKind::kArrayCheck: {
+      // No read barrier since the slow path will retry upon failure.
+      // /* HeapReference<Class> */ out = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp_loc,
+                                        kWithoutReadBarrier);
       __ Cmp(out, cls);
       DCHECK(locations->OnlyCallsOnSlowPath());
       slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction,
@@ -3340,13 +3617,8 @@
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
   locations->SetInAt(0, Location::RequiresRegister());
   locations->SetInAt(1, Location::RequiresRegister());
-  // Note that TypeCheckSlowPathARM64 uses this "temp" register too.
-  locations->AddTemp(Location::RequiresRegister());
-  // When read barriers are enabled, we need an additional temporary
-  // register for some cases.
-  if (TypeCheckNeedsATemporary(type_check_kind)) {
-    locations->AddTemp(Location::RequiresRegister());
-  }
+  // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathARM64.
+  locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
 }
 
 void InstructionCodeGeneratorARM64::VisitCheckCast(HCheckCast* instruction) {
@@ -3355,22 +3627,34 @@
   Location obj_loc = locations->InAt(0);
   Register obj = InputRegisterAt(instruction, 0);
   Register cls = InputRegisterAt(instruction, 1);
+  const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
+  DCHECK_GE(num_temps, 1u);
+  DCHECK_LE(num_temps, 3u);
   Location temp_loc = locations->GetTemp(0);
-  Location maybe_temp2_loc = TypeCheckNeedsATemporary(type_check_kind) ?
-      locations->GetTemp(1) :
-      Location::NoLocation();
+  Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation();
+  Location maybe_temp3_loc = (num_temps >= 3) ? locations->GetTemp(2) : Location::NoLocation();
   Register temp = WRegisterFrom(temp_loc);
-  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-  uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
-  uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
-  uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+  const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+  const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+  const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+  const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+  const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
+  const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
+  const uint32_t object_array_data_offset =
+      mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
 
-  bool is_type_check_slow_path_fatal =
-      (type_check_kind == TypeCheckKind::kExactCheck ||
-       type_check_kind == TypeCheckKind::kAbstractClassCheck ||
-       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
-       type_check_kind == TypeCheckKind::kArrayObjectCheck) &&
-      !instruction->CanThrowIntoCatchBlock();
+  bool is_type_check_slow_path_fatal = false;
+  // Always false for read barriers since we may need to go to the entrypoint for non-fatal cases
+  // from false negatives. The false negatives may come from avoiding read barriers below. Avoiding
+  // read barriers is done for performance and code size reasons.
+  if (!kEmitCompilerReadBarrier) {
+    is_type_check_slow_path_fatal =
+        (type_check_kind == TypeCheckKind::kExactCheck ||
+         type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+         type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+         type_check_kind == TypeCheckKind::kArrayObjectCheck) &&
+        !instruction->CanThrowIntoCatchBlock();
+  }
   SlowPathCodeARM64* type_check_slow_path =
       new (GetGraph()->GetArena()) TypeCheckSlowPathARM64(instruction,
                                                           is_type_check_slow_path_fatal);
@@ -3382,12 +3666,17 @@
     __ Cbz(obj, &done);
   }
 
-  // /* HeapReference<Class> */ temp = obj->klass_
-  GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
-
   switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
     case TypeCheckKind::kArrayCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
+
       __ Cmp(temp, cls);
       // Jump to slow path for throwing the exception or doing a
       // more involved array check.
@@ -3396,34 +3685,43 @@
     }
 
     case TypeCheckKind::kAbstractClassCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
+
       // If the class is abstract, we eagerly fetch the super class of the
       // object to avoid doing a comparison we know will fail.
-      vixl::aarch64::Label loop, compare_classes;
+      vixl::aarch64::Label loop;
       __ Bind(&loop);
       // /* HeapReference<Class> */ temp = temp->super_class_
-      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       temp_loc,
+                                       super_offset,
+                                       maybe_temp2_loc,
+                                       kWithoutReadBarrier);
 
-      // If the class reference currently in `temp` is not null, jump
-      // to the `compare_classes` label to compare it with the checked
-      // class.
-      __ Cbnz(temp, &compare_classes);
-      // Otherwise, jump to the slow path to throw the exception.
-      //
-      // But before, move back the object's class into `temp` before
-      // going into the slow path, as it has been overwritten in the
-      // meantime.
-      // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(
-          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
-      __ B(type_check_slow_path->GetEntryLabel());
-
-      __ Bind(&compare_classes);
+      // If the class reference currently in `temp` is null, jump to the slow path to throw the
+      // exception.
+      __ Cbz(temp, type_check_slow_path->GetEntryLabel());
+      // Otherwise, compare classes.
       __ Cmp(temp, cls);
       __ B(ne, &loop);
       break;
     }
 
     case TypeCheckKind::kClassHierarchyCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
+
       // Walk over the class hierarchy to find a match.
       vixl::aarch64::Label loop;
       __ Bind(&loop);
@@ -3431,64 +3729,53 @@
       __ B(eq, &done);
 
       // /* HeapReference<Class> */ temp = temp->super_class_
-      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       temp_loc,
+                                       super_offset,
+                                       maybe_temp2_loc,
+                                       kWithoutReadBarrier);
 
       // If the class reference currently in `temp` is not null, jump
       // back at the beginning of the loop.
       __ Cbnz(temp, &loop);
       // Otherwise, jump to the slow path to throw the exception.
-      //
-      // But before, move back the object's class into `temp` before
-      // going into the slow path, as it has been overwritten in the
-      // meantime.
-      // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(
-          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
       __ B(type_check_slow_path->GetEntryLabel());
       break;
     }
 
     case TypeCheckKind::kArrayObjectCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
+
       // Do an exact check.
-      vixl::aarch64::Label check_non_primitive_component_type;
       __ Cmp(temp, cls);
       __ B(eq, &done);
 
       // Otherwise, we need to check that the object's class is a non-primitive array.
       // /* HeapReference<Class> */ temp = temp->component_type_
-      GenerateReferenceLoadOneRegister(instruction, temp_loc, component_offset, maybe_temp2_loc);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       temp_loc,
+                                       component_offset,
+                                       maybe_temp2_loc,
+                                       kWithoutReadBarrier);
 
-      // If the component type is not null (i.e. the object is indeed
-      // an array), jump to label `check_non_primitive_component_type`
-      // to further check that this component type is not a primitive
-      // type.
-      __ Cbnz(temp, &check_non_primitive_component_type);
-      // Otherwise, jump to the slow path to throw the exception.
-      //
-      // But before, move back the object's class into `temp` before
-      // going into the slow path, as it has been overwritten in the
-      // meantime.
-      // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(
-          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
-      __ B(type_check_slow_path->GetEntryLabel());
-
-      __ Bind(&check_non_primitive_component_type);
+      // If the component type is null, jump to the slow path to throw the exception.
+      __ Cbz(temp, type_check_slow_path->GetEntryLabel());
+      // Otherwise, the object is indeed an array. Further check that this component type is not a
+      // primitive type.
       __ Ldrh(temp, HeapOperand(temp, primitive_offset));
       static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
-      __ Cbz(temp, &done);
-      // Same comment as above regarding `temp` and the slow path.
-      // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(
-          instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
-      __ B(type_check_slow_path->GetEntryLabel());
+      __ Cbnz(temp, type_check_slow_path->GetEntryLabel());
       break;
     }
 
     case TypeCheckKind::kUnresolvedCheck:
-    case TypeCheckKind::kInterfaceCheck:
-      // We always go into the type check slow path for the unresolved
-      // and interface check cases.
+      // We always go into the type check slow path for the unresolved check cases.
       //
       // We cannot directly call the CheckCast runtime entry point
       // without resorting to a type checking slow path here (i.e. by
@@ -3497,15 +3784,40 @@
       // instruction (following the runtime calling convention), which
       // might be cluttered by the potential first read barrier
       // emission at the beginning of this method.
-      //
-      // TODO: Introduce a new runtime entry point taking the object
-      // to test (instead of its class) as argument, and let it deal
-      // with the read barrier issues. This will let us refactor this
-      // case of the `switch` code as it was previously (with a direct
-      // call to the runtime not using a type checking slow path).
-      // This should also be beneficial for the other cases above.
       __ B(type_check_slow_path->GetEntryLabel());
       break;
+    case TypeCheckKind::kInterfaceCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
+
+      // /* HeapReference<Class> */ temp = temp->iftable_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        temp_loc,
+                                        iftable_offset,
+                                        maybe_temp2_loc,
+                                        kWithoutReadBarrier);
+      // Iftable is never null.
+      __ Ldr(WRegisterFrom(maybe_temp2_loc), HeapOperand(temp.W(), array_length_offset));
+      // Loop through the iftable and check if any class matches.
+      vixl::aarch64::Label start_loop;
+      __ Bind(&start_loop);
+      __ Cbz(WRegisterFrom(maybe_temp2_loc), type_check_slow_path->GetEntryLabel());
+      __ Ldr(WRegisterFrom(maybe_temp3_loc), HeapOperand(temp.W(), object_array_data_offset));
+      GetAssembler()->MaybeUnpoisonHeapReference(WRegisterFrom(maybe_temp3_loc));
+      // Go to next interface.
+      __ Add(temp, temp, 2 * kHeapReferenceSize);
+      __ Sub(WRegisterFrom(maybe_temp2_loc), WRegisterFrom(maybe_temp2_loc), 2);
+      // Compare the classes and continue the loop if they do not match.
+      __ Cmp(cls, WRegisterFrom(maybe_temp3_loc));
+      __ B(ne, &start_loop);
+      break;
+    }
   }
   __ Bind(&done);
 
@@ -4068,7 +4380,9 @@
   Location out_loc = cls->GetLocations()->Out();
   Register out = OutputRegister(cls);
 
-  const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
+  const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
+      ? kWithoutReadBarrier
+      : kCompilerReadBarrierOption;
   bool generate_null_check = false;
   switch (cls->GetLoadKind()) {
     case HLoadClass::LoadKind::kReferrersClass: {
@@ -4080,17 +4394,17 @@
                               out_loc,
                               current_method,
                               ArtMethod::DeclaringClassOffset().Int32Value(),
-                              /*fixup_label*/ nullptr,
-                              requires_read_barrier);
+                              /* fixup_label */ nullptr,
+                              read_barrier_option);
       break;
     }
     case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
-      DCHECK(!requires_read_barrier);
+      DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
       __ Ldr(out, codegen_->DeduplicateBootImageTypeLiteral(cls->GetDexFile(),
                                                             cls->GetTypeIndex()));
       break;
     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
-      DCHECK(!requires_read_barrier);
+      DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
       // Add ADRP with its PC-relative type patch.
       const DexFile& dex_file = cls->GetDexFile();
       uint32_t type_index = cls->GetTypeIndex();
@@ -4103,7 +4417,7 @@
       break;
     }
     case HLoadClass::LoadKind::kBootImageAddress: {
-      DCHECK(!requires_read_barrier);
+      DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
       DCHECK(cls->GetAddress() != 0u && IsUint<32>(cls->GetAddress()));
       __ Ldr(out.W(), codegen_->DeduplicateBootImageAddressLiteral(cls->GetAddress()));
       break;
@@ -4125,8 +4439,8 @@
                               out_loc,
                               out.X(),
                               offset,
-                              /*fixup_label*/ nullptr,
-                              requires_read_barrier);
+                              /* fixup_label */ nullptr,
+                              read_barrier_option);
       generate_null_check = !cls->IsInDexCache();
       break;
     }
@@ -4146,7 +4460,7 @@
                               out.X(),
                               /* offset placeholder */ 0,
                               ldr_label,
-                              requires_read_barrier);
+                              read_barrier_option);
       generate_null_check = !cls->IsInDexCache();
       break;
     }
@@ -4162,8 +4476,8 @@
                               out_loc,
                               out.X(),
                               CodeGenerator::GetCacheOffset(cls->GetTypeIndex()),
-                              /*fixup_label*/ nullptr,
-                              requires_read_barrier);
+                              /* fixup_label */ nullptr,
+                              read_barrier_option);
       generate_null_check = !cls->IsInDexCache();
       break;
     }
@@ -4218,9 +4532,6 @@
       break;
     case HLoadString::LoadKind::kBootImageAddress:
       break;
-    case HLoadString::LoadKind::kDexCacheAddress:
-      DCHECK(Runtime::Current()->UseJitCompilation());
-      break;
     case HLoadString::LoadKind::kBssEntry:
       DCHECK(!Runtime::Current()->UseJitCompilation());
       break;
@@ -4238,11 +4549,24 @@
       : LocationSummary::kNoCall;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
   if (load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) {
-    locations->SetInAt(0, Location::RequiresRegister());
     InvokeRuntimeCallingConvention calling_convention;
     locations->SetOut(calling_convention.GetReturnLocation(load->GetType()));
   } else {
     locations->SetOut(Location::RequiresRegister());
+    if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) {
+      if (!kUseReadBarrier || kUseBakerReadBarrier) {
+        // Rely on the pResolveString and/or marking to save everything, including temps.
+        RegisterSet caller_saves = RegisterSet::Empty();
+        InvokeRuntimeCallingConvention calling_convention;
+        caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0).GetCode()));
+        DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(),
+                  RegisterFrom(calling_convention.GetReturnLocation(Primitive::kPrimNot),
+                               Primitive::kPrimNot).GetCode());
+        locations->SetCustomSlowPathCallerSaves(caller_saves);
+      } else {
+        // For non-Baker read barrier we have a temp-clobbering call.
+      }
+    }
   }
 }
 
@@ -4277,18 +4601,22 @@
       const DexFile& dex_file = load->GetDexFile();
       uint32_t string_index = load->GetStringIndex();
       DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
+      UseScratchRegisterScope temps(codegen_->GetVIXLAssembler());
+      Register temp = temps.AcquireX();
       vixl::aarch64::Label* adrp_label = codegen_->NewPcRelativeStringPatch(dex_file, string_index);
-      codegen_->EmitAdrpPlaceholder(adrp_label, out.X());
+      codegen_->EmitAdrpPlaceholder(adrp_label, temp);
       // Add LDR with its PC-relative String patch.
       vixl::aarch64::Label* ldr_label =
           codegen_->NewPcRelativeStringPatch(dex_file, string_index, adrp_label);
       // /* GcRoot<mirror::Class> */ out = *(base_address + offset)  /* PC-relative */
       GenerateGcRootFieldLoad(load,
                               load->GetLocations()->Out(),
-                              out.X(),
-                              /* placeholder */ 0u,
-                              ldr_label);
-      SlowPathCodeARM64* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathARM64(load);
+                              temp,
+                              /* offset placeholder */ 0u,
+                              ldr_label,
+                              kCompilerReadBarrierOption);
+      SlowPathCodeARM64* slow_path =
+          new (GetGraph()->GetArena()) LoadStringSlowPathARM64(load, temp, adrp_label);
       codegen_->AddSlowPath(slow_path);
       __ Cbz(out.X(), slow_path->GetEntryLabel());
       __ Bind(slow_path->GetExitLabel());
@@ -4300,6 +4628,7 @@
 
   // TODO: Re-add the compiler code to do string dex cache lookup again.
   InvokeRuntimeCallingConvention calling_convention;
+  DCHECK_EQ(calling_convention.GetRegisterAt(0).GetCode(), out.GetCode());
   __ Mov(calling_convention.GetRegisterAt(0).W(), load->GetStringIndex());
   codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
   CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
@@ -4987,13 +5316,16 @@
   }
 }
 
-void InstructionCodeGeneratorARM64::GenerateReferenceLoadOneRegister(HInstruction* instruction,
-                                                                     Location out,
-                                                                     uint32_t offset,
-                                                                     Location maybe_temp) {
+void InstructionCodeGeneratorARM64::GenerateReferenceLoadOneRegister(
+    HInstruction* instruction,
+    Location out,
+    uint32_t offset,
+    Location maybe_temp,
+    ReadBarrierOption read_barrier_option) {
   Primitive::Type type = Primitive::kPrimNot;
   Register out_reg = RegisterFrom(out, type);
-  if (kEmitCompilerReadBarrier) {
+  if (read_barrier_option == kWithReadBarrier) {
+    CHECK(kEmitCompilerReadBarrier);
     Register temp_reg = RegisterFrom(maybe_temp, type);
     if (kUseBakerReadBarrier) {
       // Load with fast path based Baker's read barrier.
@@ -5023,15 +5355,18 @@
   }
 }
 
-void InstructionCodeGeneratorARM64::GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
-                                                                      Location out,
-                                                                      Location obj,
-                                                                      uint32_t offset,
-                                                                      Location maybe_temp) {
+void InstructionCodeGeneratorARM64::GenerateReferenceLoadTwoRegisters(
+    HInstruction* instruction,
+    Location out,
+    Location obj,
+    uint32_t offset,
+    Location maybe_temp,
+    ReadBarrierOption read_barrier_option) {
   Primitive::Type type = Primitive::kPrimNot;
   Register out_reg = RegisterFrom(out, type);
   Register obj_reg = RegisterFrom(obj, type);
-  if (kEmitCompilerReadBarrier) {
+  if (read_barrier_option == kWithReadBarrier) {
+    CHECK(kEmitCompilerReadBarrier);
     if (kUseBakerReadBarrier) {
       // Load with fast path based Baker's read barrier.
       Register temp_reg = RegisterFrom(maybe_temp, type);
@@ -5057,23 +5392,25 @@
   }
 }
 
-void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(HInstruction* instruction,
-                                                            Location root,
-                                                            Register obj,
-                                                            uint32_t offset,
-                                                            vixl::aarch64::Label* fixup_label,
-                                                            bool requires_read_barrier) {
+void InstructionCodeGeneratorARM64::GenerateGcRootFieldLoad(
+    HInstruction* instruction,
+    Location root,
+    Register obj,
+    uint32_t offset,
+    vixl::aarch64::Label* fixup_label,
+    ReadBarrierOption read_barrier_option) {
   DCHECK(fixup_label == nullptr || offset == 0u);
   Register root_reg = RegisterFrom(root, Primitive::kPrimNot);
-  if (requires_read_barrier) {
+  if (read_barrier_option == kWithReadBarrier) {
     DCHECK(kEmitCompilerReadBarrier);
     if (kUseBakerReadBarrier) {
       // Fast path implementation of art::ReadBarrier::BarrierForRoot when
       // Baker's read barrier are used:
       //
       //   root = obj.field;
-      //   if (Thread::Current()->GetIsGcMarking()) {
-      //     root = ReadBarrier::Mark(root)
+      //   temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+      //   if (temp != null) {
+      //     root = temp(root)
       //   }
 
       // /* GcRoot<mirror::Object> */ root = *(obj + offset)
@@ -5090,16 +5427,22 @@
                     "art::mirror::CompressedReference<mirror::Object> and int32_t "
                     "have different sizes.");
 
-      // Slow path marking the GC root `root`.
-      SlowPathCodeARM64* slow_path =
-          new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, root);
-      codegen_->AddSlowPath(slow_path);
+      Register temp = lr;
 
-      MacroAssembler* masm = GetVIXLAssembler();
-      UseScratchRegisterScope temps(masm);
-      Register temp = temps.AcquireW();
-      // temp = Thread::Current()->GetIsGcMarking()
-      __ Ldr(temp, MemOperand(tr, Thread::IsGcMarkingOffset<kArm64PointerSize>().Int32Value()));
+      // Slow path marking the GC root `root`. The entrypoint will alrady be loaded in temp.
+      SlowPathCodeARM64* slow_path =
+          new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction,
+                                                                    root,
+                                                                    LocationFrom(temp));
+      codegen_->AddSlowPath(slow_path);
+      const int32_t entry_point_offset =
+          CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArm64PointerSize>(root.reg());
+      // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
+      // Loading the entrypoint does not require a load acquire since it is only changed when
+      // threads are suspended or running a checkpoint.
+      __ Ldr(temp, MemOperand(tr, entry_point_offset));
+      // The entrypoint is null when the GC is not marking, this prevents one load compared to
+      // checking GetIsGcMarking.
       __ Cbnz(temp, slow_path->GetEntryLabel());
       __ Bind(slow_path->GetExitLabel());
     } else {
@@ -5139,7 +5482,7 @@
 
   // /* HeapReference<Object> */ ref = *(obj + offset)
   Location no_index = Location::NoLocation();
-  size_t no_scale_factor = 0U;
+  size_t no_scale_factor = 0u;
   GenerateReferenceLoadWithBakerReadBarrier(instruction,
                                             ref,
                                             obj,
@@ -5190,7 +5533,8 @@
                                                                    size_t scale_factor,
                                                                    Register temp,
                                                                    bool needs_null_check,
-                                                                   bool use_load_acquire) {
+                                                                   bool use_load_acquire,
+                                                                   bool always_update_field) {
   DCHECK(kEmitCompilerReadBarrier);
   DCHECK(kUseBakerReadBarrier);
   // If we are emitting an array load, we should not be using a
@@ -5211,7 +5555,7 @@
   //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
   //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
   //   HeapReference<Object> ref = *src;  // Original reference load.
-  //   bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+  //   bool is_gray = (rb_state == ReadBarrier::GrayState());
   //   if (is_gray) {
   //     ref = ReadBarrier::Mark(ref);  // Performed by runtime entrypoint slow path.
   //   }
@@ -5243,7 +5587,9 @@
 
   // The actual reference load.
   if (index.IsValid()) {
-    // Load types involving an "index".
+    // Load types involving an "index": ArrayGet,
+    // UnsafeGetObject/UnsafeGetObjectVolatile and UnsafeCASObject
+    // intrinsics.
     if (use_load_acquire) {
       // UnsafeGetObjectVolatile intrinsic case.
       // Register `index` is not an index in an object array, but an
@@ -5252,9 +5598,9 @@
       DCHECK(instruction->GetLocations()->Intrinsified());
       DCHECK(instruction->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile)
           << instruction->AsInvoke()->GetIntrinsic();
-      DCHECK_EQ(offset, 0U);
-      DCHECK_EQ(scale_factor, 0U);
-      DCHECK_EQ(needs_null_check, 0U);
+      DCHECK_EQ(offset, 0u);
+      DCHECK_EQ(scale_factor, 0u);
+      DCHECK_EQ(needs_null_check, 0u);
       // /* HeapReference<Object> */ ref = *(obj + index)
       MemOperand field = HeapOperand(obj, XRegisterFrom(index));
       LoadAcquire(instruction, ref_reg, field, /* needs_null_check */ false);
@@ -5265,10 +5611,10 @@
         uint32_t computed_offset = offset + (Int64ConstantFrom(index) << scale_factor);
         Load(type, ref_reg, HeapOperand(obj, computed_offset));
       } else {
-        Register temp2 = temps.AcquireW();
-        __ Add(temp2, obj, offset);
-        Load(type, ref_reg, HeapOperand(temp2, XRegisterFrom(index), LSL, scale_factor));
-        temps.Release(temp2);
+        Register temp3 = temps.AcquireW();
+        __ Add(temp3, obj, offset);
+        Load(type, ref_reg, HeapOperand(temp3, XRegisterFrom(index), LSL, scale_factor));
+        temps.Release(temp3);
       }
     }
   } else {
@@ -5285,16 +5631,26 @@
   GetAssembler()->MaybeUnpoisonHeapReference(ref_reg);
 
   // Slow path marking the object `ref` when it is gray.
-  SlowPathCodeARM64* slow_path =
-      new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, ref);
+  SlowPathCodeARM64* slow_path;
+  if (always_update_field) {
+    // ReadBarrierMarkAndUpdateFieldSlowPathARM64 only supports
+    // address of the form `obj + field_offset`, where `obj` is a
+    // register and `field_offset` is a register. Thus `offset` and
+    // `scale_factor` above are expected to be null in this code path.
+    DCHECK_EQ(offset, 0u);
+    DCHECK_EQ(scale_factor, 0u);  /* "times 1" */
+    slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkAndUpdateFieldSlowPathARM64(
+        instruction, ref, obj, /* field_offset */ index, temp);
+  } else {
+    slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathARM64(instruction, ref);
+  }
   AddSlowPath(slow_path);
 
-  // if (rb_state == ReadBarrier::gray_ptr_)
+  // if (rb_state == ReadBarrier::GrayState())
   //   ref = ReadBarrier::Mark(ref);
   // Given the numeric representation, it's enough to check the low bit of the rb_state.
-  static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
-  static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
-  static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+  static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+  static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
   __ Tbnz(temp, LockWord::kReadBarrierStateShift, slow_path->GetEntryLabel());
   __ Bind(slow_path->GetExitLabel());
 }
diff --git a/compiler/optimizing/code_generator_arm64.h b/compiler/optimizing/code_generator_arm64.h
index eb28ecb..0e8d4fd 100644
--- a/compiler/optimizing/code_generator_arm64.h
+++ b/compiler/optimizing/code_generator_arm64.h
@@ -269,7 +269,8 @@
   void GenerateReferenceLoadOneRegister(HInstruction* instruction,
                                         Location out,
                                         uint32_t offset,
-                                        Location maybe_temp);
+                                        Location maybe_temp,
+                                        ReadBarrierOption read_barrier_option);
   // Generate a heap reference load using two different registers
   // `out` and `obj`:
   //
@@ -284,18 +285,19 @@
                                          Location out,
                                          Location obj,
                                          uint32_t offset,
-                                         Location maybe_temp);
+                                         Location maybe_temp,
+                                         ReadBarrierOption read_barrier_option);
   // Generate a GC root reference load:
   //
   //   root <- *(obj + offset)
   //
-  // while honoring read barriers (if any).
+  // while honoring read barriers based on read_barrier_option.
   void GenerateGcRootFieldLoad(HInstruction* instruction,
                                Location root,
                                vixl::aarch64::Register obj,
                                uint32_t offset,
-                               vixl::aarch64::Label* fixup_label = nullptr,
-                               bool requires_read_barrier = kEmitCompilerReadBarrier);
+                               vixl::aarch64::Label* fixup_label,
+                               ReadBarrierOption read_barrier_option);
 
   // Generate a floating-point comparison.
   void GenerateFcmp(HInstruction* instruction);
@@ -594,6 +596,13 @@
                                              bool needs_null_check);
   // Factored implementation used by GenerateFieldLoadWithBakerReadBarrier
   // and GenerateArrayLoadWithBakerReadBarrier.
+  //
+  // Load the object reference located at the address
+  // `obj + offset + (index << scale_factor)`, held by object `obj`, into
+  // `ref`, and mark it if needed.
+  //
+  // If `always_update_field` is true, the value of the reference is
+  // atomically updated in the holder (`obj`).
   void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
                                                  Location ref,
                                                  vixl::aarch64::Register obj,
@@ -602,7 +611,8 @@
                                                  size_t scale_factor,
                                                  vixl::aarch64::Register temp,
                                                  bool needs_null_check,
-                                                 bool use_load_acquire);
+                                                 bool use_load_acquire,
+                                                 bool always_update_field = false);
 
   // Generate a read barrier for a heap reference within `instruction`
   // using a slow path.
diff --git a/compiler/optimizing/code_generator_arm_vixl.cc b/compiler/optimizing/code_generator_arm_vixl.cc
index 226f109..3df55ae 100644
--- a/compiler/optimizing/code_generator_arm_vixl.cc
+++ b/compiler/optimizing/code_generator_arm_vixl.cc
@@ -23,6 +23,7 @@
 #include "compiled_method.h"
 #include "entrypoints/quick/quick_entrypoints.h"
 #include "gc/accounting/card_table.h"
+#include "intrinsics_arm_vixl.h"
 #include "mirror/array-inl.h"
 #include "mirror/class-inl.h"
 #include "thread.h"
@@ -37,13 +38,23 @@
 namespace vixl32 = vixl::aarch32;
 using namespace vixl32;  // NOLINT(build/namespaces)
 
+using helpers::DRegisterFrom;
 using helpers::DWARFReg;
-using helpers::FromLowSToD;
-using helpers::OutputRegister;
-using helpers::InputRegisterAt;
+using helpers::HighDRegisterFrom;
+using helpers::HighRegisterFrom;
 using helpers::InputOperandAt;
-using helpers::OutputSRegister;
+using helpers::InputRegister;
+using helpers::InputRegisterAt;
 using helpers::InputSRegisterAt;
+using helpers::InputVRegisterAt;
+using helpers::LocationFrom;
+using helpers::LowRegisterFrom;
+using helpers::LowSRegisterFrom;
+using helpers::OutputRegister;
+using helpers::OutputSRegister;
+using helpers::OutputVRegister;
+using helpers::RegisterFrom;
+using helpers::SRegisterFrom;
 
 using RegisterList = vixl32::RegisterList;
 
@@ -52,16 +63,14 @@
   return ((location.low() & 1) == 0) && (location.low() + 1 == location.high());
 }
 
+static constexpr int kCurrentMethodStackOffset = 0;
 static constexpr size_t kArmInstrMaxSizeInBytes = 4u;
+static constexpr uint32_t kPackedSwitchCompareJumpThreshold = 7;
 
 #ifdef __
 #error "ARM Codegen VIXL macro-assembler macro already defined."
 #endif
 
-// TODO: Remove with later pop when codegen complete.
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wunused-parameter"
-
 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
 #define __ down_cast<CodeGeneratorARMVIXL*>(codegen)->GetVIXLAssembler()->  // NOLINT
 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kArmPointerSize, x).Int32Value()
@@ -69,22 +78,222 @@
 // Marker that code is yet to be, and must, be implemented.
 #define TODO_VIXL32(level) LOG(level) << __PRETTY_FUNCTION__ << " unimplemented "
 
+// SaveLiveRegisters and RestoreLiveRegisters from SlowPathCodeARM operate on sets of S registers,
+// for each live D registers they treat two corresponding S registers as live ones.
+//
+// Two following functions (SaveContiguousSRegisterList, RestoreContiguousSRegisterList) build
+// from a list of contiguous S registers a list of contiguous D registers (processing first/last
+// S registers corner cases) and save/restore this new list treating them as D registers.
+// - decreasing code size
+// - avoiding hazards on Cortex-A57, when a pair of S registers for an actual live D register is
+//   restored and then used in regular non SlowPath code as D register.
+//
+// For the following example (v means the S register is live):
+//   D names: |    D0   |    D1   |    D2   |    D4   | ...
+//   S names: | S0 | S1 | S2 | S3 | S4 | S5 | S6 | S7 | ...
+//   Live?    |    |  v |  v |  v |  v |  v |  v |    | ...
+//
+// S1 and S6 will be saved/restored independently; D registers list (D1, D2) will be processed
+// as D registers.
+//
+// TODO(VIXL): All this code should be unnecessary once the VIXL AArch32 backend provides helpers
+// for lists of floating-point registers.
+static size_t SaveContiguousSRegisterList(size_t first,
+                                          size_t last,
+                                          CodeGenerator* codegen,
+                                          size_t stack_offset) {
+  static_assert(kSRegSizeInBytes == kArmWordSize, "Broken assumption on reg/word sizes.");
+  static_assert(kDRegSizeInBytes == 2 * kArmWordSize, "Broken assumption on reg/word sizes.");
+  DCHECK_LE(first, last);
+  if ((first == last) && (first == 0)) {
+    __ Vstr(vixl32::SRegister(first), MemOperand(sp, stack_offset));
+    return stack_offset + kSRegSizeInBytes;
+  }
+  if (first % 2 == 1) {
+    __ Vstr(vixl32::SRegister(first++), MemOperand(sp, stack_offset));
+    stack_offset += kSRegSizeInBytes;
+  }
+
+  bool save_last = false;
+  if (last % 2 == 0) {
+    save_last = true;
+    --last;
+  }
+
+  if (first < last) {
+    vixl32::DRegister d_reg = vixl32::DRegister(first / 2);
+    DCHECK_EQ((last - first + 1) % 2, 0u);
+    size_t number_of_d_regs = (last - first + 1) / 2;
+
+    if (number_of_d_regs == 1) {
+      __ Vstr(d_reg, MemOperand(sp, stack_offset));
+    } else if (number_of_d_regs > 1) {
+      UseScratchRegisterScope temps(down_cast<CodeGeneratorARMVIXL*>(codegen)->GetVIXLAssembler());
+      vixl32::Register base = sp;
+      if (stack_offset != 0) {
+        base = temps.Acquire();
+        __ Add(base, sp, stack_offset);
+      }
+      __ Vstm(F64, base, NO_WRITE_BACK, DRegisterList(d_reg, number_of_d_regs));
+    }
+    stack_offset += number_of_d_regs * kDRegSizeInBytes;
+  }
+
+  if (save_last) {
+    __ Vstr(vixl32::SRegister(last + 1), MemOperand(sp, stack_offset));
+    stack_offset += kSRegSizeInBytes;
+  }
+
+  return stack_offset;
+}
+
+static size_t RestoreContiguousSRegisterList(size_t first,
+                                             size_t last,
+                                             CodeGenerator* codegen,
+                                             size_t stack_offset) {
+  static_assert(kSRegSizeInBytes == kArmWordSize, "Broken assumption on reg/word sizes.");
+  static_assert(kDRegSizeInBytes == 2 * kArmWordSize, "Broken assumption on reg/word sizes.");
+  DCHECK_LE(first, last);
+  if ((first == last) && (first == 0)) {
+    __ Vldr(vixl32::SRegister(first), MemOperand(sp, stack_offset));
+    return stack_offset + kSRegSizeInBytes;
+  }
+  if (first % 2 == 1) {
+    __ Vldr(vixl32::SRegister(first++), MemOperand(sp, stack_offset));
+    stack_offset += kSRegSizeInBytes;
+  }
+
+  bool restore_last = false;
+  if (last % 2 == 0) {
+    restore_last = true;
+    --last;
+  }
+
+  if (first < last) {
+    vixl32::DRegister d_reg = vixl32::DRegister(first / 2);
+    DCHECK_EQ((last - first + 1) % 2, 0u);
+    size_t number_of_d_regs = (last - first + 1) / 2;
+    if (number_of_d_regs == 1) {
+      __ Vldr(d_reg, MemOperand(sp, stack_offset));
+    } else if (number_of_d_regs > 1) {
+      UseScratchRegisterScope temps(down_cast<CodeGeneratorARMVIXL*>(codegen)->GetVIXLAssembler());
+      vixl32::Register base = sp;
+      if (stack_offset != 0) {
+        base = temps.Acquire();
+        __ Add(base, sp, stack_offset);
+      }
+      __ Vldm(F64, base, NO_WRITE_BACK, DRegisterList(d_reg, number_of_d_regs));
+    }
+    stack_offset += number_of_d_regs * kDRegSizeInBytes;
+  }
+
+  if (restore_last) {
+    __ Vldr(vixl32::SRegister(last + 1), MemOperand(sp, stack_offset));
+    stack_offset += kSRegSizeInBytes;
+  }
+
+  return stack_offset;
+}
+
+void SlowPathCodeARMVIXL::SaveLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
+  size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
+  size_t orig_offset = stack_offset;
+
+  const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ true);
+  for (uint32_t i : LowToHighBits(core_spills)) {
+    // If the register holds an object, update the stack mask.
+    if (locations->RegisterContainsObject(i)) {
+      locations->SetStackBit(stack_offset / kVRegSize);
+    }
+    DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
+    DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
+    saved_core_stack_offsets_[i] = stack_offset;
+    stack_offset += kArmWordSize;
+  }
+
+  CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
+  arm_codegen->GetAssembler()->StoreRegisterList(core_spills, orig_offset);
+
+  uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ false);
+  orig_offset = stack_offset;
+  for (uint32_t i : LowToHighBits(fp_spills)) {
+    DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
+    saved_fpu_stack_offsets_[i] = stack_offset;
+    stack_offset += kArmWordSize;
+  }
+
+  stack_offset = orig_offset;
+  while (fp_spills != 0u) {
+    uint32_t begin = CTZ(fp_spills);
+    uint32_t tmp = fp_spills + (1u << begin);
+    fp_spills &= tmp;  // Clear the contiguous range of 1s.
+    uint32_t end = (tmp == 0u) ? 32u : CTZ(tmp);  // CTZ(0) is undefined.
+    stack_offset = SaveContiguousSRegisterList(begin, end - 1, codegen, stack_offset);
+  }
+  DCHECK_LE(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
+}
+
+void SlowPathCodeARMVIXL::RestoreLiveRegisters(CodeGenerator* codegen, LocationSummary* locations) {
+  size_t stack_offset = codegen->GetFirstRegisterSlotInSlowPath();
+  size_t orig_offset = stack_offset;
+
+  const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ true);
+  for (uint32_t i : LowToHighBits(core_spills)) {
+    DCHECK_LT(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
+    DCHECK_LT(i, kMaximumNumberOfExpectedRegisters);
+    stack_offset += kArmWordSize;
+  }
+
+  // TODO(VIXL): Check the coherency of stack_offset after this with a test.
+  CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
+  arm_codegen->GetAssembler()->LoadRegisterList(core_spills, orig_offset);
+
+  uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers */ false);
+  while (fp_spills != 0u) {
+    uint32_t begin = CTZ(fp_spills);
+    uint32_t tmp = fp_spills + (1u << begin);
+    fp_spills &= tmp;  // Clear the contiguous range of 1s.
+    uint32_t end = (tmp == 0u) ? 32u : CTZ(tmp);  // CTZ(0) is undefined.
+    stack_offset = RestoreContiguousSRegisterList(begin, end - 1, codegen, stack_offset);
+  }
+  DCHECK_LE(stack_offset, codegen->GetFrameSize() - codegen->FrameEntrySpillSize());
+}
+
+class NullCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
+ public:
+  explicit NullCheckSlowPathARMVIXL(HNullCheck* instruction) : SlowPathCodeARMVIXL(instruction) {}
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
+    __ Bind(GetEntryLabel());
+    if (instruction_->CanThrowIntoCatchBlock()) {
+      // Live registers will be restored in the catch block if caught.
+      SaveLiveRegisters(codegen, instruction_->GetLocations());
+    }
+    arm_codegen->InvokeRuntime(kQuickThrowNullPointer,
+                               instruction_,
+                               instruction_->GetDexPc(),
+                               this);
+    CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
+  }
+
+  bool IsFatal() const OVERRIDE { return true; }
+
+  const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathARMVIXL"; }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathARMVIXL);
+};
+
 class DivZeroCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
  public:
   explicit DivZeroCheckSlowPathARMVIXL(HDivZeroCheck* instruction)
       : SlowPathCodeARMVIXL(instruction) {}
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
-    CodeGeneratorARMVIXL* armvixl_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
+    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
     __ Bind(GetEntryLabel());
-    if (instruction_->CanThrowIntoCatchBlock()) {
-      // Live registers will be restored in the catch block if caught.
-      SaveLiveRegisters(codegen, instruction_->GetLocations());
-    }
-    armvixl_codegen->InvokeRuntime(kQuickThrowDivZero,
-                                   instruction_,
-                                   instruction_->GetDexPc(),
-                                   this);
+    arm_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
     CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
   }
 
@@ -96,6 +305,257 @@
   DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathARMVIXL);
 };
 
+class SuspendCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
+ public:
+  SuspendCheckSlowPathARMVIXL(HSuspendCheck* instruction, HBasicBlock* successor)
+      : SlowPathCodeARMVIXL(instruction), successor_(successor) {}
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
+    __ Bind(GetEntryLabel());
+    arm_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
+    CheckEntrypointTypes<kQuickTestSuspend, void, void>();
+    if (successor_ == nullptr) {
+      __ B(GetReturnLabel());
+    } else {
+      __ B(arm_codegen->GetLabelOf(successor_));
+    }
+  }
+
+  vixl32::Label* GetReturnLabel() {
+    DCHECK(successor_ == nullptr);
+    return &return_label_;
+  }
+
+  HBasicBlock* GetSuccessor() const {
+    return successor_;
+  }
+
+  const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathARMVIXL"; }
+
+ private:
+  // If not null, the block to branch to after the suspend check.
+  HBasicBlock* const successor_;
+
+  // If `successor_` is null, the label to branch to after the suspend check.
+  vixl32::Label return_label_;
+
+  DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathARMVIXL);
+};
+
+class BoundsCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
+ public:
+  explicit BoundsCheckSlowPathARMVIXL(HBoundsCheck* instruction)
+      : SlowPathCodeARMVIXL(instruction) {}
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
+    LocationSummary* locations = instruction_->GetLocations();
+
+    __ Bind(GetEntryLabel());
+    if (instruction_->CanThrowIntoCatchBlock()) {
+      // Live registers will be restored in the catch block if caught.
+      SaveLiveRegisters(codegen, instruction_->GetLocations());
+    }
+    // We're moving two locations to locations that could overlap, so we need a parallel
+    // move resolver.
+    InvokeRuntimeCallingConventionARMVIXL calling_convention;
+    codegen->EmitParallelMoves(
+        locations->InAt(0),
+        LocationFrom(calling_convention.GetRegisterAt(0)),
+        Primitive::kPrimInt,
+        locations->InAt(1),
+        LocationFrom(calling_convention.GetRegisterAt(1)),
+        Primitive::kPrimInt);
+    QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
+        ? kQuickThrowStringBounds
+        : kQuickThrowArrayBounds;
+    arm_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
+    CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
+    CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
+  }
+
+  bool IsFatal() const OVERRIDE { return true; }
+
+  const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathARMVIXL"; }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathARMVIXL);
+};
+
+class LoadClassSlowPathARMVIXL : public SlowPathCodeARMVIXL {
+ public:
+  LoadClassSlowPathARMVIXL(HLoadClass* cls, HInstruction* at, uint32_t dex_pc, bool do_clinit)
+      : SlowPathCodeARMVIXL(at), cls_(cls), at_(at), dex_pc_(dex_pc), do_clinit_(do_clinit) {
+    DCHECK(at->IsLoadClass() || at->IsClinitCheck());
+  }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = at_->GetLocations();
+
+    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+
+    InvokeRuntimeCallingConventionARMVIXL calling_convention;
+    __ Mov(calling_convention.GetRegisterAt(0), cls_->GetTypeIndex());
+    QuickEntrypointEnum entrypoint = do_clinit_ ? kQuickInitializeStaticStorage
+                                                : kQuickInitializeType;
+    arm_codegen->InvokeRuntime(entrypoint, at_, dex_pc_, this);
+    if (do_clinit_) {
+      CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
+    } else {
+      CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>();
+    }
+
+    // Move the class to the desired location.
+    Location out = locations->Out();
+    if (out.IsValid()) {
+      DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
+      arm_codegen->Move32(locations->Out(), LocationFrom(r0));
+    }
+    RestoreLiveRegisters(codegen, locations);
+    __ B(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathARMVIXL"; }
+
+ private:
+  // The class this slow path will load.
+  HLoadClass* const cls_;
+
+  // The instruction where this slow path is happening.
+  // (Might be the load class or an initialization check).
+  HInstruction* const at_;
+
+  // The dex PC of `at_`.
+  const uint32_t dex_pc_;
+
+  // Whether to initialize the class.
+  const bool do_clinit_;
+
+  DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathARMVIXL);
+};
+
+class TypeCheckSlowPathARMVIXL : public SlowPathCodeARMVIXL {
+ public:
+  TypeCheckSlowPathARMVIXL(HInstruction* instruction, bool is_fatal)
+      : SlowPathCodeARMVIXL(instruction), is_fatal_(is_fatal) {}
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    DCHECK(instruction_->IsCheckCast()
+           || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
+
+    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
+    __ Bind(GetEntryLabel());
+
+    if (!is_fatal_) {
+      SaveLiveRegisters(codegen, locations);
+    }
+
+    // We're moving two locations to locations that could overlap, so we need a parallel
+    // move resolver.
+    InvokeRuntimeCallingConventionARMVIXL calling_convention;
+
+    codegen->EmitParallelMoves(locations->InAt(0),
+                               LocationFrom(calling_convention.GetRegisterAt(0)),
+                               Primitive::kPrimNot,
+                               locations->InAt(1),
+                               LocationFrom(calling_convention.GetRegisterAt(1)),
+                               Primitive::kPrimNot);
+    if (instruction_->IsInstanceOf()) {
+      arm_codegen->InvokeRuntime(kQuickInstanceofNonTrivial,
+                                 instruction_,
+                                 instruction_->GetDexPc(),
+                                 this);
+      CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
+      arm_codegen->Move32(locations->Out(), LocationFrom(r0));
+    } else {
+      DCHECK(instruction_->IsCheckCast());
+      arm_codegen->InvokeRuntime(kQuickCheckInstanceOf,
+                                 instruction_,
+                                 instruction_->GetDexPc(),
+                                 this);
+      CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
+    }
+
+    if (!is_fatal_) {
+      RestoreLiveRegisters(codegen, locations);
+      __ B(GetExitLabel());
+    }
+  }
+
+  const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathARMVIXL"; }
+
+  bool IsFatal() const OVERRIDE { return is_fatal_; }
+
+ private:
+  const bool is_fatal_;
+
+  DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathARMVIXL);
+};
+
+class DeoptimizationSlowPathARMVIXL : public SlowPathCodeARMVIXL {
+ public:
+  explicit DeoptimizationSlowPathARMVIXL(HDeoptimize* instruction)
+      : SlowPathCodeARMVIXL(instruction) {}
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
+    __ Bind(GetEntryLabel());
+    arm_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
+    CheckEntrypointTypes<kQuickDeoptimize, void, void>();
+  }
+
+  const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathARMVIXL"; }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathARMVIXL);
+};
+
+class ArraySetSlowPathARMVIXL : public SlowPathCodeARMVIXL {
+ public:
+  explicit ArraySetSlowPathARMVIXL(HInstruction* instruction) : SlowPathCodeARMVIXL(instruction) {}
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    __ Bind(GetEntryLabel());
+    SaveLiveRegisters(codegen, locations);
+
+    InvokeRuntimeCallingConventionARMVIXL calling_convention;
+    HParallelMove parallel_move(codegen->GetGraph()->GetArena());
+    parallel_move.AddMove(
+        locations->InAt(0),
+        LocationFrom(calling_convention.GetRegisterAt(0)),
+        Primitive::kPrimNot,
+        nullptr);
+    parallel_move.AddMove(
+        locations->InAt(1),
+        LocationFrom(calling_convention.GetRegisterAt(1)),
+        Primitive::kPrimInt,
+        nullptr);
+    parallel_move.AddMove(
+        locations->InAt(2),
+        LocationFrom(calling_convention.GetRegisterAt(2)),
+        Primitive::kPrimNot,
+        nullptr);
+    codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
+
+    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
+    arm_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
+    CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
+    RestoreLiveRegisters(codegen, locations);
+    __ B(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathARMVIXL"; }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathARMVIXL);
+};
+
+
 inline vixl32::Condition ARMCondition(IfCondition cond) {
   switch (cond) {
     case kCondEQ: return eq;
@@ -151,16 +611,6 @@
   }
 }
 
-void SlowPathCodeARMVIXL::SaveLiveRegisters(CodeGenerator* codegen ATTRIBUTE_UNUSED,
-                                            LocationSummary* locations ATTRIBUTE_UNUSED) {
-  TODO_VIXL32(FATAL);
-}
-
-void SlowPathCodeARMVIXL::RestoreLiveRegisters(CodeGenerator* codegen ATTRIBUTE_UNUSED,
-                                               LocationSummary* locations ATTRIBUTE_UNUSED) {
-  TODO_VIXL32(FATAL);
-}
-
 void CodeGeneratorARMVIXL::DumpCoreRegister(std::ostream& stream, int reg) const {
   stream << vixl32::Register(reg);
 }
@@ -169,7 +619,7 @@
   stream << vixl32::SRegister(reg);
 }
 
-static uint32_t ComputeSRegisterMask(const SRegisterList& regs) {
+static uint32_t ComputeSRegisterListMask(const SRegisterList& regs) {
   uint32_t mask = 0;
   for (uint32_t i = regs.GetFirstSRegister().GetCode();
        i <= regs.GetLastSRegister().GetCode();
@@ -179,6 +629,11 @@
   return mask;
 }
 
+size_t CodeGeneratorARMVIXL::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
+  GetAssembler()->LoadSFromOffset(vixl32::SRegister(reg_id), sp, stack_index);
+  return kArmWordSize;
+}
+
 #undef __
 
 CodeGeneratorARMVIXL::CodeGeneratorARMVIXL(HGraph* graph,
@@ -190,10 +645,11 @@
                     kNumberOfSRegisters,
                     kNumberOfRegisterPairs,
                     kCoreCalleeSaves.GetList(),
-                    ComputeSRegisterMask(kFpuCalleeSaves),
+                    ComputeSRegisterListMask(kFpuCalleeSaves),
                     compiler_options,
                     stats),
       block_labels_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
+      jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
       location_builder_(graph, this),
       instruction_visitor_(graph, this),
       move_resolver_(graph->GetArena(), this),
@@ -201,19 +657,60 @@
       isa_features_(isa_features) {
   // Always save the LR register to mimic Quick.
   AddAllocatedRegister(Location::RegisterLocation(LR));
+  // Give d14 and d15 as scratch registers to VIXL.
+  // They are removed from the register allocator in `SetupBlockedRegisters()`.
+  // TODO(VIXL): We need two scratch D registers for `EmitSwap` when swapping two double stack
+  // slots. If that is sufficiently rare, and we have pressure on FP registers, we could instead
+  // spill in `EmitSwap`. But if we actually are guaranteed to have 32 D registers, we could give
+  // d30 and d31 to VIXL to avoid removing registers from the allocator. If that is the case, we may
+  // also want to investigate giving those 14 other D registers to the allocator.
+  GetVIXLAssembler()->GetScratchVRegisterList()->Combine(d14);
+  GetVIXLAssembler()->GetScratchVRegisterList()->Combine(d15);
 }
 
-#define __ reinterpret_cast<ArmVIXLAssembler*>(GetAssembler())->GetVIXLAssembler()->
+void JumpTableARMVIXL::EmitTable(CodeGeneratorARMVIXL* codegen) {
+  uint32_t num_entries = switch_instr_->GetNumEntries();
+  DCHECK_GE(num_entries, kPackedSwitchCompareJumpThreshold);
+
+  // We are about to use the assembler to place literals directly. Make sure we have enough
+  // underlying code buffer and we have generated the jump table with right size.
+  codegen->GetVIXLAssembler()->GetBuffer().Align();
+  AssemblerAccurateScope aas(codegen->GetVIXLAssembler(),
+                             num_entries * sizeof(int32_t),
+                             CodeBufferCheckScope::kMaximumSize);
+  // TODO(VIXL): Check that using lower case bind is fine here.
+  codegen->GetVIXLAssembler()->bind(&table_start_);
+  const ArenaVector<HBasicBlock*>& successors = switch_instr_->GetBlock()->GetSuccessors();
+  for (uint32_t i = 0; i < num_entries; i++) {
+    vixl32::Label* target_label = codegen->GetLabelOf(successors[i]);
+    DCHECK(target_label->IsBound());
+    int32_t jump_offset = target_label->GetLocation() - table_start_.GetLocation();
+    // When doing BX to address we need to have lower bit set to 1 in T32.
+    if (codegen->GetVIXLAssembler()->IsUsingT32()) {
+      jump_offset++;
+    }
+    DCHECK_GT(jump_offset, std::numeric_limits<int32_t>::min());
+    DCHECK_LE(jump_offset, std::numeric_limits<int32_t>::max());
+    vixl32::Literal<int32_t> literal(jump_offset);
+    codegen->GetVIXLAssembler()->place(&literal);
+  }
+}
+
+void CodeGeneratorARMVIXL::EmitJumpTables() {
+  for (auto&& jump_table : jump_tables_) {
+    jump_table->EmitTable(this);
+  }
+}
+
+#define __ reinterpret_cast<ArmVIXLAssembler*>(GetAssembler())->GetVIXLAssembler()->  // NOLINT
 
 void CodeGeneratorARMVIXL::Finalize(CodeAllocator* allocator) {
+  EmitJumpTables();
   GetAssembler()->FinalizeCode();
   CodeGenerator::Finalize(allocator);
 }
 
 void CodeGeneratorARMVIXL::SetupBlockedRegisters() const {
-  // Don't allocate the dalvik style register pair passing.
-  blocked_register_pairs_[R1_R2] = true;
-
   // Stack register, LR and PC are always reserved.
   blocked_core_registers_[SP] = true;
   blocked_core_registers_[LR] = true;
@@ -225,6 +722,13 @@
   // Reserve temp register.
   blocked_core_registers_[IP] = true;
 
+  // Registers s28-s31 (d14-d15) are left to VIXL for scratch registers.
+  // (They are given to the `MacroAssembler` in `CodeGeneratorARMVIXL::CodeGeneratorARMVIXL`.)
+  blocked_fpu_registers_[28] = true;
+  blocked_fpu_registers_[29] = true;
+  blocked_fpu_registers_[30] = true;
+  blocked_fpu_registers_[31] = true;
+
   if (GetGraph()->IsDebuggable()) {
     // Stubs do not save callee-save floating point registers. If the graph
     // is debuggable, we need to deal with these registers differently. For
@@ -235,25 +739,6 @@
       blocked_fpu_registers_[i] = true;
     }
   }
-
-  UpdateBlockedPairRegisters();
-}
-
-// Blocks all register pairs containing blocked core registers.
-void CodeGeneratorARMVIXL::UpdateBlockedPairRegisters() const {
-  for (int i = 0; i < kNumberOfRegisterPairs; i++) {
-    ArmManagedRegister current =
-        ArmManagedRegister::FromRegisterPair(static_cast<RegisterPair>(i));
-    if (blocked_core_registers_[current.AsRegisterPairLow()]
-        || blocked_core_registers_[current.AsRegisterPairHigh()]) {
-      blocked_register_pairs_[i] = true;
-    }
-  }
-}
-
-void InstructionCodeGeneratorARMVIXL::GenerateSuspendCheck(HSuspendCheck* instruction,
-                                                           HBasicBlock* successor) {
-  TODO_VIXL32(FATAL);
 }
 
 InstructionCodeGeneratorARMVIXL::InstructionCodeGeneratorARMVIXL(HGraph* graph,
@@ -292,18 +777,16 @@
     return;
   }
 
-  UseScratchRegisterScope temps(GetVIXLAssembler());
-  vixl32::Register temp = temps.Acquire();
   if (!skip_overflow_check) {
+    UseScratchRegisterScope temps(GetVIXLAssembler());
+    vixl32::Register temp = temps.Acquire();
     __ Sub(temp, sp, static_cast<int32_t>(GetStackOverflowReservedBytes(kArm)));
     // The load must immediately precede RecordPcInfo.
-    {
-      AssemblerAccurateScope aas(GetVIXLAssembler(),
-                                 kArmInstrMaxSizeInBytes,
-                                 CodeBufferCheckScope::kMaximumSize);
-      __ ldr(temp, MemOperand(temp));
-      RecordPcInfo(nullptr, 0);
-    }
+    AssemblerAccurateScope aas(GetVIXLAssembler(),
+                               kArmInstrMaxSizeInBytes,
+                               CodeBufferCheckScope::kMaximumSize);
+    __ ldr(temp, MemOperand(temp));
+    RecordPcInfo(nullptr, 0);
   }
 
   __ Push(RegisterList(core_spill_mask_));
@@ -320,10 +803,7 @@
 
     __ Vpush(SRegisterList(vixl32::SRegister(first), POPCOUNT(fpu_spill_mask_)));
     GetAssembler()->cfi().AdjustCFAOffset(kArmWordSize * POPCOUNT(fpu_spill_mask_));
-    GetAssembler()->cfi().RelOffsetForMany(DWARFReg(s0),
-                                           0,
-                                           fpu_spill_mask_,
-                                           kArmWordSize);
+    GetAssembler()->cfi().RelOffsetForMany(DWARFReg(s0), 0, fpu_spill_mask_, kArmWordSize);
   }
   int adjust = GetFrameSize() - FrameEntrySpillSize();
   __ Sub(sp, sp, adjust);
@@ -349,8 +829,7 @@
     __ Vpop(SRegisterList(vixl32::SRegister(first), POPCOUNT(fpu_spill_mask_)));
     GetAssembler()->cfi().AdjustCFAOffset(
         -static_cast<int>(kArmWordSize) * POPCOUNT(fpu_spill_mask_));
-    GetAssembler()->cfi().RestoreMany(DWARFReg(vixl32::SRegister(0)),
-                                      fpu_spill_mask_);
+    GetAssembler()->cfi().RestoreMany(DWARFReg(vixl32::SRegister(0)), fpu_spill_mask_);
   }
   // Pop LR into PC to return.
   DCHECK_NE(core_spill_mask_ & (1 << kLrCode), 0U);
@@ -364,29 +843,70 @@
   __ Bind(GetLabelOf(block));
 }
 
-void CodeGeneratorARMVIXL::MoveConstant(Location destination, int32_t value) {
-  TODO_VIXL32(FATAL);
+void CodeGeneratorARMVIXL::Move32(Location destination, Location source) {
+  if (source.Equals(destination)) {
+    return;
+  }
+  if (destination.IsRegister()) {
+    if (source.IsRegister()) {
+      __ Mov(RegisterFrom(destination), RegisterFrom(source));
+    } else if (source.IsFpuRegister()) {
+      __ Vmov(RegisterFrom(destination), SRegisterFrom(source));
+    } else {
+      GetAssembler()->LoadFromOffset(kLoadWord,
+                                     RegisterFrom(destination),
+                                     sp,
+                                     source.GetStackIndex());
+    }
+  } else if (destination.IsFpuRegister()) {
+    if (source.IsRegister()) {
+      __ Vmov(SRegisterFrom(destination), RegisterFrom(source));
+    } else if (source.IsFpuRegister()) {
+      __ Vmov(SRegisterFrom(destination), SRegisterFrom(source));
+    } else {
+      GetAssembler()->LoadSFromOffset(SRegisterFrom(destination), sp, source.GetStackIndex());
+    }
+  } else {
+    DCHECK(destination.IsStackSlot()) << destination;
+    if (source.IsRegister()) {
+      GetAssembler()->StoreToOffset(kStoreWord,
+                                    RegisterFrom(source),
+                                    sp,
+                                    destination.GetStackIndex());
+    } else if (source.IsFpuRegister()) {
+      GetAssembler()->StoreSToOffset(SRegisterFrom(source), sp, destination.GetStackIndex());
+    } else {
+      DCHECK(source.IsStackSlot()) << source;
+      UseScratchRegisterScope temps(GetVIXLAssembler());
+      vixl32::Register temp = temps.Acquire();
+      GetAssembler()->LoadFromOffset(kLoadWord, temp, sp, source.GetStackIndex());
+      GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex());
+    }
+  }
+}
+
+void CodeGeneratorARMVIXL::MoveConstant(Location location, int32_t value) {
+  DCHECK(location.IsRegister());
+  __ Mov(RegisterFrom(location), value);
 }
 
 void CodeGeneratorARMVIXL::MoveLocation(Location dst, Location src, Primitive::Type dst_type) {
-  TODO_VIXL32(FATAL);
+  // TODO(VIXL): Maybe refactor to have the 'move' implementation here and use it in
+  // `ParallelMoveResolverARMVIXL::EmitMove`, as is done in the `arm64` backend.
+  HParallelMove move(GetGraph()->GetArena());
+  move.AddMove(src, dst, dst_type, nullptr);
+  GetMoveResolver()->EmitNativeCode(&move);
 }
 
 void CodeGeneratorARMVIXL::AddLocationAsTemp(Location location, LocationSummary* locations) {
-  TODO_VIXL32(FATAL);
-}
-
-uintptr_t CodeGeneratorARMVIXL::GetAddressOf(HBasicBlock* block) {
-  TODO_VIXL32(FATAL);
-  return 0;
-}
-
-void CodeGeneratorARMVIXL::GenerateImplicitNullCheck(HNullCheck* null_check) {
-  TODO_VIXL32(FATAL);
-}
-
-void CodeGeneratorARMVIXL::GenerateExplicitNullCheck(HNullCheck* null_check) {
-  TODO_VIXL32(FATAL);
+  if (location.IsRegister()) {
+    locations->AddTemp(location);
+  } else if (location.IsRegisterPair()) {
+    locations->AddTemp(LocationFrom(LowRegisterFrom(location)));
+    locations->AddTemp(LocationFrom(HighRegisterFrom(location)));
+  } else {
+    UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
+  }
 }
 
 void CodeGeneratorARMVIXL::InvokeRuntime(QuickEntrypointEnum entrypoint,
@@ -396,6 +916,8 @@
   ValidateInvokeRuntime(entrypoint, instruction, slow_path);
   GenerateInvokeRuntime(GetThreadOffset<kArmPointerSize>(entrypoint).Int32Value());
   if (EntrypointRequiresStackMap(entrypoint)) {
+    // TODO(VIXL): If necessary, use a scope to ensure we record the pc info immediately after the
+    // previous instruction.
     RecordPcInfo(instruction, dex_pc, slow_path);
   }
 }
@@ -412,47 +934,6 @@
   __ Blx(lr);
 }
 
-// Check if the desired_string_load_kind is supported. If it is, return it,
-// otherwise return a fall-back kind that should be used instead.
-HLoadString::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadStringKind(
-      HLoadString::LoadKind desired_string_load_kind) {
-  TODO_VIXL32(FATAL);
-  return desired_string_load_kind;
-}
-
-// Check if the desired_class_load_kind is supported. If it is, return it,
-// otherwise return a fall-back kind that should be used instead.
-HLoadClass::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadClassKind(
-      HLoadClass::LoadKind desired_class_load_kind) {
-  TODO_VIXL32(FATAL);
-  return desired_class_load_kind;
-}
-
-// Check if the desired_dispatch_info is supported. If it is, return it,
-// otherwise return a fall-back info that should be used instead.
-HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARMVIXL::GetSupportedInvokeStaticOrDirectDispatch(
-      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
-      HInvokeStaticOrDirect* invoke) {
-  TODO_VIXL32(FATAL);
-  return desired_dispatch_info;
-}
-
-// Generate a call to a static or direct method.
-void CodeGeneratorARMVIXL::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
-                                                      Location temp) {
-  TODO_VIXL32(FATAL);
-}
-
-// Generate a call to a virtual method.
-void CodeGeneratorARMVIXL::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp) {
-  TODO_VIXL32(FATAL);
-}
-
-// Copy the result of a call into the given target.
-void CodeGeneratorARMVIXL::MoveFromReturnRegister(Location trg, Primitive::Type type) {
-  TODO_VIXL32(FATAL);
-}
-
 void InstructionCodeGeneratorARMVIXL::HandleGoto(HInstruction* got, HBasicBlock* successor) {
   DCHECK(!successor->IsExitBlock());
   HBasicBlock* block = got->GetBlock();
@@ -480,6 +961,17 @@
   HandleGoto(got, got->GetSuccessor());
 }
 
+void LocationsBuilderARMVIXL::VisitTryBoundary(HTryBoundary* try_boundary) {
+  try_boundary->SetLocations(nullptr);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitTryBoundary(HTryBoundary* try_boundary) {
+  HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
+  if (!successor->IsExitBlock()) {
+    HandleGoto(try_boundary, successor);
+  }
+}
+
 void LocationsBuilderARMVIXL::VisitExit(HExit* exit) {
   exit->SetLocations(nullptr);
 }
@@ -512,16 +1004,14 @@
       __ Vcmp(F32, InputSRegisterAt(instruction, 0), 0.0);
     } else {
       DCHECK_EQ(type, Primitive::kPrimDouble);
-      __ Vcmp(F64, FromLowSToD(lhs_loc.AsFpuRegisterPairLow<vixl32::SRegister>()), 0.0);
+      __ Vcmp(F64, DRegisterFrom(lhs_loc), 0.0);
     }
   } else {
     if (type == Primitive::kPrimFloat) {
-      __ Vcmp(F32, InputSRegisterAt(instruction, 0), InputSRegisterAt(instruction, 1));
+      __ Vcmp(InputSRegisterAt(instruction, 0), InputSRegisterAt(instruction, 1));
     } else {
       DCHECK_EQ(type, Primitive::kPrimDouble);
-      __ Vcmp(F64,
-              FromLowSToD(lhs_loc.AsFpuRegisterPairLow<vixl32::SRegister>()),
-              FromLowSToD(rhs_loc.AsFpuRegisterPairLow<vixl32::SRegister>()));
+      __ Vcmp(DRegisterFrom(lhs_loc), DRegisterFrom(rhs_loc));
     }
   }
 }
@@ -542,8 +1032,8 @@
   Location right = locations->InAt(1);
   IfCondition if_cond = cond->GetCondition();
 
-  vixl32::Register left_high = left.AsRegisterPairHigh<vixl32::Register>();
-  vixl32::Register left_low = left.AsRegisterPairLow<vixl32::Register>();
+  vixl32::Register left_high = HighRegisterFrom(left);
+  vixl32::Register left_low = LowRegisterFrom(left);
   IfCondition true_high_cond = if_cond;
   IfCondition false_high_cond = cond->GetOppositeCondition();
   vixl32::Condition final_condition = ARMUnsignedCondition(if_cond);  // unsigned on lower part
@@ -598,8 +1088,8 @@
     // Must be equal high, so compare the lows.
     __ Cmp(left_low, val_low);
   } else {
-    vixl32::Register right_high = right.AsRegisterPairHigh<vixl32::Register>();
-    vixl32::Register right_low = right.AsRegisterPairLow<vixl32::Register>();
+    vixl32::Register right_high = HighRegisterFrom(right);
+    vixl32::Register right_low = LowRegisterFrom(right);
 
     __ Cmp(left_high, right_high);
     if (if_cond == kCondNE) {
@@ -740,15 +1230,65 @@
 void InstructionCodeGeneratorARMVIXL::VisitIf(HIf* if_instr) {
   HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
   HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
-  vixl32::Label* true_target =
-      codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
-          nullptr : codegen_->GetLabelOf(true_successor);
-  vixl32::Label* false_target =
-      codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
-          nullptr : codegen_->GetLabelOf(false_successor);
+  vixl32::Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
+      nullptr : codegen_->GetLabelOf(true_successor);
+  vixl32::Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
+      nullptr : codegen_->GetLabelOf(false_successor);
   GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target);
 }
 
+void LocationsBuilderARMVIXL::VisitDeoptimize(HDeoptimize* deoptimize) {
+  LocationSummary* locations = new (GetGraph()->GetArena())
+      LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
+  locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
+  if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
+    locations->SetInAt(0, Location::RequiresRegister());
+  }
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitDeoptimize(HDeoptimize* deoptimize) {
+  SlowPathCodeARMVIXL* slow_path =
+      deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathARMVIXL>(deoptimize);
+  GenerateTestAndBranch(deoptimize,
+                        /* condition_input_index */ 0,
+                        slow_path->GetEntryLabel(),
+                        /* false_target */ nullptr);
+}
+
+void LocationsBuilderARMVIXL::VisitSelect(HSelect* select) {
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select);
+  if (Primitive::IsFloatingPointType(select->GetType())) {
+    locations->SetInAt(0, Location::RequiresFpuRegister());
+    locations->SetInAt(1, Location::RequiresFpuRegister());
+  } else {
+    locations->SetInAt(0, Location::RequiresRegister());
+    locations->SetInAt(1, Location::RequiresRegister());
+  }
+  if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
+    locations->SetInAt(2, Location::RequiresRegister());
+  }
+  locations->SetOut(Location::SameAsFirstInput());
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitSelect(HSelect* select) {
+  LocationSummary* locations = select->GetLocations();
+  vixl32::Label false_target;
+  GenerateTestAndBranch(select,
+                        /* condition_input_index */ 2,
+                        /* true_target */ nullptr,
+                        &false_target);
+  codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
+  __ Bind(&false_target);
+}
+
+void LocationsBuilderARMVIXL::VisitNativeDebugInfo(HNativeDebugInfo* info) {
+  new (GetGraph()->GetArena()) LocationSummary(info);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitNativeDebugInfo(HNativeDebugInfo*) {
+  // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile.
+}
+
 void CodeGeneratorARMVIXL::GenerateNop() {
   __ Nop();
 }
@@ -766,7 +1306,7 @@
       }
       break;
 
-    // TODO: https://android-review.googlesource.com/#/c/252265/
+    // TODO(VIXL): https://android-review.googlesource.com/#/c/252265/
     case Primitive::kPrimFloat:
     case Primitive::kPrimDouble:
       locations->SetInAt(0, Location::RequiresFpuRegister());
@@ -790,28 +1330,19 @@
     return;
   }
 
-  LocationSummary* locations = cond->GetLocations();
-  Location right = locations->InAt(1);
   vixl32::Register out = OutputRegister(cond);
   vixl32::Label true_label, false_label;
 
   switch (cond->InputAt(0)->GetType()) {
     default: {
       // Integer case.
-      if (right.IsRegister()) {
-        __ Cmp(InputRegisterAt(cond, 0), InputRegisterAt(cond, 1));
-      } else {
-        DCHECK(right.IsConstant());
-        __ Cmp(InputRegisterAt(cond, 0), CodeGenerator::GetInt32ValueOf(right.GetConstant()));
-      }
-      {
-        AssemblerAccurateScope aas(GetVIXLAssembler(),
-                                   kArmInstrMaxSizeInBytes * 3u,
-                                   CodeBufferCheckScope::kMaximumSize);
-        __ ite(ARMCondition(cond->GetCondition()));
-        __ mov(ARMCondition(cond->GetCondition()), OutputRegister(cond), 1);
-        __ mov(ARMCondition(cond->GetOppositeCondition()), OutputRegister(cond), 0);
-      }
+      __ Cmp(InputRegisterAt(cond, 0), InputOperandAt(cond, 1));
+      AssemblerAccurateScope aas(GetVIXLAssembler(),
+                                 kArmInstrMaxSizeInBytes * 3u,
+                                 CodeBufferCheckScope::kMaximumSize);
+      __ ite(ARMCondition(cond->GetCondition()));
+      __ mov(ARMCondition(cond->GetCondition()), OutputRegister(cond), 1);
+      __ mov(ARMCondition(cond->GetOppositeCondition()), OutputRegister(cond), 0);
       return;
     }
     case Primitive::kPrimLong:
@@ -928,6 +1459,16 @@
   // Will be generated at use site.
 }
 
+void LocationsBuilderARMVIXL::VisitNullConstant(HNullConstant* constant) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
+  locations->SetOut(Location::ConstantLocation(constant));
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
+  // Will be generated at use site.
+}
+
 void LocationsBuilderARMVIXL::VisitLongConstant(HLongConstant* constant) {
   LocationSummary* locations =
       new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
@@ -938,6 +1479,28 @@
   // Will be generated at use site.
 }
 
+void LocationsBuilderARMVIXL::VisitFloatConstant(HFloatConstant* constant) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
+  locations->SetOut(Location::ConstantLocation(constant));
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitFloatConstant(
+    HFloatConstant* constant ATTRIBUTE_UNUSED) {
+  // Will be generated at use site.
+}
+
+void LocationsBuilderARMVIXL::VisitDoubleConstant(HDoubleConstant* constant) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
+  locations->SetOut(Location::ConstantLocation(constant));
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitDoubleConstant(
+    HDoubleConstant* constant ATTRIBUTE_UNUSED) {
+  // Will be generated at use site.
+}
+
 void LocationsBuilderARMVIXL::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
   memory_barrier->SetLocations(nullptr);
 }
@@ -964,6 +1527,205 @@
   codegen_->GenerateFrameExit();
 }
 
+void LocationsBuilderARMVIXL::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
+  // The trampoline uses the same calling convention as dex calling conventions,
+  // except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
+  // the method_idx.
+  HandleInvoke(invoke);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
+  codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
+}
+
+void LocationsBuilderARMVIXL::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
+
+  IntrinsicLocationsBuilderARMVIXL intrinsic(codegen_);
+  if (intrinsic.TryDispatch(invoke)) {
+    if (invoke->GetLocations()->CanCall() && invoke->HasPcRelativeDexCache()) {
+      invoke->GetLocations()->SetInAt(invoke->GetSpecialInputIndex(), Location::Any());
+    }
+    return;
+  }
+
+  HandleInvoke(invoke);
+
+  // TODO(VIXL): invoke->HasPcRelativeDexCache()
+}
+
+static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorARMVIXL* codegen) {
+  if (invoke->GetLocations()->Intrinsified()) {
+    IntrinsicCodeGeneratorARMVIXL intrinsic(codegen);
+    intrinsic.Dispatch(invoke);
+    return true;
+  }
+  return false;
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
+  // Explicit clinit checks triggered by static invokes must have been pruned by
+  // art::PrepareForRegisterAllocation.
+  DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
+
+  if (TryGenerateIntrinsicCode(invoke, codegen_)) {
+    return;
+  }
+
+  LocationSummary* locations = invoke->GetLocations();
+  DCHECK(locations->HasTemps());
+  codegen_->GenerateStaticOrDirectCall(invoke, locations->GetTemp(0));
+  // TODO(VIXL): If necessary, use a scope to ensure we record the pc info immediately after the
+  // previous instruction.
+  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+}
+
+void LocationsBuilderARMVIXL::HandleInvoke(HInvoke* invoke) {
+  InvokeDexCallingConventionVisitorARM calling_convention_visitor;
+  CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
+}
+
+void LocationsBuilderARMVIXL::VisitInvokeVirtual(HInvokeVirtual* invoke) {
+  IntrinsicLocationsBuilderARMVIXL intrinsic(codegen_);
+  if (intrinsic.TryDispatch(invoke)) {
+    return;
+  }
+
+  HandleInvoke(invoke);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitInvokeVirtual(HInvokeVirtual* invoke) {
+  if (TryGenerateIntrinsicCode(invoke, codegen_)) {
+    return;
+  }
+
+  codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
+  DCHECK(!codegen_->IsLeafMethod());
+  // TODO(VIXL): If necessary, use a scope to ensure we record the pc info immediately after the
+  // previous instruction.
+  codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+}
+
+void LocationsBuilderARMVIXL::VisitInvokeInterface(HInvokeInterface* invoke) {
+  HandleInvoke(invoke);
+  // Add the hidden argument.
+  invoke->GetLocations()->AddTemp(LocationFrom(r12));
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitInvokeInterface(HInvokeInterface* invoke) {
+  // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
+  LocationSummary* locations = invoke->GetLocations();
+  vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
+  vixl32::Register hidden_reg = RegisterFrom(locations->GetTemp(1));
+  Location receiver = locations->InAt(0);
+  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+
+  DCHECK(!receiver.IsStackSlot());
+
+  // /* HeapReference<Class> */ temp = receiver->klass_
+  GetAssembler()->LoadFromOffset(kLoadWord, temp, RegisterFrom(receiver), class_offset);
+
+  codegen_->MaybeRecordImplicitNullCheck(invoke);
+  // Instead of simply (possibly) unpoisoning `temp` here, we should
+  // emit a read barrier for the previous class reference load.
+  // However this is not required in practice, as this is an
+  // intermediate/temporary reference and because the current
+  // concurrent copying collector keeps the from-space memory
+  // intact/accessible until the end of the marking phase (the
+  // concurrent copying collector may not in the future).
+  GetAssembler()->MaybeUnpoisonHeapReference(temp);
+  GetAssembler()->LoadFromOffset(kLoadWord,
+                                 temp,
+                                 temp,
+                                 mirror::Class::ImtPtrOffset(kArmPointerSize).Uint32Value());
+  uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
+      invoke->GetImtIndex(), kArmPointerSize));
+  // temp = temp->GetImtEntryAt(method_offset);
+  GetAssembler()->LoadFromOffset(kLoadWord, temp, temp, method_offset);
+  uint32_t entry_point =
+      ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value();
+  // LR = temp->GetEntryPoint();
+  GetAssembler()->LoadFromOffset(kLoadWord, lr, temp, entry_point);
+
+  // Set the hidden (in r12) argument. It is done here, right before a BLX to prevent other
+  // instruction from clobbering it as they might use r12 as a scratch register.
+  DCHECK(hidden_reg.Is(r12));
+  __ Mov(hidden_reg, invoke->GetDexMethodIndex());
+
+  {
+    AssemblerAccurateScope aas(GetVIXLAssembler(),
+                               kArmInstrMaxSizeInBytes,
+                               CodeBufferCheckScope::kMaximumSize);
+    // LR();
+    __ blx(lr);
+    DCHECK(!codegen_->IsLeafMethod());
+    codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
+  }
+}
+
+void LocationsBuilderARMVIXL::VisitNeg(HNeg* neg) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(neg, LocationSummary::kNoCall);
+  switch (neg->GetResultType()) {
+    case Primitive::kPrimInt: {
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+      break;
+    }
+    case Primitive::kPrimLong: {
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+      break;
+    }
+
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+      break;
+
+    default:
+      LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
+  }
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitNeg(HNeg* neg) {
+  LocationSummary* locations = neg->GetLocations();
+  Location out = locations->Out();
+  Location in = locations->InAt(0);
+  switch (neg->GetResultType()) {
+    case Primitive::kPrimInt:
+      __ Rsb(OutputRegister(neg), InputRegisterAt(neg, 0), 0);
+      break;
+
+    case Primitive::kPrimLong:
+      // out.lo = 0 - in.lo (and update the carry/borrow (C) flag)
+      __ Rsbs(LowRegisterFrom(out), LowRegisterFrom(in), 0);
+      // We cannot emit an RSC (Reverse Subtract with Carry)
+      // instruction here, as it does not exist in the Thumb-2
+      // instruction set.  We use the following approach
+      // using SBC and SUB instead.
+      //
+      // out.hi = -C
+      __ Sbc(HighRegisterFrom(out), HighRegisterFrom(out), HighRegisterFrom(out));
+      // out.hi = out.hi - in.hi
+      __ Sub(HighRegisterFrom(out), HighRegisterFrom(out), HighRegisterFrom(in));
+      break;
+
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      // TODO(VIXL): Consider introducing an InputVRegister()
+      // helper function (equivalent to InputRegister()).
+      __ Vneg(OutputVRegister(neg), InputVRegisterAt(neg, 0));
+      break;
+
+    default:
+      LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
+  }
+}
+
 void LocationsBuilderARMVIXL::VisitTypeConversion(HTypeConversion* conversion) {
   Primitive::Type result_type = conversion->GetResultType();
   Primitive::Type input_type = conversion->GetInputType();
@@ -1067,20 +1829,18 @@
 
         case Primitive::kPrimFloat: {
           // Processing a Dex `float-to-long' instruction.
-          InvokeRuntimeCallingConvention calling_convention;
-          locations->SetInAt(0, Location::FpuRegisterLocation(
-              calling_convention.GetFpuRegisterAt(0)));
-          locations->SetOut(Location::RegisterPairLocation(R0, R1));
+          InvokeRuntimeCallingConventionARMVIXL calling_convention;
+          locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
+          locations->SetOut(LocationFrom(r0, r1));
           break;
         }
 
         case Primitive::kPrimDouble: {
           // Processing a Dex `double-to-long' instruction.
-          InvokeRuntimeCallingConvention calling_convention;
-          locations->SetInAt(0, Location::FpuRegisterPairLocation(
-              calling_convention.GetFpuRegisterAt(0),
-              calling_convention.GetFpuRegisterAt(1)));
-          locations->SetOut(Location::RegisterPairLocation(R0, R1));
+          InvokeRuntimeCallingConventionARMVIXL calling_convention;
+          locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0),
+                                             calling_convention.GetFpuRegisterAt(1)));
+          locations->SetOut(LocationFrom(r0, r1));
           break;
         }
 
@@ -1125,10 +1885,10 @@
 
         case Primitive::kPrimLong: {
           // Processing a Dex `long-to-float' instruction.
-          InvokeRuntimeCallingConvention calling_convention;
-          locations->SetInAt(0, Location::RegisterPairLocation(
-              calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
-          locations->SetOut(Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
+          InvokeRuntimeCallingConventionARMVIXL calling_convention;
+          locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0),
+                                             calling_convention.GetRegisterAt(1)));
+          locations->SetOut(LocationFrom(calling_convention.GetFpuRegisterAt(0)));
           break;
         }
 
@@ -1195,7 +1955,7 @@
       switch (input_type) {
         case Primitive::kPrimLong:
           // Type conversion from long to byte is a result of code transformations.
-          __ Sbfx(OutputRegister(conversion), in.AsRegisterPairLow<vixl32::Register>(), 0, 8);
+          __ Sbfx(OutputRegister(conversion), LowRegisterFrom(in), 0, 8);
           break;
         case Primitive::kPrimBoolean:
           // Boolean input is a result of code transformations.
@@ -1216,7 +1976,7 @@
       switch (input_type) {
         case Primitive::kPrimLong:
           // Type conversion from long to short is a result of code transformations.
-          __ Sbfx(OutputRegister(conversion), in.AsRegisterPairLow<vixl32::Register>(), 0, 16);
+          __ Sbfx(OutputRegister(conversion), LowRegisterFrom(in), 0, 16);
           break;
         case Primitive::kPrimBoolean:
           // Boolean input is a result of code transformations.
@@ -1239,7 +1999,7 @@
           // Processing a Dex `long-to-int' instruction.
           DCHECK(out.IsRegister());
           if (in.IsRegisterPair()) {
-            __ Mov(OutputRegister(conversion), in.AsRegisterPairLow<vixl32::Register>());
+            __ Mov(OutputRegister(conversion), LowRegisterFrom(in));
           } else if (in.IsDoubleStackSlot()) {
             GetAssembler()->LoadFromOffset(kLoadWord,
                                            OutputRegister(conversion),
@@ -1255,17 +2015,16 @@
 
         case Primitive::kPrimFloat: {
           // Processing a Dex `float-to-int' instruction.
-          vixl32::SRegister temp = locations->GetTemp(0).AsFpuRegisterPairLow<vixl32::SRegister>();
-          __ Vcvt(I32, F32, temp, InputSRegisterAt(conversion, 0));
+          vixl32::SRegister temp = LowSRegisterFrom(locations->GetTemp(0));
+          __ Vcvt(S32, F32, temp, InputSRegisterAt(conversion, 0));
           __ Vmov(OutputRegister(conversion), temp);
           break;
         }
 
         case Primitive::kPrimDouble: {
           // Processing a Dex `double-to-int' instruction.
-          vixl32::SRegister temp_s =
-              locations->GetTemp(0).AsFpuRegisterPairLow<vixl32::SRegister>();
-          __ Vcvt(I32, F64, temp_s, FromLowSToD(in.AsFpuRegisterPairLow<vixl32::SRegister>()));
+          vixl32::SRegister temp_s = LowSRegisterFrom(locations->GetTemp(0));
+          __ Vcvt(S32, F64, temp_s, DRegisterFrom(in));
           __ Vmov(OutputRegister(conversion), temp_s);
           break;
         }
@@ -1287,11 +2046,9 @@
           // Processing a Dex `int-to-long' instruction.
           DCHECK(out.IsRegisterPair());
           DCHECK(in.IsRegister());
-          __ Mov(out.AsRegisterPairLow<vixl32::Register>(), InputRegisterAt(conversion, 0));
+          __ Mov(LowRegisterFrom(out), InputRegisterAt(conversion, 0));
           // Sign extension.
-          __ Asr(out.AsRegisterPairHigh<vixl32::Register>(),
-                 out.AsRegisterPairLow<vixl32::Register>(),
-                 31);
+          __ Asr(HighRegisterFrom(out), LowRegisterFrom(out), 31);
           break;
 
         case Primitive::kPrimFloat:
@@ -1316,7 +2073,7 @@
       switch (input_type) {
         case Primitive::kPrimLong:
           // Type conversion from long to char is a result of code transformations.
-          __ Ubfx(OutputRegister(conversion), in.AsRegisterPairLow<vixl32::Register>(), 0, 16);
+          __ Ubfx(OutputRegister(conversion), LowRegisterFrom(in), 0, 16);
           break;
         case Primitive::kPrimBoolean:
           // Boolean input is a result of code transformations.
@@ -1343,7 +2100,7 @@
         case Primitive::kPrimChar: {
           // Processing a Dex `int-to-float' instruction.
           __ Vmov(OutputSRegister(conversion), InputRegisterAt(conversion, 0));
-          __ Vcvt(F32, I32, OutputSRegister(conversion), OutputSRegister(conversion));
+          __ Vcvt(F32, S32, OutputSRegister(conversion), OutputSRegister(conversion));
           break;
         }
 
@@ -1355,10 +2112,7 @@
 
         case Primitive::kPrimDouble:
           // Processing a Dex `double-to-float' instruction.
-          __ Vcvt(F32,
-                  F64,
-                  OutputSRegister(conversion),
-                  FromLowSToD(in.AsFpuRegisterPairLow<vixl32::SRegister>()));
+          __ Vcvt(F32, F64, OutputSRegister(conversion), DRegisterFrom(in));
           break;
 
         default:
@@ -1376,37 +2130,26 @@
         case Primitive::kPrimInt:
         case Primitive::kPrimChar: {
           // Processing a Dex `int-to-double' instruction.
-          __ Vmov(out.AsFpuRegisterPairLow<vixl32::SRegister>(), InputRegisterAt(conversion, 0));
-          __ Vcvt(F64,
-                  I32,
-                  FromLowSToD(out.AsFpuRegisterPairLow<vixl32::SRegister>()),
-                  out.AsFpuRegisterPairLow<vixl32::SRegister>());
+          __ Vmov(LowSRegisterFrom(out), InputRegisterAt(conversion, 0));
+          __ Vcvt(F64, S32, DRegisterFrom(out), LowSRegisterFrom(out));
           break;
         }
 
         case Primitive::kPrimLong: {
           // Processing a Dex `long-to-double' instruction.
-          vixl32::Register low = in.AsRegisterPairLow<vixl32::Register>();
-          vixl32::Register high = in.AsRegisterPairHigh<vixl32::Register>();
-
-          vixl32::SRegister out_s = out.AsFpuRegisterPairLow<vixl32::SRegister>();
-          vixl32::DRegister out_d = FromLowSToD(out_s);
-
-          vixl32::SRegister temp_s =
-              locations->GetTemp(0).AsFpuRegisterPairLow<vixl32::SRegister>();
-          vixl32::DRegister temp_d = FromLowSToD(temp_s);
-
-          vixl32::SRegister constant_s =
-              locations->GetTemp(1).AsFpuRegisterPairLow<vixl32::SRegister>();
-          vixl32::DRegister constant_d = FromLowSToD(constant_s);
+          vixl32::Register low = LowRegisterFrom(in);
+          vixl32::Register high = HighRegisterFrom(in);
+          vixl32::SRegister out_s = LowSRegisterFrom(out);
+          vixl32::DRegister out_d = DRegisterFrom(out);
+          vixl32::SRegister temp_s = LowSRegisterFrom(locations->GetTemp(0));
+          vixl32::DRegister temp_d = DRegisterFrom(locations->GetTemp(0));
+          vixl32::DRegister constant_d = DRegisterFrom(locations->GetTemp(1));
 
           // temp_d = int-to-double(high)
           __ Vmov(temp_s, high);
-          __ Vcvt(F64, I32, temp_d, temp_s);
+          __ Vcvt(F64, S32, temp_d, temp_s);
           // constant_d = k2Pow32EncodingForDouble
-          __ Vmov(F64,
-                  constant_d,
-                  vixl32::DOperand(bit_cast<double, int64_t>(k2Pow32EncodingForDouble)));
+          __ Vmov(constant_d, bit_cast<double, int64_t>(k2Pow32EncodingForDouble));
           // out_d = unsigned-to-double(low)
           __ Vmov(out_s, low);
           __ Vcvt(F64, U32, out_d, out_s);
@@ -1417,10 +2160,7 @@
 
         case Primitive::kPrimFloat:
           // Processing a Dex `float-to-double' instruction.
-          __ Vcvt(F64,
-                  F32,
-                  FromLowSToD(out.AsFpuRegisterPairLow<vixl32::SRegister>()),
-                  InputSRegisterAt(conversion, 0));
+          __ Vcvt(F64, F32, DRegisterFrom(out), InputSRegisterAt(conversion, 0));
           break;
 
         default:
@@ -1446,7 +2186,7 @@
       break;
     }
 
-    // TODO: https://android-review.googlesource.com/#/c/254144/
+    // TODO(VIXL): https://android-review.googlesource.com/#/c/254144/
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
       locations->SetInAt(1, Location::RequiresRegister());
@@ -1479,28 +2219,17 @@
       }
       break;
 
-    // TODO: https://android-review.googlesource.com/#/c/254144/
+    // TODO(VIXL): https://android-review.googlesource.com/#/c/254144/
     case Primitive::kPrimLong: {
       DCHECK(second.IsRegisterPair());
-      __ Adds(out.AsRegisterPairLow<vixl32::Register>(),
-              first.AsRegisterPairLow<vixl32::Register>(),
-              Operand(second.AsRegisterPairLow<vixl32::Register>()));
-      __ Adc(out.AsRegisterPairHigh<vixl32::Register>(),
-             first.AsRegisterPairHigh<vixl32::Register>(),
-             second.AsRegisterPairHigh<vixl32::Register>());
+      __ Adds(LowRegisterFrom(out), LowRegisterFrom(first), LowRegisterFrom(second));
+      __ Adc(HighRegisterFrom(out), HighRegisterFrom(first), HighRegisterFrom(second));
       break;
     }
 
-    case Primitive::kPrimFloat: {
-      __ Vadd(F32, OutputSRegister(add), InputSRegisterAt(add, 0), InputSRegisterAt(add, 1));
-      }
-      break;
-
+    case Primitive::kPrimFloat:
     case Primitive::kPrimDouble:
-      __ Vadd(F64,
-              FromLowSToD(out.AsFpuRegisterPairLow<vixl32::SRegister>()),
-              FromLowSToD(first.AsFpuRegisterPairLow<vixl32::SRegister>()),
-              FromLowSToD(second.AsFpuRegisterPairLow<vixl32::SRegister>()));
+      __ Vadd(OutputVRegister(add), InputVRegisterAt(add, 0), InputVRegisterAt(add, 1));
       break;
 
     default:
@@ -1519,7 +2248,7 @@
       break;
     }
 
-    // TODO: https://android-review.googlesource.com/#/c/254144/
+    // TODO(VIXL): https://android-review.googlesource.com/#/c/254144/
     case Primitive::kPrimLong: {
       locations->SetInAt(0, Location::RequiresRegister());
       locations->SetInAt(1, Location::RequiresRegister());
@@ -1545,40 +2274,22 @@
   Location second = locations->InAt(1);
   switch (sub->GetResultType()) {
     case Primitive::kPrimInt: {
-      if (second.IsRegister()) {
-        __ Sub(OutputRegister(sub), InputRegisterAt(sub, 0), InputRegisterAt(sub, 1));
-      } else {
-        __ Sub(OutputRegister(sub),
-               InputRegisterAt(sub, 0),
-               second.GetConstant()->AsIntConstant()->GetValue());
-      }
+      __ Sub(OutputRegister(sub), InputRegisterAt(sub, 0), InputOperandAt(sub, 1));
       break;
     }
 
-    // TODO: https://android-review.googlesource.com/#/c/254144/
+    // TODO(VIXL): https://android-review.googlesource.com/#/c/254144/
     case Primitive::kPrimLong: {
       DCHECK(second.IsRegisterPair());
-      __ Subs(out.AsRegisterPairLow<vixl32::Register>(),
-              first.AsRegisterPairLow<vixl32::Register>(),
-              Operand(second.AsRegisterPairLow<vixl32::Register>()));
-      __ Sbc(out.AsRegisterPairHigh<vixl32::Register>(),
-             first.AsRegisterPairHigh<vixl32::Register>(),
-             Operand(second.AsRegisterPairHigh<vixl32::Register>()));
+      __ Subs(LowRegisterFrom(out), LowRegisterFrom(first), LowRegisterFrom(second));
+      __ Sbc(HighRegisterFrom(out), HighRegisterFrom(first), HighRegisterFrom(second));
       break;
     }
 
-    case Primitive::kPrimFloat: {
-      __ Vsub(F32, OutputSRegister(sub), InputSRegisterAt(sub, 0), InputSRegisterAt(sub, 1));
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      __ Vsub(OutputVRegister(sub), InputVRegisterAt(sub, 0), InputVRegisterAt(sub, 1));
       break;
-    }
-
-    case Primitive::kPrimDouble: {
-      __ Vsub(F64,
-              FromLowSToD(out.AsFpuRegisterPairLow<vixl32::SRegister>()),
-              FromLowSToD(first.AsFpuRegisterPairLow<vixl32::SRegister>()),
-              FromLowSToD(second.AsFpuRegisterPairLow<vixl32::SRegister>()));
-      break;
-    }
 
     default:
       LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
@@ -1621,12 +2332,12 @@
       break;
     }
     case Primitive::kPrimLong: {
-      vixl32::Register out_hi = out.AsRegisterPairHigh<vixl32::Register>();
-      vixl32::Register out_lo = out.AsRegisterPairLow<vixl32::Register>();
-      vixl32::Register in1_hi = first.AsRegisterPairHigh<vixl32::Register>();
-      vixl32::Register in1_lo = first.AsRegisterPairLow<vixl32::Register>();
-      vixl32::Register in2_hi = second.AsRegisterPairHigh<vixl32::Register>();
-      vixl32::Register in2_lo = second.AsRegisterPairLow<vixl32::Register>();
+      vixl32::Register out_hi = HighRegisterFrom(out);
+      vixl32::Register out_lo = LowRegisterFrom(out);
+      vixl32::Register in1_hi = HighRegisterFrom(first);
+      vixl32::Register in1_lo = LowRegisterFrom(first);
+      vixl32::Register in2_hi = HighRegisterFrom(second);
+      vixl32::Register in2_lo = LowRegisterFrom(second);
 
       // Extra checks to protect caused by the existence of R1_R2.
       // The algorithm is wrong if out.hi is either in1.lo or in2.lo:
@@ -1649,82 +2360,25 @@
       // out.lo <- (in1.lo * in2.lo)[31:0];
       __ Umull(out_lo, temp, in1_lo, in2_lo);
       // out.hi <- in2.hi * in1.lo +  in2.lo * in1.hi + (in1.lo * in2.lo)[63:32]
-      __ Add(out_hi, out_hi, Operand(temp));
+      __ Add(out_hi, out_hi, temp);
       break;
     }
 
-    case Primitive::kPrimFloat: {
-      __ Vmul(F32, OutputSRegister(mul), InputSRegisterAt(mul, 0), InputSRegisterAt(mul, 1));
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      __ Vmul(OutputVRegister(mul), InputVRegisterAt(mul, 0), InputVRegisterAt(mul, 1));
       break;
-    }
-
-    case Primitive::kPrimDouble: {
-      __ Vmul(F64,
-              FromLowSToD(out.AsFpuRegisterPairLow<vixl32::SRegister>()),
-              FromLowSToD(first.AsFpuRegisterPairLow<vixl32::SRegister>()),
-              FromLowSToD(second.AsFpuRegisterPairLow<vixl32::SRegister>()));
-      break;
-    }
 
     default:
       LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
   }
 }
 
-void LocationsBuilderARMVIXL::VisitNot(HNot* not_) {
-  LocationSummary* locations =
-      new (GetGraph()->GetArena()) LocationSummary(not_, LocationSummary::kNoCall);
-  locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
-}
-
-void InstructionCodeGeneratorARMVIXL::VisitNot(HNot* not_) {
-  LocationSummary* locations = not_->GetLocations();
-  Location out = locations->Out();
-  Location in = locations->InAt(0);
-  switch (not_->GetResultType()) {
-    case Primitive::kPrimInt:
-      __ Mvn(OutputRegister(not_), InputRegisterAt(not_, 0));
-      break;
-
-    case Primitive::kPrimLong:
-      __ Mvn(out.AsRegisterPairLow<vixl32::Register>(),
-             Operand(in.AsRegisterPairLow<vixl32::Register>()));
-      __ Mvn(out.AsRegisterPairHigh<vixl32::Register>(),
-             Operand(in.AsRegisterPairHigh<vixl32::Register>()));
-      break;
-
-    default:
-      LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType();
-  }
-}
-
-void CodeGeneratorARMVIXL::GenerateMemoryBarrier(MemBarrierKind kind) {
-  // TODO (ported from quick): revisit ARM barrier kinds.
-  DmbOptions flavor = DmbOptions::ISH;  // Quiet C++ warnings.
-  switch (kind) {
-    case MemBarrierKind::kAnyStore:
-    case MemBarrierKind::kLoadAny:
-    case MemBarrierKind::kAnyAny: {
-      flavor = DmbOptions::ISH;
-      break;
-    }
-    case MemBarrierKind::kStoreStore: {
-      flavor = DmbOptions::ISHST;
-      break;
-    }
-    default:
-      LOG(FATAL) << "Unexpected memory barrier " << kind;
-  }
-  __ Dmb(flavor);
-}
-
 void InstructionCodeGeneratorARMVIXL::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
   DCHECK(instruction->IsDiv() || instruction->IsRem());
   DCHECK(instruction->GetResultType() == Primitive::kPrimInt);
 
-  LocationSummary* locations = instruction->GetLocations();
-  Location second = locations->InAt(1);
+  Location second = instruction->GetLocations()->InAt(1);
   DCHECK(second.IsConstant());
 
   vixl32::Register out = OutputRegister(instruction);
@@ -1753,7 +2407,7 @@
 
   vixl32::Register out = OutputRegister(instruction);
   vixl32::Register dividend = InputRegisterAt(instruction, 0);
-  vixl32::Register temp = locations->GetTemp(0).AsRegister<vixl32::Register>();
+  vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
   int32_t imm = second.GetConstant()->AsIntConstant()->GetValue();
   uint32_t abs_imm = static_cast<uint32_t>(AbsOrMin(imm));
   int ctz_imm = CTZ(abs_imm);
@@ -1764,16 +2418,16 @@
     __ Asr(temp, dividend, 31);
     __ Lsr(temp, temp, 32 - ctz_imm);
   }
-  __ Add(out, temp, Operand(dividend));
+  __ Add(out, temp, dividend);
 
   if (instruction->IsDiv()) {
     __ Asr(out, out, ctz_imm);
     if (imm < 0) {
-      __ Rsb(out, out, Operand(0));
+      __ Rsb(out, out, 0);
     }
   } else {
     __ Ubfx(out, out, 0, ctz_imm);
-    __ Sub(out, out, Operand(temp));
+    __ Sub(out, out, temp);
   }
 }
 
@@ -1787,8 +2441,8 @@
 
   vixl32::Register out = OutputRegister(instruction);
   vixl32::Register dividend = InputRegisterAt(instruction, 0);
-  vixl32::Register temp1 = locations->GetTemp(0).AsRegister<vixl32::Register>();
-  vixl32::Register temp2 = locations->GetTemp(1).AsRegister<vixl32::Register>();
+  vixl32::Register temp1 = RegisterFrom(locations->GetTemp(0));
+  vixl32::Register temp2 = RegisterFrom(locations->GetTemp(1));
   int64_t imm = second.GetConstant()->AsIntConstant()->GetValue();
 
   int64_t magic;
@@ -1799,9 +2453,9 @@
   __ Smull(temp2, temp1, dividend, temp1);
 
   if (imm > 0 && magic < 0) {
-    __ Add(temp1, temp1, Operand(dividend));
+    __ Add(temp1, temp1, dividend);
   } else if (imm < 0 && magic > 0) {
-    __ Sub(temp1, temp1, Operand(dividend));
+    __ Sub(temp1, temp1, dividend);
   }
 
   if (shift != 0) {
@@ -1823,8 +2477,7 @@
   DCHECK(instruction->IsDiv() || instruction->IsRem());
   DCHECK(instruction->GetResultType() == Primitive::kPrimInt);
 
-  LocationSummary* locations = instruction->GetLocations();
-  Location second = locations->InAt(1);
+  Location second = instruction->GetLocations()->InAt(1);
   DCHECK(second.IsConstant());
 
   int32_t imm = second.GetConstant()->AsIntConstant()->GetValue();
@@ -1875,12 +2528,22 @@
         locations->SetInAt(1, Location::RequiresRegister());
         locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
       } else {
-        TODO_VIXL32(FATAL);
+        InvokeRuntimeCallingConventionARMVIXL calling_convention;
+        locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
+        locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
+        // Note: divrem will compute both the quotient and the remainder as the pair R0 and R1, but
+        //       we only need the former.
+        locations->SetOut(LocationFrom(r0));
       }
       break;
     }
     case Primitive::kPrimLong: {
-      TODO_VIXL32(FATAL);
+      InvokeRuntimeCallingConventionARMVIXL calling_convention;
+      locations->SetInAt(0, LocationFrom(
+          calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
+      locations->SetInAt(1, LocationFrom(
+          calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
+      locations->SetOut(LocationFrom(r0, r1));
       break;
     }
     case Primitive::kPrimFloat:
@@ -1897,47 +2560,187 @@
 }
 
 void InstructionCodeGeneratorARMVIXL::VisitDiv(HDiv* div) {
-  LocationSummary* locations = div->GetLocations();
-  Location out = locations->Out();
-  Location first = locations->InAt(0);
-  Location second = locations->InAt(1);
+  Location lhs = div->GetLocations()->InAt(0);
+  Location rhs = div->GetLocations()->InAt(1);
 
   switch (div->GetResultType()) {
     case Primitive::kPrimInt: {
-      if (second.IsConstant()) {
+      if (rhs.IsConstant()) {
         GenerateDivRemConstantIntegral(div);
       } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
         __ Sdiv(OutputRegister(div), InputRegisterAt(div, 0), InputRegisterAt(div, 1));
       } else {
-        TODO_VIXL32(FATAL);
+        InvokeRuntimeCallingConventionARMVIXL calling_convention;
+        DCHECK(calling_convention.GetRegisterAt(0).Is(RegisterFrom(lhs)));
+        DCHECK(calling_convention.GetRegisterAt(1).Is(RegisterFrom(rhs)));
+        DCHECK(r0.Is(OutputRegister(div)));
+
+        codegen_->InvokeRuntime(kQuickIdivmod, div, div->GetDexPc());
+        CheckEntrypointTypes<kQuickIdivmod, int32_t, int32_t, int32_t>();
       }
       break;
     }
 
     case Primitive::kPrimLong: {
-      TODO_VIXL32(FATAL);
+      InvokeRuntimeCallingConventionARMVIXL calling_convention;
+      DCHECK(calling_convention.GetRegisterAt(0).Is(LowRegisterFrom(lhs)));
+      DCHECK(calling_convention.GetRegisterAt(1).Is(HighRegisterFrom(lhs)));
+      DCHECK(calling_convention.GetRegisterAt(2).Is(LowRegisterFrom(rhs)));
+      DCHECK(calling_convention.GetRegisterAt(3).Is(HighRegisterFrom(rhs)));
+      DCHECK(LowRegisterFrom(div->GetLocations()->Out()).Is(r0));
+      DCHECK(HighRegisterFrom(div->GetLocations()->Out()).Is(r1));
+
+      codegen_->InvokeRuntime(kQuickLdiv, div, div->GetDexPc());
+      CheckEntrypointTypes<kQuickLdiv, int64_t, int64_t, int64_t>();
       break;
     }
 
-    case Primitive::kPrimFloat: {
-      __ Vdiv(F32, OutputSRegister(div), InputSRegisterAt(div, 0), InputSRegisterAt(div, 1));
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      __ Vdiv(OutputVRegister(div), InputVRegisterAt(div, 0), InputVRegisterAt(div, 1));
       break;
-    }
-
-    case Primitive::kPrimDouble: {
-      __ Vdiv(F64,
-              FromLowSToD(out.AsFpuRegisterPairLow<vixl32::SRegister>()),
-              FromLowSToD(first.AsFpuRegisterPairLow<vixl32::SRegister>()),
-              FromLowSToD(second.AsFpuRegisterPairLow<vixl32::SRegister>()));
-      break;
-    }
 
     default:
       LOG(FATAL) << "Unexpected div type " << div->GetResultType();
   }
 }
 
+void LocationsBuilderARMVIXL::VisitRem(HRem* rem) {
+  Primitive::Type type = rem->GetResultType();
+
+  // Most remainders are implemented in the runtime.
+  LocationSummary::CallKind call_kind = LocationSummary::kCallOnMainOnly;
+  if (rem->GetResultType() == Primitive::kPrimInt && rem->InputAt(1)->IsConstant()) {
+    // sdiv will be replaced by other instruction sequence.
+    call_kind = LocationSummary::kNoCall;
+  } else if ((rem->GetResultType() == Primitive::kPrimInt)
+             && codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
+    // Have hardware divide instruction for int, do it with three instructions.
+    call_kind = LocationSummary::kNoCall;
+  }
+
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(rem, call_kind);
+
+  switch (type) {
+    case Primitive::kPrimInt: {
+      if (rem->InputAt(1)->IsConstant()) {
+        locations->SetInAt(0, Location::RequiresRegister());
+        locations->SetInAt(1, Location::ConstantLocation(rem->InputAt(1)->AsConstant()));
+        locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+        int32_t value = rem->InputAt(1)->AsIntConstant()->GetValue();
+        if (value == 1 || value == 0 || value == -1) {
+          // No temp register required.
+        } else {
+          locations->AddTemp(Location::RequiresRegister());
+          if (!IsPowerOfTwo(AbsOrMin(value))) {
+            locations->AddTemp(Location::RequiresRegister());
+          }
+        }
+      } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
+        locations->SetInAt(0, Location::RequiresRegister());
+        locations->SetInAt(1, Location::RequiresRegister());
+        locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+        locations->AddTemp(Location::RequiresRegister());
+      } else {
+        InvokeRuntimeCallingConventionARMVIXL calling_convention;
+        locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
+        locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
+        // Note: divrem will compute both the quotient and the remainder as the pair R0 and R1, but
+        //       we only need the latter.
+        locations->SetOut(LocationFrom(r1));
+      }
+      break;
+    }
+    case Primitive::kPrimLong: {
+      InvokeRuntimeCallingConventionARMVIXL calling_convention;
+      locations->SetInAt(0, LocationFrom(
+          calling_convention.GetRegisterAt(0), calling_convention.GetRegisterAt(1)));
+      locations->SetInAt(1, LocationFrom(
+          calling_convention.GetRegisterAt(2), calling_convention.GetRegisterAt(3)));
+      // The runtime helper puts the output in R2,R3.
+      locations->SetOut(LocationFrom(r2, r3));
+      break;
+    }
+    case Primitive::kPrimFloat: {
+      InvokeRuntimeCallingConventionARMVIXL calling_convention;
+      locations->SetInAt(0, LocationFrom(calling_convention.GetFpuRegisterAt(0)));
+      locations->SetInAt(1, LocationFrom(calling_convention.GetFpuRegisterAt(1)));
+      locations->SetOut(LocationFrom(s0));
+      break;
+    }
+
+    case Primitive::kPrimDouble: {
+      InvokeRuntimeCallingConventionARMVIXL calling_convention;
+      locations->SetInAt(0, LocationFrom(
+          calling_convention.GetFpuRegisterAt(0), calling_convention.GetFpuRegisterAt(1)));
+      locations->SetInAt(1, LocationFrom(
+          calling_convention.GetFpuRegisterAt(2), calling_convention.GetFpuRegisterAt(3)));
+      locations->SetOut(LocationFrom(s0, s1));
+      break;
+    }
+
+    default:
+      LOG(FATAL) << "Unexpected rem type " << type;
+  }
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitRem(HRem* rem) {
+  LocationSummary* locations = rem->GetLocations();
+  Location second = locations->InAt(1);
+
+  Primitive::Type type = rem->GetResultType();
+  switch (type) {
+    case Primitive::kPrimInt: {
+        vixl32::Register reg1 = InputRegisterAt(rem, 0);
+        vixl32::Register out_reg = OutputRegister(rem);
+        if (second.IsConstant()) {
+          GenerateDivRemConstantIntegral(rem);
+        } else if (codegen_->GetInstructionSetFeatures().HasDivideInstruction()) {
+        vixl32::Register reg2 = RegisterFrom(second);
+        vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
+
+        // temp = reg1 / reg2  (integer division)
+        // dest = reg1 - temp * reg2
+        __ Sdiv(temp, reg1, reg2);
+        __ Mls(out_reg, temp, reg2, reg1);
+      } else {
+        InvokeRuntimeCallingConventionARMVIXL calling_convention;
+        DCHECK(reg1.Is(calling_convention.GetRegisterAt(0)));
+        DCHECK(RegisterFrom(second).Is(calling_convention.GetRegisterAt(1)));
+        DCHECK(out_reg.Is(r1));
+
+        codegen_->InvokeRuntime(kQuickIdivmod, rem, rem->GetDexPc());
+        CheckEntrypointTypes<kQuickIdivmod, int32_t, int32_t, int32_t>();
+      }
+      break;
+    }
+
+    case Primitive::kPrimLong: {
+      codegen_->InvokeRuntime(kQuickLmod, rem, rem->GetDexPc());
+        CheckEntrypointTypes<kQuickLmod, int64_t, int64_t, int64_t>();
+      break;
+    }
+
+    case Primitive::kPrimFloat: {
+      codegen_->InvokeRuntime(kQuickFmodf, rem, rem->GetDexPc());
+      CheckEntrypointTypes<kQuickFmodf, float, float, float>();
+      break;
+    }
+
+    case Primitive::kPrimDouble: {
+      codegen_->InvokeRuntime(kQuickFmod, rem, rem->GetDexPc());
+      CheckEntrypointTypes<kQuickFmod, double, double, double>();
+      break;
+    }
+
+    default:
+      LOG(FATAL) << "Unexpected rem type " << type;
+  }
+}
+
+
 void LocationsBuilderARMVIXL::VisitDivZeroCheck(HDivZeroCheck* instruction) {
+  // TODO(VIXL): https://android-review.googlesource.com/#/c/275337/
   LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
       ? LocationSummary::kCallOnSlowPath
       : LocationSummary::kNoCall;
@@ -1976,9 +2779,7 @@
       if (value.IsRegisterPair()) {
         UseScratchRegisterScope temps(GetVIXLAssembler());
         vixl32::Register temp = temps.Acquire();
-        __ Orrs(temp,
-                value.AsRegisterPairLow<vixl32::Register>(),
-                Operand(value.AsRegisterPairHigh<vixl32::Register>()));
+        __ Orrs(temp, LowRegisterFrom(value), HighRegisterFrom(value));
         __ B(eq, slow_path->GetEntryLabel());
       } else {
         DCHECK(value.IsConstant()) << value;
@@ -1993,6 +2794,1852 @@
   }
 }
 
+void InstructionCodeGeneratorARMVIXL::HandleIntegerRotate(HRor* ror) {
+  LocationSummary* locations = ror->GetLocations();
+  vixl32::Register in = InputRegisterAt(ror, 0);
+  Location rhs = locations->InAt(1);
+  vixl32::Register out = OutputRegister(ror);
+
+  if (rhs.IsConstant()) {
+    // Arm32 and Thumb2 assemblers require a rotation on the interval [1,31],
+    // so map all rotations to a +ve. equivalent in that range.
+    // (e.g. left *or* right by -2 bits == 30 bits in the same direction.)
+    uint32_t rot = CodeGenerator::GetInt32ValueOf(rhs.GetConstant()) & 0x1F;
+    if (rot) {
+      // Rotate, mapping left rotations to right equivalents if necessary.
+      // (e.g. left by 2 bits == right by 30.)
+      __ Ror(out, in, rot);
+    } else if (!out.Is(in)) {
+      __ Mov(out, in);
+    }
+  } else {
+    __ Ror(out, in, RegisterFrom(rhs));
+  }
+}
+
+// Gain some speed by mapping all Long rotates onto equivalent pairs of Integer
+// rotates by swapping input regs (effectively rotating by the first 32-bits of
+// a larger rotation) or flipping direction (thus treating larger right/left
+// rotations as sub-word sized rotations in the other direction) as appropriate.
+void InstructionCodeGeneratorARMVIXL::HandleLongRotate(HRor* ror) {
+  LocationSummary* locations = ror->GetLocations();
+  vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0));
+  vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0));
+  Location rhs = locations->InAt(1);
+  vixl32::Register out_reg_lo = LowRegisterFrom(locations->Out());
+  vixl32::Register out_reg_hi = HighRegisterFrom(locations->Out());
+
+  if (rhs.IsConstant()) {
+    uint64_t rot = CodeGenerator::GetInt64ValueOf(rhs.GetConstant());
+    // Map all rotations to +ve. equivalents on the interval [0,63].
+    rot &= kMaxLongShiftDistance;
+    // For rotates over a word in size, 'pre-rotate' by 32-bits to keep rotate
+    // logic below to a simple pair of binary orr.
+    // (e.g. 34 bits == in_reg swap + 2 bits right.)
+    if (rot >= kArmBitsPerWord) {
+      rot -= kArmBitsPerWord;
+      std::swap(in_reg_hi, in_reg_lo);
+    }
+    // Rotate, or mov to out for zero or word size rotations.
+    if (rot != 0u) {
+      __ Lsr(out_reg_hi, in_reg_hi, rot);
+      __ Orr(out_reg_hi, out_reg_hi, Operand(in_reg_lo, ShiftType::LSL, kArmBitsPerWord - rot));
+      __ Lsr(out_reg_lo, in_reg_lo, rot);
+      __ Orr(out_reg_lo, out_reg_lo, Operand(in_reg_hi, ShiftType::LSL, kArmBitsPerWord - rot));
+    } else {
+      __ Mov(out_reg_lo, in_reg_lo);
+      __ Mov(out_reg_hi, in_reg_hi);
+    }
+  } else {
+    vixl32::Register shift_right = RegisterFrom(locations->GetTemp(0));
+    vixl32::Register shift_left = RegisterFrom(locations->GetTemp(1));
+    vixl32::Label end;
+    vixl32::Label shift_by_32_plus_shift_right;
+
+    __ And(shift_right, RegisterFrom(rhs), 0x1F);
+    __ Lsrs(shift_left, RegisterFrom(rhs), 6);
+    // TODO(VIXL): Check that flags are kept after "vixl32::LeaveFlags" enabled.
+    __ Rsb(shift_left, shift_right, kArmBitsPerWord);
+    __ B(cc, &shift_by_32_plus_shift_right);
+
+    // out_reg_hi = (reg_hi << shift_left) | (reg_lo >> shift_right).
+    // out_reg_lo = (reg_lo << shift_left) | (reg_hi >> shift_right).
+    __ Lsl(out_reg_hi, in_reg_hi, shift_left);
+    __ Lsr(out_reg_lo, in_reg_lo, shift_right);
+    __ Add(out_reg_hi, out_reg_hi, out_reg_lo);
+    __ Lsl(out_reg_lo, in_reg_lo, shift_left);
+    __ Lsr(shift_left, in_reg_hi, shift_right);
+    __ Add(out_reg_lo, out_reg_lo, shift_left);
+    __ B(&end);
+
+    __ Bind(&shift_by_32_plus_shift_right);  // Shift by 32+shift_right.
+    // out_reg_hi = (reg_hi >> shift_right) | (reg_lo << shift_left).
+    // out_reg_lo = (reg_lo >> shift_right) | (reg_hi << shift_left).
+    __ Lsr(out_reg_hi, in_reg_hi, shift_right);
+    __ Lsl(out_reg_lo, in_reg_lo, shift_left);
+    __ Add(out_reg_hi, out_reg_hi, out_reg_lo);
+    __ Lsr(out_reg_lo, in_reg_lo, shift_right);
+    __ Lsl(shift_right, in_reg_hi, shift_left);
+    __ Add(out_reg_lo, out_reg_lo, shift_right);
+
+    __ Bind(&end);
+  }
+}
+
+void LocationsBuilderARMVIXL::VisitRor(HRor* ror) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(ror, LocationSummary::kNoCall);
+  switch (ror->GetResultType()) {
+    case Primitive::kPrimInt: {
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RegisterOrConstant(ror->InputAt(1)));
+      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+      break;
+    }
+    case Primitive::kPrimLong: {
+      locations->SetInAt(0, Location::RequiresRegister());
+      if (ror->InputAt(1)->IsConstant()) {
+        locations->SetInAt(1, Location::ConstantLocation(ror->InputAt(1)->AsConstant()));
+      } else {
+        locations->SetInAt(1, Location::RequiresRegister());
+        locations->AddTemp(Location::RequiresRegister());
+        locations->AddTemp(Location::RequiresRegister());
+      }
+      locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
+  }
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitRor(HRor* ror) {
+  Primitive::Type type = ror->GetResultType();
+  switch (type) {
+    case Primitive::kPrimInt: {
+      HandleIntegerRotate(ror);
+      break;
+    }
+    case Primitive::kPrimLong: {
+      HandleLongRotate(ror);
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected operation type " << type;
+      UNREACHABLE();
+  }
+}
+
+void LocationsBuilderARMVIXL::HandleShift(HBinaryOperation* op) {
+  DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
+
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(op, LocationSummary::kNoCall);
+
+  switch (op->GetResultType()) {
+    case Primitive::kPrimInt: {
+      locations->SetInAt(0, Location::RequiresRegister());
+      if (op->InputAt(1)->IsConstant()) {
+        locations->SetInAt(1, Location::ConstantLocation(op->InputAt(1)->AsConstant()));
+        locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+      } else {
+        locations->SetInAt(1, Location::RequiresRegister());
+        // Make the output overlap, as it will be used to hold the masked
+        // second input.
+        locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+      }
+      break;
+    }
+    case Primitive::kPrimLong: {
+      locations->SetInAt(0, Location::RequiresRegister());
+      if (op->InputAt(1)->IsConstant()) {
+        locations->SetInAt(1, Location::ConstantLocation(op->InputAt(1)->AsConstant()));
+        // For simplicity, use kOutputOverlap even though we only require that low registers
+        // don't clash with high registers which the register allocator currently guarantees.
+        locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+      } else {
+        locations->SetInAt(1, Location::RequiresRegister());
+        locations->AddTemp(Location::RequiresRegister());
+        locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+      }
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
+  }
+}
+
+void InstructionCodeGeneratorARMVIXL::HandleShift(HBinaryOperation* op) {
+  DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
+
+  LocationSummary* locations = op->GetLocations();
+  Location out = locations->Out();
+  Location first = locations->InAt(0);
+  Location second = locations->InAt(1);
+
+  Primitive::Type type = op->GetResultType();
+  switch (type) {
+    case Primitive::kPrimInt: {
+      vixl32::Register out_reg = OutputRegister(op);
+      vixl32::Register first_reg = InputRegisterAt(op, 0);
+      if (second.IsRegister()) {
+        vixl32::Register second_reg = RegisterFrom(second);
+        // ARM doesn't mask the shift count so we need to do it ourselves.
+        __ And(out_reg, second_reg, kMaxIntShiftDistance);
+        if (op->IsShl()) {
+          __ Lsl(out_reg, first_reg, out_reg);
+        } else if (op->IsShr()) {
+          __ Asr(out_reg, first_reg, out_reg);
+        } else {
+          __ Lsr(out_reg, first_reg, out_reg);
+        }
+      } else {
+        int32_t cst = second.GetConstant()->AsIntConstant()->GetValue();
+        uint32_t shift_value = cst & kMaxIntShiftDistance;
+        if (shift_value == 0) {  // ARM does not support shifting with 0 immediate.
+          __ Mov(out_reg, first_reg);
+        } else if (op->IsShl()) {
+          __ Lsl(out_reg, first_reg, shift_value);
+        } else if (op->IsShr()) {
+          __ Asr(out_reg, first_reg, shift_value);
+        } else {
+          __ Lsr(out_reg, first_reg, shift_value);
+        }
+      }
+      break;
+    }
+    case Primitive::kPrimLong: {
+      vixl32::Register o_h = HighRegisterFrom(out);
+      vixl32::Register o_l = LowRegisterFrom(out);
+
+      vixl32::Register high = HighRegisterFrom(first);
+      vixl32::Register low = LowRegisterFrom(first);
+
+      if (second.IsRegister()) {
+        vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
+
+        vixl32::Register second_reg = RegisterFrom(second);
+
+        if (op->IsShl()) {
+          __ And(o_l, second_reg, kMaxLongShiftDistance);
+          // Shift the high part
+          __ Lsl(o_h, high, o_l);
+          // Shift the low part and `or` what overflew on the high part
+          __ Rsb(temp, o_l, kArmBitsPerWord);
+          __ Lsr(temp, low, temp);
+          __ Orr(o_h, o_h, temp);
+          // If the shift is > 32 bits, override the high part
+          __ Subs(temp, o_l, kArmBitsPerWord);
+          {
+            AssemblerAccurateScope guard(GetVIXLAssembler(),
+                                         3 * kArmInstrMaxSizeInBytes,
+                                         CodeBufferCheckScope::kMaximumSize);
+            __ it(pl);
+            __ lsl(pl, o_h, low, temp);
+          }
+          // Shift the low part
+          __ Lsl(o_l, low, o_l);
+        } else if (op->IsShr()) {
+          __ And(o_h, second_reg, kMaxLongShiftDistance);
+          // Shift the low part
+          __ Lsr(o_l, low, o_h);
+          // Shift the high part and `or` what underflew on the low part
+          __ Rsb(temp, o_h, kArmBitsPerWord);
+          __ Lsl(temp, high, temp);
+          __ Orr(o_l, o_l, temp);
+          // If the shift is > 32 bits, override the low part
+          __ Subs(temp, o_h, kArmBitsPerWord);
+          {
+            AssemblerAccurateScope guard(GetVIXLAssembler(),
+                                         3 * kArmInstrMaxSizeInBytes,
+                                         CodeBufferCheckScope::kMaximumSize);
+            __ it(pl);
+            __ asr(pl, o_l, high, temp);
+          }
+          // Shift the high part
+          __ Asr(o_h, high, o_h);
+        } else {
+          __ And(o_h, second_reg, kMaxLongShiftDistance);
+          // same as Shr except we use `Lsr`s and not `Asr`s
+          __ Lsr(o_l, low, o_h);
+          __ Rsb(temp, o_h, kArmBitsPerWord);
+          __ Lsl(temp, high, temp);
+          __ Orr(o_l, o_l, temp);
+          __ Subs(temp, o_h, kArmBitsPerWord);
+          {
+            AssemblerAccurateScope guard(GetVIXLAssembler(),
+                                         3 * kArmInstrMaxSizeInBytes,
+                                         CodeBufferCheckScope::kMaximumSize);
+          __ it(pl);
+          __ lsr(pl, o_l, high, temp);
+          }
+          __ Lsr(o_h, high, o_h);
+        }
+      } else {
+        // Register allocator doesn't create partial overlap.
+        DCHECK(!o_l.Is(high));
+        DCHECK(!o_h.Is(low));
+        int32_t cst = second.GetConstant()->AsIntConstant()->GetValue();
+        uint32_t shift_value = cst & kMaxLongShiftDistance;
+        if (shift_value > 32) {
+          if (op->IsShl()) {
+            __ Lsl(o_h, low, shift_value - 32);
+            __ Mov(o_l, 0);
+          } else if (op->IsShr()) {
+            __ Asr(o_l, high, shift_value - 32);
+            __ Asr(o_h, high, 31);
+          } else {
+            __ Lsr(o_l, high, shift_value - 32);
+            __ Mov(o_h, 0);
+          }
+        } else if (shift_value == 32) {
+          if (op->IsShl()) {
+            __ Mov(o_h, low);
+            __ Mov(o_l, 0);
+          } else if (op->IsShr()) {
+            __ Mov(o_l, high);
+            __ Asr(o_h, high, 31);
+          } else {
+            __ Mov(o_l, high);
+            __ Mov(o_h, 0);
+          }
+        } else if (shift_value == 1) {
+          if (op->IsShl()) {
+            __ Lsls(o_l, low, 1);
+            __ Adc(o_h, high, high);
+          } else if (op->IsShr()) {
+            __ Asrs(o_h, high, 1);
+            __ Rrx(o_l, low);
+          } else {
+            __ Lsrs(o_h, high, 1);
+            __ Rrx(o_l, low);
+          }
+        } else {
+          DCHECK(2 <= shift_value && shift_value < 32) << shift_value;
+          if (op->IsShl()) {
+            __ Lsl(o_h, high, shift_value);
+            __ Orr(o_h, o_h, Operand(low, ShiftType::LSR, 32 - shift_value));
+            __ Lsl(o_l, low, shift_value);
+          } else if (op->IsShr()) {
+            __ Lsr(o_l, low, shift_value);
+            __ Orr(o_l, o_l, Operand(high, ShiftType::LSL, 32 - shift_value));
+            __ Asr(o_h, high, shift_value);
+          } else {
+            __ Lsr(o_l, low, shift_value);
+            __ Orr(o_l, o_l, Operand(high, ShiftType::LSL, 32 - shift_value));
+            __ Lsr(o_h, high, shift_value);
+          }
+        }
+      }
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected operation type " << type;
+      UNREACHABLE();
+  }
+}
+
+void LocationsBuilderARMVIXL::VisitShl(HShl* shl) {
+  HandleShift(shl);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitShl(HShl* shl) {
+  HandleShift(shl);
+}
+
+void LocationsBuilderARMVIXL::VisitShr(HShr* shr) {
+  HandleShift(shr);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitShr(HShr* shr) {
+  HandleShift(shr);
+}
+
+void LocationsBuilderARMVIXL::VisitUShr(HUShr* ushr) {
+  HandleShift(ushr);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitUShr(HUShr* ushr) {
+  HandleShift(ushr);
+}
+
+void LocationsBuilderARMVIXL::VisitNewInstance(HNewInstance* instruction) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
+  if (instruction->IsStringAlloc()) {
+    locations->AddTemp(LocationFrom(kMethodRegister));
+  } else {
+    InvokeRuntimeCallingConventionARMVIXL calling_convention;
+    locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
+    locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
+  }
+  locations->SetOut(LocationFrom(r0));
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitNewInstance(HNewInstance* instruction) {
+  // Note: if heap poisoning is enabled, the entry point takes cares
+  // of poisoning the reference.
+  if (instruction->IsStringAlloc()) {
+    // String is allocated through StringFactory. Call NewEmptyString entry point.
+    vixl32::Register temp = RegisterFrom(instruction->GetLocations()->GetTemp(0));
+    MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize);
+    GetAssembler()->LoadFromOffset(kLoadWord, temp, tr, QUICK_ENTRY_POINT(pNewEmptyString));
+    GetAssembler()->LoadFromOffset(kLoadWord, lr, temp, code_offset.Int32Value());
+    AssemblerAccurateScope aas(GetVIXLAssembler(),
+                               kArmInstrMaxSizeInBytes,
+                               CodeBufferCheckScope::kMaximumSize);
+    __ blx(lr);
+    codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
+  } else {
+    codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
+    CheckEntrypointTypes<kQuickAllocObjectWithAccessCheck, void*, uint32_t, ArtMethod*>();
+  }
+}
+
+void LocationsBuilderARMVIXL::VisitNewArray(HNewArray* instruction) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
+  InvokeRuntimeCallingConventionARMVIXL calling_convention;
+  locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0)));
+  locations->SetOut(LocationFrom(r0));
+  locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(1)));
+  locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(2)));
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitNewArray(HNewArray* instruction) {
+  InvokeRuntimeCallingConventionARMVIXL calling_convention;
+  __ Mov(calling_convention.GetRegisterAt(0), instruction->GetTypeIndex());
+  // Note: if heap poisoning is enabled, the entry point takes cares
+  // of poisoning the reference.
+  codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
+  CheckEntrypointTypes<kQuickAllocArrayWithAccessCheck, void*, uint32_t, int32_t, ArtMethod*>();
+}
+
+void LocationsBuilderARMVIXL::VisitParameterValue(HParameterValue* instruction) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
+  if (location.IsStackSlot()) {
+    location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
+  } else if (location.IsDoubleStackSlot()) {
+    location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
+  }
+  locations->SetOut(location);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitParameterValue(
+    HParameterValue* instruction ATTRIBUTE_UNUSED) {
+  // Nothing to do, the parameter is already at its location.
+}
+
+void LocationsBuilderARMVIXL::VisitCurrentMethod(HCurrentMethod* instruction) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  locations->SetOut(LocationFrom(kMethodRegister));
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitCurrentMethod(
+    HCurrentMethod* instruction ATTRIBUTE_UNUSED) {
+  // Nothing to do, the method is already at its location.
+}
+
+void LocationsBuilderARMVIXL::VisitNot(HNot* not_) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(not_, LocationSummary::kNoCall);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitNot(HNot* not_) {
+  LocationSummary* locations = not_->GetLocations();
+  Location out = locations->Out();
+  Location in = locations->InAt(0);
+  switch (not_->GetResultType()) {
+    case Primitive::kPrimInt:
+      __ Mvn(OutputRegister(not_), InputRegisterAt(not_, 0));
+      break;
+
+    case Primitive::kPrimLong:
+      __ Mvn(LowRegisterFrom(out), LowRegisterFrom(in));
+      __ Mvn(HighRegisterFrom(out), HighRegisterFrom(in));
+      break;
+
+    default:
+      LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType();
+  }
+}
+
+void LocationsBuilderARMVIXL::VisitBooleanNot(HBooleanNot* bool_not) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(bool_not, LocationSummary::kNoCall);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitBooleanNot(HBooleanNot* bool_not) {
+  __ Eor(OutputRegister(bool_not), InputRegister(bool_not), 1);
+}
+
+void LocationsBuilderARMVIXL::VisitCompare(HCompare* compare) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall);
+  switch (compare->InputAt(0)->GetType()) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimInt:
+    case Primitive::kPrimLong: {
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RequiresRegister());
+      // Output overlaps because it is written before doing the low comparison.
+      locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+      break;
+    }
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble: {
+      locations->SetInAt(0, Location::RequiresFpuRegister());
+      locations->SetInAt(1, ArithmeticZeroOrFpuRegister(compare->InputAt(1)));
+      locations->SetOut(Location::RequiresRegister());
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
+  }
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitCompare(HCompare* compare) {
+  LocationSummary* locations = compare->GetLocations();
+  vixl32::Register out = OutputRegister(compare);
+  Location left = locations->InAt(0);
+  Location right = locations->InAt(1);
+
+  vixl32::Label less, greater, done;
+  Primitive::Type type = compare->InputAt(0)->GetType();
+  vixl32::Condition less_cond = vixl32::Condition(kNone);
+  switch (type) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimInt: {
+      // Emit move to `out` before the `Cmp`, as `Mov` might affect the status flags.
+      __ Mov(out, 0);
+      __ Cmp(RegisterFrom(left), RegisterFrom(right));  // Signed compare.
+      less_cond = lt;
+      break;
+    }
+    case Primitive::kPrimLong: {
+      __ Cmp(HighRegisterFrom(left), HighRegisterFrom(right));  // Signed compare.
+      __ B(lt, &less);
+      __ B(gt, &greater);
+      // Emit move to `out` before the last `Cmp`, as `Mov` might affect the status flags.
+      __ Mov(out, 0);
+      __ Cmp(LowRegisterFrom(left), LowRegisterFrom(right));  // Unsigned compare.
+      less_cond = lo;
+      break;
+    }
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble: {
+      __ Mov(out, 0);
+      GenerateVcmp(compare);
+      // To branch on the FP compare result we transfer FPSCR to APSR (encoded as PC in VMRS).
+      __ Vmrs(RegisterOrAPSR_nzcv(kPcCode), FPSCR);
+      less_cond = ARMFPCondition(kCondLT, compare->IsGtBias());
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected compare type " << type;
+      UNREACHABLE();
+  }
+
+  __ B(eq, &done);
+  __ B(less_cond, &less);
+
+  __ Bind(&greater);
+  __ Mov(out, 1);
+  __ B(&done);
+
+  __ Bind(&less);
+  __ Mov(out, -1);
+
+  __ Bind(&done);
+}
+
+void LocationsBuilderARMVIXL::VisitPhi(HPhi* instruction) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
+    locations->SetInAt(i, Location::Any());
+  }
+  locations->SetOut(Location::Any());
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
+  LOG(FATAL) << "Unreachable";
+}
+
+void CodeGeneratorARMVIXL::GenerateMemoryBarrier(MemBarrierKind kind) {
+  // TODO (ported from quick): revisit ARM barrier kinds.
+  DmbOptions flavor = DmbOptions::ISH;  // Quiet C++ warnings.
+  switch (kind) {
+    case MemBarrierKind::kAnyStore:
+    case MemBarrierKind::kLoadAny:
+    case MemBarrierKind::kAnyAny: {
+      flavor = DmbOptions::ISH;
+      break;
+    }
+    case MemBarrierKind::kStoreStore: {
+      flavor = DmbOptions::ISHST;
+      break;
+    }
+    default:
+      LOG(FATAL) << "Unexpected memory barrier " << kind;
+  }
+  __ Dmb(flavor);
+}
+
+void InstructionCodeGeneratorARMVIXL::GenerateWideAtomicLoad(vixl32::Register addr,
+                                                             uint32_t offset,
+                                                             vixl32::Register out_lo,
+                                                             vixl32::Register out_hi) {
+  UseScratchRegisterScope temps(GetVIXLAssembler());
+  if (offset != 0) {
+    vixl32::Register temp = temps.Acquire();
+    __ Add(temp, addr, offset);
+    addr = temp;
+  }
+  __ Ldrexd(out_lo, out_hi, addr);
+}
+
+void InstructionCodeGeneratorARMVIXL::GenerateWideAtomicStore(vixl32::Register addr,
+                                                              uint32_t offset,
+                                                              vixl32::Register value_lo,
+                                                              vixl32::Register value_hi,
+                                                              vixl32::Register temp1,
+                                                              vixl32::Register temp2,
+                                                              HInstruction* instruction) {
+  UseScratchRegisterScope temps(GetVIXLAssembler());
+  vixl32::Label fail;
+  if (offset != 0) {
+    vixl32::Register temp = temps.Acquire();
+    __ Add(temp, addr, offset);
+    addr = temp;
+  }
+  __ Bind(&fail);
+  // We need a load followed by store. (The address used in a STREX instruction must
+  // be the same as the address in the most recently executed LDREX instruction.)
+  __ Ldrexd(temp1, temp2, addr);
+  codegen_->MaybeRecordImplicitNullCheck(instruction);
+  __ Strexd(temp1, value_lo, value_hi, addr);
+  __ Cbnz(temp1, &fail);
+}
+
+void LocationsBuilderARMVIXL::HandleFieldSet(
+    HInstruction* instruction, const FieldInfo& field_info) {
+  DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
+
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  locations->SetInAt(0, Location::RequiresRegister());
+
+  Primitive::Type field_type = field_info.GetFieldType();
+  if (Primitive::IsFloatingPointType(field_type)) {
+    locations->SetInAt(1, Location::RequiresFpuRegister());
+  } else {
+    locations->SetInAt(1, Location::RequiresRegister());
+  }
+
+  bool is_wide = field_type == Primitive::kPrimLong || field_type == Primitive::kPrimDouble;
+  bool generate_volatile = field_info.IsVolatile()
+      && is_wide
+      && !codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
+  bool needs_write_barrier =
+      CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
+  // Temporary registers for the write barrier.
+  // TODO: consider renaming StoreNeedsWriteBarrier to StoreNeedsGCMark.
+  if (needs_write_barrier) {
+    locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
+    locations->AddTemp(Location::RequiresRegister());
+  } else if (generate_volatile) {
+    // ARM encoding have some additional constraints for ldrexd/strexd:
+    // - registers need to be consecutive
+    // - the first register should be even but not R14.
+    // We don't test for ARM yet, and the assertion makes sure that we
+    // revisit this if we ever enable ARM encoding.
+    DCHECK_EQ(InstructionSet::kThumb2, codegen_->GetInstructionSet());
+
+    locations->AddTemp(Location::RequiresRegister());
+    locations->AddTemp(Location::RequiresRegister());
+    if (field_type == Primitive::kPrimDouble) {
+      // For doubles we need two more registers to copy the value.
+      locations->AddTemp(LocationFrom(r2));
+      locations->AddTemp(LocationFrom(r3));
+    }
+  }
+}
+
+void InstructionCodeGeneratorARMVIXL::HandleFieldSet(HInstruction* instruction,
+                                                     const FieldInfo& field_info,
+                                                     bool value_can_be_null) {
+  DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
+
+  LocationSummary* locations = instruction->GetLocations();
+  vixl32::Register base = InputRegisterAt(instruction, 0);
+  Location value = locations->InAt(1);
+
+  bool is_volatile = field_info.IsVolatile();
+  bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
+  Primitive::Type field_type = field_info.GetFieldType();
+  uint32_t offset = field_info.GetFieldOffset().Uint32Value();
+  bool needs_write_barrier =
+      CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
+
+  if (is_volatile) {
+    codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
+  }
+
+  switch (field_type) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte: {
+      GetAssembler()->StoreToOffset(kStoreByte, RegisterFrom(value), base, offset);
+      break;
+    }
+
+    case Primitive::kPrimShort:
+    case Primitive::kPrimChar: {
+      GetAssembler()->StoreToOffset(kStoreHalfword, RegisterFrom(value), base, offset);
+      break;
+    }
+
+    case Primitive::kPrimInt:
+    case Primitive::kPrimNot: {
+      if (kPoisonHeapReferences && needs_write_barrier) {
+        // Note that in the case where `value` is a null reference,
+        // we do not enter this block, as a null reference does not
+        // need poisoning.
+        DCHECK_EQ(field_type, Primitive::kPrimNot);
+        vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
+        __ Mov(temp, RegisterFrom(value));
+        GetAssembler()->PoisonHeapReference(temp);
+        GetAssembler()->StoreToOffset(kStoreWord, temp, base, offset);
+      } else {
+        GetAssembler()->StoreToOffset(kStoreWord, RegisterFrom(value), base, offset);
+      }
+      break;
+    }
+
+    case Primitive::kPrimLong: {
+      if (is_volatile && !atomic_ldrd_strd) {
+        GenerateWideAtomicStore(base,
+                                offset,
+                                LowRegisterFrom(value),
+                                HighRegisterFrom(value),
+                                RegisterFrom(locations->GetTemp(0)),
+                                RegisterFrom(locations->GetTemp(1)),
+                                instruction);
+      } else {
+        GetAssembler()->StoreToOffset(kStoreWordPair, LowRegisterFrom(value), base, offset);
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+      }
+      break;
+    }
+
+    case Primitive::kPrimFloat: {
+      GetAssembler()->StoreSToOffset(SRegisterFrom(value), base, offset);
+      break;
+    }
+
+    case Primitive::kPrimDouble: {
+      vixl32::DRegister value_reg = DRegisterFrom(value);
+      if (is_volatile && !atomic_ldrd_strd) {
+        vixl32::Register value_reg_lo = RegisterFrom(locations->GetTemp(0));
+        vixl32::Register value_reg_hi = RegisterFrom(locations->GetTemp(1));
+
+        __ Vmov(value_reg_lo, value_reg_hi, value_reg);
+
+        GenerateWideAtomicStore(base,
+                                offset,
+                                value_reg_lo,
+                                value_reg_hi,
+                                RegisterFrom(locations->GetTemp(2)),
+                                RegisterFrom(locations->GetTemp(3)),
+                                instruction);
+      } else {
+        GetAssembler()->StoreDToOffset(value_reg, base, offset);
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+      }
+      break;
+    }
+
+    case Primitive::kPrimVoid:
+      LOG(FATAL) << "Unreachable type " << field_type;
+      UNREACHABLE();
+  }
+
+  // Longs and doubles are handled in the switch.
+  if (field_type != Primitive::kPrimLong && field_type != Primitive::kPrimDouble) {
+    codegen_->MaybeRecordImplicitNullCheck(instruction);
+  }
+
+  if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
+    vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
+    vixl32::Register card = RegisterFrom(locations->GetTemp(1));
+    codegen_->MarkGCCard(temp, card, base, RegisterFrom(value), value_can_be_null);
+  }
+
+  if (is_volatile) {
+    codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
+  }
+}
+
+void LocationsBuilderARMVIXL::HandleFieldGet(HInstruction* instruction,
+                                             const FieldInfo& field_info) {
+  DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
+
+  bool object_field_get_with_read_barrier =
+      kEmitCompilerReadBarrier && (field_info.GetFieldType() == Primitive::kPrimNot);
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction,
+                                                   object_field_get_with_read_barrier ?
+                                                       LocationSummary::kCallOnSlowPath :
+                                                       LocationSummary::kNoCall);
+  if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
+    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
+  }
+  locations->SetInAt(0, Location::RequiresRegister());
+
+  bool volatile_for_double = field_info.IsVolatile()
+      && (field_info.GetFieldType() == Primitive::kPrimDouble)
+      && !codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
+  // The output overlaps in case of volatile long: we don't want the
+  // code generated by GenerateWideAtomicLoad to overwrite the
+  // object's location.  Likewise, in the case of an object field get
+  // with read barriers enabled, we do not want the load to overwrite
+  // the object's location, as we need it to emit the read barrier.
+  bool overlap = (field_info.IsVolatile() && (field_info.GetFieldType() == Primitive::kPrimLong)) ||
+      object_field_get_with_read_barrier;
+
+  if (Primitive::IsFloatingPointType(instruction->GetType())) {
+    locations->SetOut(Location::RequiresFpuRegister());
+  } else {
+    locations->SetOut(Location::RequiresRegister(),
+                      (overlap ? Location::kOutputOverlap : Location::kNoOutputOverlap));
+  }
+  if (volatile_for_double) {
+    // ARM encoding have some additional constraints for ldrexd/strexd:
+    // - registers need to be consecutive
+    // - the first register should be even but not R14.
+    // We don't test for ARM yet, and the assertion makes sure that we
+    // revisit this if we ever enable ARM encoding.
+    DCHECK_EQ(InstructionSet::kThumb2, codegen_->GetInstructionSet());
+    locations->AddTemp(Location::RequiresRegister());
+    locations->AddTemp(Location::RequiresRegister());
+  } else if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
+    // We need a temporary register for the read barrier marking slow
+    // path in CodeGeneratorARM::GenerateFieldLoadWithBakerReadBarrier.
+    locations->AddTemp(Location::RequiresRegister());
+  }
+}
+
+Location LocationsBuilderARMVIXL::ArithmeticZeroOrFpuRegister(HInstruction* input) {
+  DCHECK(Primitive::IsFloatingPointType(input->GetType())) << input->GetType();
+  if ((input->IsFloatConstant() && (input->AsFloatConstant()->IsArithmeticZero())) ||
+      (input->IsDoubleConstant() && (input->AsDoubleConstant()->IsArithmeticZero()))) {
+    return Location::ConstantLocation(input->AsConstant());
+  } else {
+    return Location::RequiresFpuRegister();
+  }
+}
+
+Location LocationsBuilderARMVIXL::ArmEncodableConstantOrRegister(HInstruction* constant,
+                                                                 Opcode opcode) {
+  DCHECK(!Primitive::IsFloatingPointType(constant->GetType()));
+  if (constant->IsConstant() &&
+      CanEncodeConstantAsImmediate(constant->AsConstant(), opcode)) {
+    return Location::ConstantLocation(constant->AsConstant());
+  }
+  return Location::RequiresRegister();
+}
+
+bool LocationsBuilderARMVIXL::CanEncodeConstantAsImmediate(HConstant* input_cst,
+                                                           Opcode opcode) {
+  uint64_t value = static_cast<uint64_t>(Int64FromConstant(input_cst));
+  if (Primitive::Is64BitType(input_cst->GetType())) {
+    Opcode high_opcode = opcode;
+    SetCc low_set_cc = kCcDontCare;
+    switch (opcode) {
+      case SUB:
+        // Flip the operation to an ADD.
+        value = -value;
+        opcode = ADD;
+        FALLTHROUGH_INTENDED;
+      case ADD:
+        if (Low32Bits(value) == 0u) {
+          return CanEncodeConstantAsImmediate(High32Bits(value), opcode, kCcDontCare);
+        }
+        high_opcode = ADC;
+        low_set_cc = kCcSet;
+        break;
+      default:
+        break;
+    }
+    return CanEncodeConstantAsImmediate(Low32Bits(value), opcode, low_set_cc) &&
+        CanEncodeConstantAsImmediate(High32Bits(value), high_opcode, kCcDontCare);
+  } else {
+    return CanEncodeConstantAsImmediate(Low32Bits(value), opcode);
+  }
+}
+
+// TODO(VIXL): Replace art::arm::SetCc` with `vixl32::FlagsUpdate after flags set optimization
+// enabled.
+bool LocationsBuilderARMVIXL::CanEncodeConstantAsImmediate(uint32_t value,
+                                                           Opcode opcode,
+                                                           SetCc set_cc) {
+  ArmVIXLAssembler* assembler = codegen_->GetAssembler();
+  if (assembler->ShifterOperandCanHold(opcode, value, set_cc)) {
+    return true;
+  }
+  Opcode neg_opcode = kNoOperand;
+  switch (opcode) {
+    case AND: neg_opcode = BIC; value = ~value; break;
+    case ORR: neg_opcode = ORN; value = ~value; break;
+    case ADD: neg_opcode = SUB; value = -value; break;
+    case ADC: neg_opcode = SBC; value = ~value; break;
+    case SUB: neg_opcode = ADD; value = -value; break;
+    case SBC: neg_opcode = ADC; value = ~value; break;
+    default:
+      return false;
+  }
+  return assembler->ShifterOperandCanHold(neg_opcode, value, set_cc);
+}
+
+void InstructionCodeGeneratorARMVIXL::HandleFieldGet(HInstruction* instruction,
+                                                     const FieldInfo& field_info) {
+  DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
+
+  LocationSummary* locations = instruction->GetLocations();
+  vixl32::Register base = InputRegisterAt(instruction, 0);
+  Location out = locations->Out();
+  bool is_volatile = field_info.IsVolatile();
+  bool atomic_ldrd_strd = codegen_->GetInstructionSetFeatures().HasAtomicLdrdAndStrd();
+  Primitive::Type field_type = field_info.GetFieldType();
+  uint32_t offset = field_info.GetFieldOffset().Uint32Value();
+
+  switch (field_type) {
+    case Primitive::kPrimBoolean:
+      GetAssembler()->LoadFromOffset(kLoadUnsignedByte, RegisterFrom(out), base, offset);
+      break;
+
+    case Primitive::kPrimByte:
+      GetAssembler()->LoadFromOffset(kLoadSignedByte, RegisterFrom(out), base, offset);
+      break;
+
+    case Primitive::kPrimShort:
+      GetAssembler()->LoadFromOffset(kLoadSignedHalfword, RegisterFrom(out), base, offset);
+      break;
+
+    case Primitive::kPrimChar:
+      GetAssembler()->LoadFromOffset(kLoadUnsignedHalfword, RegisterFrom(out), base, offset);
+      break;
+
+    case Primitive::kPrimInt:
+      GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(out), base, offset);
+      break;
+
+    case Primitive::kPrimNot: {
+      // /* HeapReference<Object> */ out = *(base + offset)
+      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+        TODO_VIXL32(FATAL);
+      } else {
+        GetAssembler()->LoadFromOffset(kLoadWord, RegisterFrom(out), base, offset);
+        // TODO(VIXL): Scope to guarantee the position immediately after the load.
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+        if (is_volatile) {
+          codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+        }
+        // If read barriers are enabled, emit read barriers other than
+        // Baker's using a slow path (and also unpoison the loaded
+        // reference, if heap poisoning is enabled).
+        codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, locations->InAt(0), offset);
+      }
+      break;
+    }
+
+    case Primitive::kPrimLong:
+      if (is_volatile && !atomic_ldrd_strd) {
+        GenerateWideAtomicLoad(base, offset, LowRegisterFrom(out), HighRegisterFrom(out));
+      } else {
+        GetAssembler()->LoadFromOffset(kLoadWordPair, LowRegisterFrom(out), base, offset);
+      }
+      break;
+
+    case Primitive::kPrimFloat:
+      GetAssembler()->LoadSFromOffset(SRegisterFrom(out), base, offset);
+      break;
+
+    case Primitive::kPrimDouble: {
+      vixl32::DRegister out_dreg = DRegisterFrom(out);
+      if (is_volatile && !atomic_ldrd_strd) {
+        vixl32::Register lo = RegisterFrom(locations->GetTemp(0));
+        vixl32::Register hi = RegisterFrom(locations->GetTemp(1));
+        GenerateWideAtomicLoad(base, offset, lo, hi);
+        // TODO(VIXL): Do we need to be immediately after the ldrexd instruction? If so we need a
+        // scope.
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+        __ Vmov(out_dreg, lo, hi);
+      } else {
+        GetAssembler()->LoadDFromOffset(out_dreg, base, offset);
+        // TODO(VIXL): Scope to guarantee the position immediately after the load.
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+      }
+      break;
+    }
+
+    case Primitive::kPrimVoid:
+      LOG(FATAL) << "Unreachable type " << field_type;
+      UNREACHABLE();
+  }
+
+  if (field_type == Primitive::kPrimNot || field_type == Primitive::kPrimDouble) {
+    // Potential implicit null checks, in the case of reference or
+    // double fields, are handled in the previous switch statement.
+  } else {
+    // Address cases other than reference and double that may require an implicit null check.
+    codegen_->MaybeRecordImplicitNullCheck(instruction);
+  }
+
+  if (is_volatile) {
+    if (field_type == Primitive::kPrimNot) {
+      // Memory barriers, in the case of references, are also handled
+      // in the previous switch statement.
+    } else {
+      codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
+    }
+  }
+}
+
+void LocationsBuilderARMVIXL::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
+  HandleFieldSet(instruction, instruction->GetFieldInfo());
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
+  HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
+}
+
+void LocationsBuilderARMVIXL::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
+  HandleFieldGet(instruction, instruction->GetFieldInfo());
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
+  HandleFieldGet(instruction, instruction->GetFieldInfo());
+}
+
+void LocationsBuilderARMVIXL::VisitStaticFieldGet(HStaticFieldGet* instruction) {
+  HandleFieldGet(instruction, instruction->GetFieldInfo());
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitStaticFieldGet(HStaticFieldGet* instruction) {
+  HandleFieldGet(instruction, instruction->GetFieldInfo());
+}
+
+void LocationsBuilderARMVIXL::VisitStaticFieldSet(HStaticFieldSet* instruction) {
+  HandleFieldSet(instruction, instruction->GetFieldInfo());
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitStaticFieldSet(HStaticFieldSet* instruction) {
+  HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
+}
+
+void LocationsBuilderARMVIXL::VisitUnresolvedInstanceFieldGet(
+    HUnresolvedInstanceFieldGet* instruction) {
+  FieldAccessCallingConventionARMVIXL calling_convention;
+  codegen_->CreateUnresolvedFieldLocationSummary(
+      instruction, instruction->GetFieldType(), calling_convention);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitUnresolvedInstanceFieldGet(
+    HUnresolvedInstanceFieldGet* instruction) {
+  FieldAccessCallingConventionARMVIXL calling_convention;
+  codegen_->GenerateUnresolvedFieldAccess(instruction,
+                                          instruction->GetFieldType(),
+                                          instruction->GetFieldIndex(),
+                                          instruction->GetDexPc(),
+                                          calling_convention);
+}
+
+void LocationsBuilderARMVIXL::VisitUnresolvedInstanceFieldSet(
+    HUnresolvedInstanceFieldSet* instruction) {
+  FieldAccessCallingConventionARMVIXL calling_convention;
+  codegen_->CreateUnresolvedFieldLocationSummary(
+      instruction, instruction->GetFieldType(), calling_convention);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitUnresolvedInstanceFieldSet(
+    HUnresolvedInstanceFieldSet* instruction) {
+  FieldAccessCallingConventionARMVIXL calling_convention;
+  codegen_->GenerateUnresolvedFieldAccess(instruction,
+                                          instruction->GetFieldType(),
+                                          instruction->GetFieldIndex(),
+                                          instruction->GetDexPc(),
+                                          calling_convention);
+}
+
+void LocationsBuilderARMVIXL::VisitUnresolvedStaticFieldGet(
+    HUnresolvedStaticFieldGet* instruction) {
+  FieldAccessCallingConventionARMVIXL calling_convention;
+  codegen_->CreateUnresolvedFieldLocationSummary(
+      instruction, instruction->GetFieldType(), calling_convention);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitUnresolvedStaticFieldGet(
+    HUnresolvedStaticFieldGet* instruction) {
+  FieldAccessCallingConventionARMVIXL calling_convention;
+  codegen_->GenerateUnresolvedFieldAccess(instruction,
+                                          instruction->GetFieldType(),
+                                          instruction->GetFieldIndex(),
+                                          instruction->GetDexPc(),
+                                          calling_convention);
+}
+
+void LocationsBuilderARMVIXL::VisitUnresolvedStaticFieldSet(
+    HUnresolvedStaticFieldSet* instruction) {
+  FieldAccessCallingConventionARMVIXL calling_convention;
+  codegen_->CreateUnresolvedFieldLocationSummary(
+      instruction, instruction->GetFieldType(), calling_convention);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitUnresolvedStaticFieldSet(
+    HUnresolvedStaticFieldSet* instruction) {
+  FieldAccessCallingConventionARMVIXL calling_convention;
+  codegen_->GenerateUnresolvedFieldAccess(instruction,
+                                          instruction->GetFieldType(),
+                                          instruction->GetFieldIndex(),
+                                          instruction->GetDexPc(),
+                                          calling_convention);
+}
+
+void LocationsBuilderARMVIXL::VisitNullCheck(HNullCheck* instruction) {
+  // TODO(VIXL): https://android-review.googlesource.com/#/c/275337/
+  LocationSummary::CallKind call_kind = instruction->CanThrowIntoCatchBlock()
+      ? LocationSummary::kCallOnSlowPath
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+  locations->SetInAt(0, Location::RequiresRegister());
+  if (instruction->HasUses()) {
+    locations->SetOut(Location::SameAsFirstInput());
+  }
+}
+
+void CodeGeneratorARMVIXL::GenerateImplicitNullCheck(HNullCheck* instruction) {
+  if (CanMoveNullCheckToUser(instruction)) {
+    return;
+  }
+
+  UseScratchRegisterScope temps(GetVIXLAssembler());
+  AssemblerAccurateScope aas(GetVIXLAssembler(),
+                             kArmInstrMaxSizeInBytes,
+                             CodeBufferCheckScope::kMaximumSize);
+  __ ldr(temps.Acquire(), MemOperand(InputRegisterAt(instruction, 0)));
+  RecordPcInfo(instruction, instruction->GetDexPc());
+}
+
+void CodeGeneratorARMVIXL::GenerateExplicitNullCheck(HNullCheck* instruction) {
+  NullCheckSlowPathARMVIXL* slow_path =
+      new (GetGraph()->GetArena()) NullCheckSlowPathARMVIXL(instruction);
+  AddSlowPath(slow_path);
+  __ Cbz(InputRegisterAt(instruction, 0), slow_path->GetEntryLabel());
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitNullCheck(HNullCheck* instruction) {
+  codegen_->GenerateNullCheck(instruction);
+}
+
+static LoadOperandType GetLoadOperandType(Primitive::Type type) {
+  switch (type) {
+    case Primitive::kPrimNot:
+      return kLoadWord;
+    case Primitive::kPrimBoolean:
+      return kLoadUnsignedByte;
+    case Primitive::kPrimByte:
+      return kLoadSignedByte;
+    case Primitive::kPrimChar:
+      return kLoadUnsignedHalfword;
+    case Primitive::kPrimShort:
+      return kLoadSignedHalfword;
+    case Primitive::kPrimInt:
+      return kLoadWord;
+    case Primitive::kPrimLong:
+      return kLoadWordPair;
+    case Primitive::kPrimFloat:
+      return kLoadSWord;
+    case Primitive::kPrimDouble:
+      return kLoadDWord;
+    default:
+      LOG(FATAL) << "Unreachable type " << type;
+      UNREACHABLE();
+  }
+}
+
+static StoreOperandType GetStoreOperandType(Primitive::Type type) {
+  switch (type) {
+    case Primitive::kPrimNot:
+      return kStoreWord;
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+      return kStoreByte;
+    case Primitive::kPrimChar:
+    case Primitive::kPrimShort:
+      return kStoreHalfword;
+    case Primitive::kPrimInt:
+      return kStoreWord;
+    case Primitive::kPrimLong:
+      return kStoreWordPair;
+    case Primitive::kPrimFloat:
+      return kStoreSWord;
+    case Primitive::kPrimDouble:
+      return kStoreDWord;
+    default:
+      LOG(FATAL) << "Unreachable type " << type;
+      UNREACHABLE();
+  }
+}
+
+void CodeGeneratorARMVIXL::LoadFromShiftedRegOffset(Primitive::Type type,
+                                                    Location out_loc,
+                                                    vixl32::Register base,
+                                                    vixl32::Register reg_index,
+                                                    vixl32::Condition cond) {
+  uint32_t shift_count = Primitive::ComponentSizeShift(type);
+  MemOperand mem_address(base, reg_index, vixl32::LSL, shift_count);
+
+  switch (type) {
+    case Primitive::kPrimByte:
+      __ Ldrsb(cond, RegisterFrom(out_loc), mem_address);
+      break;
+    case Primitive::kPrimBoolean:
+      __ Ldrb(cond, RegisterFrom(out_loc), mem_address);
+      break;
+    case Primitive::kPrimShort:
+      __ Ldrsh(cond, RegisterFrom(out_loc), mem_address);
+      break;
+    case Primitive::kPrimChar:
+      __ Ldrh(cond, RegisterFrom(out_loc), mem_address);
+      break;
+    case Primitive::kPrimNot:
+    case Primitive::kPrimInt:
+      __ Ldr(cond, RegisterFrom(out_loc), mem_address);
+      break;
+    // T32 doesn't support LoadFromShiftedRegOffset mem address mode for these types.
+    case Primitive::kPrimLong:
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+    default:
+      LOG(FATAL) << "Unreachable type " << type;
+      UNREACHABLE();
+  }
+}
+
+void CodeGeneratorARMVIXL::StoreToShiftedRegOffset(Primitive::Type type,
+                                                   Location loc,
+                                                   vixl32::Register base,
+                                                   vixl32::Register reg_index,
+                                                   vixl32::Condition cond) {
+  uint32_t shift_count = Primitive::ComponentSizeShift(type);
+  MemOperand mem_address(base, reg_index, vixl32::LSL, shift_count);
+
+  switch (type) {
+    case Primitive::kPrimByte:
+    case Primitive::kPrimBoolean:
+      __ Strb(cond, RegisterFrom(loc), mem_address);
+      break;
+    case Primitive::kPrimShort:
+    case Primitive::kPrimChar:
+      __ Strh(cond, RegisterFrom(loc), mem_address);
+      break;
+    case Primitive::kPrimNot:
+    case Primitive::kPrimInt:
+      __ Str(cond, RegisterFrom(loc), mem_address);
+      break;
+    // T32 doesn't support StoreToShiftedRegOffset mem address mode for these types.
+    case Primitive::kPrimLong:
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+    default:
+      LOG(FATAL) << "Unreachable type " << type;
+      UNREACHABLE();
+  }
+}
+
+void LocationsBuilderARMVIXL::VisitArrayGet(HArrayGet* instruction) {
+  bool object_array_get_with_read_barrier =
+      kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction,
+                                                   object_array_get_with_read_barrier ?
+                                                       LocationSummary::kCallOnSlowPath :
+                                                       LocationSummary::kNoCall);
+  if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
+    TODO_VIXL32(FATAL);
+  }
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+  if (Primitive::IsFloatingPointType(instruction->GetType())) {
+    locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+  } else {
+    // The output overlaps in the case of an object array get with
+    // read barriers enabled: we do not want the move to overwrite the
+    // array's location, as we need it to emit the read barrier.
+    locations->SetOut(
+        Location::RequiresRegister(),
+        object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
+  }
+  // We need a temporary register for the read barrier marking slow
+  // path in CodeGeneratorARM::GenerateArrayLoadWithBakerReadBarrier.
+  // Also need for String compression feature.
+  if ((object_array_get_with_read_barrier && kUseBakerReadBarrier)
+      || (mirror::kUseStringCompression && instruction->IsStringCharAt())) {
+    locations->AddTemp(Location::RequiresRegister());
+  }
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitArrayGet(HArrayGet* instruction) {
+  UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
+  LocationSummary* locations = instruction->GetLocations();
+  Location obj_loc = locations->InAt(0);
+  vixl32::Register obj = InputRegisterAt(instruction, 0);
+  Location index = locations->InAt(1);
+  Location out_loc = locations->Out();
+  uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
+  Primitive::Type type = instruction->GetType();
+  const bool maybe_compressed_char_at = mirror::kUseStringCompression &&
+                                        instruction->IsStringCharAt();
+  HInstruction* array_instr = instruction->GetArray();
+  bool has_intermediate_address = array_instr->IsIntermediateAddress();
+  // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
+  DCHECK(!(has_intermediate_address && kEmitCompilerReadBarrier));
+
+  switch (type) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimInt: {
+      vixl32::Register length;
+      if (maybe_compressed_char_at) {
+        length = RegisterFrom(locations->GetTemp(0));
+        uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
+        GetAssembler()->LoadFromOffset(kLoadWord, length, obj, count_offset);
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+      }
+      if (index.IsConstant()) {
+        int32_t const_index = index.GetConstant()->AsIntConstant()->GetValue();
+        if (maybe_compressed_char_at) {
+          vixl32::Label uncompressed_load, done;
+          __ Lsrs(length, length, 1u);  // LSRS has a 16-bit encoding, TST (immediate) does not.
+          static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
+                        "Expecting 0=compressed, 1=uncompressed");
+          __ B(cs, &uncompressed_load);
+          GetAssembler()->LoadFromOffset(kLoadUnsignedByte,
+                                         RegisterFrom(out_loc),
+                                         obj,
+                                         data_offset + const_index);
+          __ B(&done);
+          __ Bind(&uncompressed_load);
+          GetAssembler()->LoadFromOffset(GetLoadOperandType(Primitive::kPrimChar),
+                                         RegisterFrom(out_loc),
+                                         obj,
+                                         data_offset + (const_index << 1));
+          __ Bind(&done);
+        } else {
+          uint32_t full_offset = data_offset + (const_index << Primitive::ComponentSizeShift(type));
+
+          LoadOperandType load_type = GetLoadOperandType(type);
+          GetAssembler()->LoadFromOffset(load_type, RegisterFrom(out_loc), obj, full_offset);
+        }
+      } else {
+        vixl32::Register temp = temps.Acquire();
+
+        if (has_intermediate_address) {
+          // We do not need to compute the intermediate address from the array: the
+          // input instruction has done it already. See the comment in
+          // `TryExtractArrayAccessAddress()`.
+          if (kIsDebugBuild) {
+            HIntermediateAddress* tmp = array_instr->AsIntermediateAddress();
+            DCHECK_EQ(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64(), data_offset);
+          }
+          temp = obj;
+        } else {
+          __ Add(temp, obj, data_offset);
+        }
+        if (maybe_compressed_char_at) {
+          vixl32::Label uncompressed_load, done;
+          __ Lsrs(length, length, 1u);  // LSRS has a 16-bit encoding, TST (immediate) does not.
+          static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
+                        "Expecting 0=compressed, 1=uncompressed");
+          __ B(cs, &uncompressed_load);
+          __ Ldrb(RegisterFrom(out_loc), MemOperand(temp, RegisterFrom(index), vixl32::LSL, 0));
+          __ B(&done);
+          __ Bind(&uncompressed_load);
+          __ Ldrh(RegisterFrom(out_loc), MemOperand(temp, RegisterFrom(index), vixl32::LSL, 1));
+          __ Bind(&done);
+        } else {
+          codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, RegisterFrom(index));
+        }
+      }
+      break;
+    }
+
+    case Primitive::kPrimNot: {
+      static_assert(
+          sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
+          "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
+      // /* HeapReference<Object> */ out =
+      //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
+      if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+        TODO_VIXL32(FATAL);
+      } else {
+        vixl32::Register out = OutputRegister(instruction);
+        if (index.IsConstant()) {
+          size_t offset =
+              (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+          GetAssembler()->LoadFromOffset(kLoadWord, out, obj, offset);
+          codegen_->MaybeRecordImplicitNullCheck(instruction);
+          // If read barriers are enabled, emit read barriers other than
+          // Baker's using a slow path (and also unpoison the loaded
+          // reference, if heap poisoning is enabled).
+          codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
+        } else {
+          vixl32::Register temp = temps.Acquire();
+
+          if (has_intermediate_address) {
+            // We do not need to compute the intermediate address from the array: the
+            // input instruction has done it already. See the comment in
+            // `TryExtractArrayAccessAddress()`.
+            if (kIsDebugBuild) {
+              HIntermediateAddress* tmp = array_instr->AsIntermediateAddress();
+              DCHECK_EQ(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64(), data_offset);
+            }
+            temp = obj;
+          } else {
+            __ Add(temp, obj, data_offset);
+          }
+          codegen_->LoadFromShiftedRegOffset(type, out_loc, temp, RegisterFrom(index));
+
+          codegen_->MaybeRecordImplicitNullCheck(instruction);
+          // If read barriers are enabled, emit read barriers other than
+          // Baker's using a slow path (and also unpoison the loaded
+          // reference, if heap poisoning is enabled).
+          codegen_->MaybeGenerateReadBarrierSlow(
+              instruction, out_loc, out_loc, obj_loc, data_offset, index);
+        }
+      }
+      break;
+    }
+
+    case Primitive::kPrimLong: {
+      if (index.IsConstant()) {
+        size_t offset =
+            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
+        GetAssembler()->LoadFromOffset(kLoadWordPair, LowRegisterFrom(out_loc), obj, offset);
+      } else {
+        vixl32::Register temp = temps.Acquire();
+        __ Add(temp, obj, Operand(RegisterFrom(index), vixl32::LSL, TIMES_8));
+        GetAssembler()->LoadFromOffset(kLoadWordPair, LowRegisterFrom(out_loc), temp, data_offset);
+      }
+      break;
+    }
+
+    case Primitive::kPrimFloat: {
+      vixl32::SRegister out = SRegisterFrom(out_loc);
+      if (index.IsConstant()) {
+        size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+        GetAssembler()->LoadSFromOffset(out, obj, offset);
+      } else {
+        vixl32::Register temp = temps.Acquire();
+        __ Add(temp, obj, Operand(RegisterFrom(index), vixl32::LSL, TIMES_4));
+        GetAssembler()->LoadSFromOffset(out, temp, data_offset);
+      }
+      break;
+    }
+
+    case Primitive::kPrimDouble: {
+      if (index.IsConstant()) {
+        size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
+        GetAssembler()->LoadDFromOffset(DRegisterFrom(out_loc), obj, offset);
+      } else {
+        vixl32::Register temp = temps.Acquire();
+        __ Add(temp, obj, Operand(RegisterFrom(index), vixl32::LSL, TIMES_8));
+        GetAssembler()->LoadDFromOffset(DRegisterFrom(out_loc), temp, data_offset);
+      }
+      break;
+    }
+
+    case Primitive::kPrimVoid:
+      LOG(FATAL) << "Unreachable type " << type;
+      UNREACHABLE();
+  }
+
+  if (type == Primitive::kPrimNot) {
+    // Potential implicit null checks, in the case of reference
+    // arrays, are handled in the previous switch statement.
+  } else if (!maybe_compressed_char_at) {
+    codegen_->MaybeRecordImplicitNullCheck(instruction);
+  }
+}
+
+void LocationsBuilderARMVIXL::VisitArraySet(HArraySet* instruction) {
+  Primitive::Type value_type = instruction->GetComponentType();
+
+  bool needs_write_barrier =
+      CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
+  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
+
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
+      instruction,
+      may_need_runtime_call_for_type_check ?
+          LocationSummary::kCallOnSlowPath :
+          LocationSummary::kNoCall);
+
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
+  if (Primitive::IsFloatingPointType(value_type)) {
+    locations->SetInAt(2, Location::RequiresFpuRegister());
+  } else {
+    locations->SetInAt(2, Location::RequiresRegister());
+  }
+  if (needs_write_barrier) {
+    // Temporary registers for the write barrier.
+    locations->AddTemp(Location::RequiresRegister());  // Possibly used for ref. poisoning too.
+    locations->AddTemp(Location::RequiresRegister());
+  }
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitArraySet(HArraySet* instruction) {
+  UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
+  LocationSummary* locations = instruction->GetLocations();
+  vixl32::Register array = InputRegisterAt(instruction, 0);
+  Location index = locations->InAt(1);
+  Primitive::Type value_type = instruction->GetComponentType();
+  bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
+  bool needs_write_barrier =
+      CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
+  uint32_t data_offset =
+      mirror::Array::DataOffset(Primitive::ComponentSize(value_type)).Uint32Value();
+  Location value_loc = locations->InAt(2);
+  HInstruction* array_instr = instruction->GetArray();
+  bool has_intermediate_address = array_instr->IsIntermediateAddress();
+  // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
+  DCHECK(!(has_intermediate_address && kEmitCompilerReadBarrier));
+
+  switch (value_type) {
+    case Primitive::kPrimBoolean:
+    case Primitive::kPrimByte:
+    case Primitive::kPrimShort:
+    case Primitive::kPrimChar:
+    case Primitive::kPrimInt: {
+      if (index.IsConstant()) {
+        int32_t const_index = index.GetConstant()->AsIntConstant()->GetValue();
+        uint32_t full_offset =
+            data_offset + (const_index << Primitive::ComponentSizeShift(value_type));
+        StoreOperandType store_type = GetStoreOperandType(value_type);
+        GetAssembler()->StoreToOffset(store_type, RegisterFrom(value_loc), array, full_offset);
+      } else {
+        vixl32::Register temp = temps.Acquire();
+
+        if (has_intermediate_address) {
+          // We do not need to compute the intermediate address from the array: the
+          // input instruction has done it already. See the comment in
+          // `TryExtractArrayAccessAddress()`.
+          if (kIsDebugBuild) {
+            HIntermediateAddress* tmp = array_instr->AsIntermediateAddress();
+            DCHECK(tmp->GetOffset()->AsIntConstant()->GetValueAsUint64() == data_offset);
+          }
+          temp = array;
+        } else {
+          __ Add(temp, array, data_offset);
+        }
+        codegen_->StoreToShiftedRegOffset(value_type, value_loc, temp, RegisterFrom(index));
+      }
+      break;
+    }
+
+    case Primitive::kPrimNot: {
+      vixl32::Register value = RegisterFrom(value_loc);
+      // TryExtractArrayAccessAddress optimization is never applied for non-primitive ArraySet.
+      // See the comment in instruction_simplifier_shared.cc.
+      DCHECK(!has_intermediate_address);
+
+      if (instruction->InputAt(2)->IsNullConstant()) {
+        // Just setting null.
+        if (index.IsConstant()) {
+          size_t offset =
+              (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+          GetAssembler()->StoreToOffset(kStoreWord, value, array, offset);
+        } else {
+          DCHECK(index.IsRegister()) << index;
+          vixl32::Register temp = temps.Acquire();
+          __ Add(temp, array, data_offset);
+          codegen_->StoreToShiftedRegOffset(value_type, value_loc, temp, RegisterFrom(index));
+        }
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+        DCHECK(!needs_write_barrier);
+        DCHECK(!may_need_runtime_call_for_type_check);
+        break;
+      }
+
+      DCHECK(needs_write_barrier);
+      Location temp1_loc = locations->GetTemp(0);
+      vixl32::Register temp1 = RegisterFrom(temp1_loc);
+      Location temp2_loc = locations->GetTemp(1);
+      vixl32::Register temp2 = RegisterFrom(temp2_loc);
+      uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+      uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+      uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+      vixl32::Label done;
+      SlowPathCodeARMVIXL* slow_path = nullptr;
+
+      if (may_need_runtime_call_for_type_check) {
+        slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathARMVIXL(instruction);
+        codegen_->AddSlowPath(slow_path);
+        if (instruction->GetValueCanBeNull()) {
+          vixl32::Label non_zero;
+          __ Cbnz(value, &non_zero);
+          if (index.IsConstant()) {
+            size_t offset =
+               (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+            GetAssembler()->StoreToOffset(kStoreWord, value, array, offset);
+          } else {
+            DCHECK(index.IsRegister()) << index;
+            vixl32::Register temp = temps.Acquire();
+            __ Add(temp, array, data_offset);
+            codegen_->StoreToShiftedRegOffset(value_type, value_loc, temp, RegisterFrom(index));
+          }
+          codegen_->MaybeRecordImplicitNullCheck(instruction);
+          __ B(&done);
+          __ Bind(&non_zero);
+        }
+
+        // Note that when read barriers are enabled, the type checks
+        // are performed without read barriers.  This is fine, even in
+        // the case where a class object is in the from-space after
+        // the flip, as a comparison involving such a type would not
+        // produce a false positive; it may of course produce a false
+        // negative, in which case we would take the ArraySet slow
+        // path.
+
+        // /* HeapReference<Class> */ temp1 = array->klass_
+        GetAssembler()->LoadFromOffset(kLoadWord, temp1, array, class_offset);
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+        GetAssembler()->MaybeUnpoisonHeapReference(temp1);
+
+        // /* HeapReference<Class> */ temp1 = temp1->component_type_
+        GetAssembler()->LoadFromOffset(kLoadWord, temp1, temp1, component_offset);
+        // /* HeapReference<Class> */ temp2 = value->klass_
+        GetAssembler()->LoadFromOffset(kLoadWord, temp2, value, class_offset);
+        // If heap poisoning is enabled, no need to unpoison `temp1`
+        // nor `temp2`, as we are comparing two poisoned references.
+        __ Cmp(temp1, temp2);
+
+        if (instruction->StaticTypeOfArrayIsObjectArray()) {
+          vixl32::Label do_put;
+          __ B(eq, &do_put);
+          // If heap poisoning is enabled, the `temp1` reference has
+          // not been unpoisoned yet; unpoison it now.
+          GetAssembler()->MaybeUnpoisonHeapReference(temp1);
+
+          // /* HeapReference<Class> */ temp1 = temp1->super_class_
+          GetAssembler()->LoadFromOffset(kLoadWord, temp1, temp1, super_offset);
+          // If heap poisoning is enabled, no need to unpoison
+          // `temp1`, as we are comparing against null below.
+          __ Cbnz(temp1, slow_path->GetEntryLabel());
+          __ Bind(&do_put);
+        } else {
+          __ B(ne, slow_path->GetEntryLabel());
+        }
+      }
+
+      vixl32::Register source = value;
+      if (kPoisonHeapReferences) {
+        // Note that in the case where `value` is a null reference,
+        // we do not enter this block, as a null reference does not
+        // need poisoning.
+        DCHECK_EQ(value_type, Primitive::kPrimNot);
+        __ Mov(temp1, value);
+        GetAssembler()->PoisonHeapReference(temp1);
+        source = temp1;
+      }
+
+      if (index.IsConstant()) {
+        size_t offset =
+            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+        GetAssembler()->StoreToOffset(kStoreWord, source, array, offset);
+      } else {
+        DCHECK(index.IsRegister()) << index;
+
+        vixl32::Register temp = temps.Acquire();
+        __ Add(temp, array, data_offset);
+        codegen_->StoreToShiftedRegOffset(value_type,
+                                          LocationFrom(source),
+                                          temp,
+                                          RegisterFrom(index));
+      }
+
+      if (!may_need_runtime_call_for_type_check) {
+        codegen_->MaybeRecordImplicitNullCheck(instruction);
+      }
+
+      codegen_->MarkGCCard(temp1, temp2, array, value, instruction->GetValueCanBeNull());
+
+      if (done.IsReferenced()) {
+        __ Bind(&done);
+      }
+
+      if (slow_path != nullptr) {
+        __ Bind(slow_path->GetExitLabel());
+      }
+
+      break;
+    }
+
+    case Primitive::kPrimLong: {
+      Location value = locations->InAt(2);
+      if (index.IsConstant()) {
+        size_t offset =
+            (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
+        GetAssembler()->StoreToOffset(kStoreWordPair, LowRegisterFrom(value), array, offset);
+      } else {
+        vixl32::Register temp = temps.Acquire();
+        __ Add(temp, array, Operand(RegisterFrom(index), vixl32::LSL, TIMES_8));
+        GetAssembler()->StoreToOffset(kStoreWordPair, LowRegisterFrom(value), temp, data_offset);
+      }
+      break;
+    }
+
+    case Primitive::kPrimFloat: {
+      Location value = locations->InAt(2);
+      DCHECK(value.IsFpuRegister());
+      if (index.IsConstant()) {
+        size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
+        GetAssembler()->StoreSToOffset(SRegisterFrom(value), array, offset);
+      } else {
+        vixl32::Register temp = temps.Acquire();
+        __ Add(temp, array, Operand(RegisterFrom(index), vixl32::LSL, TIMES_4));
+        GetAssembler()->StoreSToOffset(SRegisterFrom(value), temp, data_offset);
+      }
+      break;
+    }
+
+    case Primitive::kPrimDouble: {
+      Location value = locations->InAt(2);
+      DCHECK(value.IsFpuRegisterPair());
+      if (index.IsConstant()) {
+        size_t offset = (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_8) + data_offset;
+        GetAssembler()->StoreDToOffset(DRegisterFrom(value), array, offset);
+      } else {
+        vixl32::Register temp = temps.Acquire();
+        __ Add(temp, array, Operand(RegisterFrom(index), vixl32::LSL, TIMES_8));
+        GetAssembler()->StoreDToOffset(DRegisterFrom(value), temp, data_offset);
+      }
+      break;
+    }
+
+    case Primitive::kPrimVoid:
+      LOG(FATAL) << "Unreachable type " << value_type;
+      UNREACHABLE();
+  }
+
+  // Objects are handled in the switch.
+  if (value_type != Primitive::kPrimNot) {
+    codegen_->MaybeRecordImplicitNullCheck(instruction);
+  }
+}
+
+void LocationsBuilderARMVIXL::VisitArrayLength(HArrayLength* instruction) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitArrayLength(HArrayLength* instruction) {
+  uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
+  vixl32::Register obj = InputRegisterAt(instruction, 0);
+  vixl32::Register out = OutputRegister(instruction);
+  GetAssembler()->LoadFromOffset(kLoadWord, out, obj, offset);
+  codegen_->MaybeRecordImplicitNullCheck(instruction);
+  // Mask out compression flag from String's array length.
+  if (mirror::kUseStringCompression && instruction->IsStringLength()) {
+    __ Lsr(out, out, 1u);
+  }
+}
+
+void LocationsBuilderARMVIXL::VisitIntermediateAddress(HIntermediateAddress* instruction) {
+  // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
+  DCHECK(!kEmitCompilerReadBarrier);
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RegisterOrConstant(instruction->GetOffset()));
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitIntermediateAddress(HIntermediateAddress* instruction) {
+  vixl32::Register out = OutputRegister(instruction);
+  vixl32::Register first = InputRegisterAt(instruction, 0);
+  Location second = instruction->GetLocations()->InAt(1);
+
+  // The read barrier instrumentation does not support the HIntermediateAddress instruction yet.
+  DCHECK(!kEmitCompilerReadBarrier);
+
+  if (second.IsRegister()) {
+    __ Add(out, first, RegisterFrom(second));
+  } else {
+    __ Add(out, first, second.GetConstant()->AsIntConstant()->GetValue());
+  }
+}
+
+void LocationsBuilderARMVIXL::VisitBoundsCheck(HBoundsCheck* instruction) {
+  RegisterSet caller_saves = RegisterSet::Empty();
+  InvokeRuntimeCallingConventionARMVIXL calling_convention;
+  caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(0)));
+  caller_saves.Add(LocationFrom(calling_convention.GetRegisterAt(1)));
+  LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitBoundsCheck(HBoundsCheck* instruction) {
+  SlowPathCodeARMVIXL* slow_path =
+      new (GetGraph()->GetArena()) BoundsCheckSlowPathARMVIXL(instruction);
+  codegen_->AddSlowPath(slow_path);
+
+  vixl32::Register index = InputRegisterAt(instruction, 0);
+  vixl32::Register length = InputRegisterAt(instruction, 1);
+
+  __ Cmp(index, length);
+  __ B(hs, slow_path->GetEntryLabel());
+}
+
+void CodeGeneratorARMVIXL::MarkGCCard(vixl32::Register temp,
+                                      vixl32::Register card,
+                                      vixl32::Register object,
+                                      vixl32::Register value,
+                                      bool can_be_null) {
+  vixl32::Label is_null;
+  if (can_be_null) {
+    __ Cbz(value, &is_null);
+  }
+  GetAssembler()->LoadFromOffset(
+      kLoadWord, card, tr, Thread::CardTableOffset<kArmPointerSize>().Int32Value());
+  __ Lsr(temp, object, gc::accounting::CardTable::kCardShift);
+  __ Strb(card, MemOperand(card, temp));
+  if (can_be_null) {
+    __ Bind(&is_null);
+  }
+}
+
 void LocationsBuilderARMVIXL::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
   LOG(FATAL) << "Unreachable";
 }
@@ -2001,64 +4648,144 @@
   codegen_->GetMoveResolver()->EmitNativeCode(instruction);
 }
 
+void LocationsBuilderARMVIXL::VisitSuspendCheck(HSuspendCheck* instruction) {
+  new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
+  // TODO(VIXL): https://android-review.googlesource.com/#/c/275337/ and related.
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitSuspendCheck(HSuspendCheck* instruction) {
+  HBasicBlock* block = instruction->GetBlock();
+  if (block->GetLoopInformation() != nullptr) {
+    DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
+    // The back edge will generate the suspend check.
+    return;
+  }
+  if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
+    // The goto will generate the suspend check.
+    return;
+  }
+  GenerateSuspendCheck(instruction, nullptr);
+}
+
+void InstructionCodeGeneratorARMVIXL::GenerateSuspendCheck(HSuspendCheck* instruction,
+                                                           HBasicBlock* successor) {
+  SuspendCheckSlowPathARMVIXL* slow_path =
+      down_cast<SuspendCheckSlowPathARMVIXL*>(instruction->GetSlowPath());
+  if (slow_path == nullptr) {
+    slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathARMVIXL(instruction, successor);
+    instruction->SetSlowPath(slow_path);
+    codegen_->AddSlowPath(slow_path);
+    if (successor != nullptr) {
+      DCHECK(successor->IsLoopHeader());
+      codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction);
+    }
+  } else {
+    DCHECK_EQ(slow_path->GetSuccessor(), successor);
+  }
+
+  UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
+  vixl32::Register temp = temps.Acquire();
+  GetAssembler()->LoadFromOffset(
+      kLoadUnsignedHalfword, temp, tr, Thread::ThreadFlagsOffset<kArmPointerSize>().Int32Value());
+  if (successor == nullptr) {
+    __ Cbnz(temp, slow_path->GetEntryLabel());
+    __ Bind(slow_path->GetReturnLabel());
+  } else {
+    __ Cbz(temp, codegen_->GetLabelOf(successor));
+    __ B(slow_path->GetEntryLabel());
+  }
+}
+
 ArmVIXLAssembler* ParallelMoveResolverARMVIXL::GetAssembler() const {
   return codegen_->GetAssembler();
 }
 
 void ParallelMoveResolverARMVIXL::EmitMove(size_t index) {
+  UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
   MoveOperands* move = moves_[index];
   Location source = move->GetSource();
   Location destination = move->GetDestination();
 
   if (source.IsRegister()) {
     if (destination.IsRegister()) {
-      __ Mov(destination.AsRegister<vixl32::Register>(), source.AsRegister<vixl32::Register>());
+      __ Mov(RegisterFrom(destination), RegisterFrom(source));
     } else if (destination.IsFpuRegister()) {
-      __ Vmov(destination.AsFpuRegister<vixl32::SRegister>(),
-              source.AsRegister<vixl32::Register>());
+      __ Vmov(SRegisterFrom(destination), RegisterFrom(source));
     } else {
       DCHECK(destination.IsStackSlot());
       GetAssembler()->StoreToOffset(kStoreWord,
-                                    source.AsRegister<vixl32::Register>(),
+                                    RegisterFrom(source),
                                     sp,
                                     destination.GetStackIndex());
     }
   } else if (source.IsStackSlot()) {
-    TODO_VIXL32(FATAL);
+    if (destination.IsRegister()) {
+      GetAssembler()->LoadFromOffset(kLoadWord,
+                                     RegisterFrom(destination),
+                                     sp,
+                                     source.GetStackIndex());
+    } else if (destination.IsFpuRegister()) {
+      GetAssembler()->LoadSFromOffset(SRegisterFrom(destination), sp, source.GetStackIndex());
+    } else {
+      DCHECK(destination.IsStackSlot());
+      vixl32::Register temp = temps.Acquire();
+      GetAssembler()->LoadFromOffset(kLoadWord, temp, sp, source.GetStackIndex());
+      GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex());
+    }
   } else if (source.IsFpuRegister()) {
-    TODO_VIXL32(FATAL);
+    if (destination.IsRegister()) {
+      __ Vmov(RegisterFrom(destination), SRegisterFrom(source));
+    } else if (destination.IsFpuRegister()) {
+      __ Vmov(SRegisterFrom(destination), SRegisterFrom(source));
+    } else {
+      DCHECK(destination.IsStackSlot());
+      GetAssembler()->StoreSToOffset(SRegisterFrom(source), sp, destination.GetStackIndex());
+    }
   } else if (source.IsDoubleStackSlot()) {
-    TODO_VIXL32(FATAL);
+    if (destination.IsDoubleStackSlot()) {
+      vixl32::DRegister temp = temps.AcquireD();
+      GetAssembler()->LoadDFromOffset(temp, sp, source.GetStackIndex());
+      GetAssembler()->StoreDToOffset(temp, sp, destination.GetStackIndex());
+    } else if (destination.IsRegisterPair()) {
+      DCHECK(ExpectedPairLayout(destination));
+      GetAssembler()->LoadFromOffset(
+          kLoadWordPair, LowRegisterFrom(destination), sp, source.GetStackIndex());
+    } else {
+      DCHECK(destination.IsFpuRegisterPair()) << destination;
+      GetAssembler()->LoadDFromOffset(DRegisterFrom(destination), sp, source.GetStackIndex());
+    }
   } else if (source.IsRegisterPair()) {
     if (destination.IsRegisterPair()) {
-      __ Mov(destination.AsRegisterPairLow<vixl32::Register>(),
-             source.AsRegisterPairLow<vixl32::Register>());
-      __ Mov(destination.AsRegisterPairHigh<vixl32::Register>(),
-             source.AsRegisterPairHigh<vixl32::Register>());
+      __ Mov(LowRegisterFrom(destination), LowRegisterFrom(source));
+      __ Mov(HighRegisterFrom(destination), HighRegisterFrom(source));
     } else if (destination.IsFpuRegisterPair()) {
-      __ Vmov(FromLowSToD(destination.AsFpuRegisterPairLow<vixl32::SRegister>()),
-              source.AsRegisterPairLow<vixl32::Register>(),
-              source.AsRegisterPairHigh<vixl32::Register>());
+      __ Vmov(DRegisterFrom(destination), LowRegisterFrom(source), HighRegisterFrom(source));
     } else {
       DCHECK(destination.IsDoubleStackSlot()) << destination;
       DCHECK(ExpectedPairLayout(source));
       GetAssembler()->StoreToOffset(kStoreWordPair,
-                                    source.AsRegisterPairLow<vixl32::Register>(),
+                                    LowRegisterFrom(source),
                                     sp,
                                     destination.GetStackIndex());
     }
   } else if (source.IsFpuRegisterPair()) {
-    TODO_VIXL32(FATAL);
+    if (destination.IsRegisterPair()) {
+      __ Vmov(LowRegisterFrom(destination), HighRegisterFrom(destination), DRegisterFrom(source));
+    } else if (destination.IsFpuRegisterPair()) {
+      __ Vmov(DRegisterFrom(destination), DRegisterFrom(source));
+    } else {
+      DCHECK(destination.IsDoubleStackSlot()) << destination;
+      GetAssembler()->StoreDToOffset(DRegisterFrom(source), sp, destination.GetStackIndex());
+    }
   } else {
     DCHECK(source.IsConstant()) << source;
     HConstant* constant = source.GetConstant();
     if (constant->IsIntConstant() || constant->IsNullConstant()) {
       int32_t value = CodeGenerator::GetInt32ValueOf(constant);
       if (destination.IsRegister()) {
-        __ Mov(destination.AsRegister<vixl32::Register>(), value);
+        __ Mov(RegisterFrom(destination), value);
       } else {
         DCHECK(destination.IsStackSlot());
-        UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
         vixl32::Register temp = temps.Acquire();
         __ Mov(temp, value);
         GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex());
@@ -2066,11 +4793,10 @@
     } else if (constant->IsLongConstant()) {
       int64_t value = constant->AsLongConstant()->GetValue();
       if (destination.IsRegisterPair()) {
-        __ Mov(destination.AsRegisterPairLow<vixl32::Register>(), Low32Bits(value));
-        __ Mov(destination.AsRegisterPairHigh<vixl32::Register>(), High32Bits(value));
+        __ Mov(LowRegisterFrom(destination), Low32Bits(value));
+        __ Mov(HighRegisterFrom(destination), High32Bits(value));
       } else {
         DCHECK(destination.IsDoubleStackSlot()) << destination;
-        UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
         vixl32::Register temp = temps.Acquire();
         __ Mov(temp, Low32Bits(value));
         GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex());
@@ -2083,15 +4809,14 @@
     } else if (constant->IsDoubleConstant()) {
       double value = constant->AsDoubleConstant()->GetValue();
       if (destination.IsFpuRegisterPair()) {
-        __ Vmov(F64, FromLowSToD(destination.AsFpuRegisterPairLow<vixl32::SRegister>()), value);
+        __ Vmov(DRegisterFrom(destination), value);
       } else {
         DCHECK(destination.IsDoubleStackSlot()) << destination;
         uint64_t int_value = bit_cast<uint64_t, double>(value);
-        UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
         vixl32::Register temp = temps.Acquire();
-        GetAssembler()->LoadImmediate(temp, Low32Bits(int_value));
+        __ Mov(temp, Low32Bits(int_value));
         GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex());
-        GetAssembler()->LoadImmediate(temp, High32Bits(int_value));
+        __ Mov(temp, High32Bits(int_value));
         GetAssembler()->StoreToOffset(kStoreWord,
                                       temp,
                                       sp,
@@ -2101,28 +4826,93 @@
       DCHECK(constant->IsFloatConstant()) << constant->DebugName();
       float value = constant->AsFloatConstant()->GetValue();
       if (destination.IsFpuRegister()) {
-        __ Vmov(F32, destination.AsFpuRegister<vixl32::SRegister>(), value);
+        __ Vmov(SRegisterFrom(destination), value);
       } else {
         DCHECK(destination.IsStackSlot());
-        UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
         vixl32::Register temp = temps.Acquire();
-        GetAssembler()->LoadImmediate(temp, bit_cast<int32_t, float>(value));
+        __ Mov(temp, bit_cast<int32_t, float>(value));
         GetAssembler()->StoreToOffset(kStoreWord, temp, sp, destination.GetStackIndex());
       }
     }
   }
 }
 
-void ParallelMoveResolverARMVIXL::Exchange(Register reg, int mem) {
-  TODO_VIXL32(FATAL);
+void ParallelMoveResolverARMVIXL::Exchange(vixl32::Register reg, int mem) {
+  UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
+  vixl32::Register temp = temps.Acquire();
+  __ Mov(temp, reg);
+  GetAssembler()->LoadFromOffset(kLoadWord, reg, sp, mem);
+  GetAssembler()->StoreToOffset(kStoreWord, temp, sp, mem);
 }
 
 void ParallelMoveResolverARMVIXL::Exchange(int mem1, int mem2) {
-  TODO_VIXL32(FATAL);
+  // TODO(VIXL32): Double check the performance of this implementation.
+  UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
+  vixl32::Register temp = temps.Acquire();
+  vixl32::SRegister temp_s = temps.AcquireS();
+
+  __ Ldr(temp, MemOperand(sp, mem1));
+  __ Vldr(temp_s, MemOperand(sp, mem2));
+  __ Str(temp, MemOperand(sp, mem2));
+  __ Vstr(temp_s, MemOperand(sp, mem1));
 }
 
 void ParallelMoveResolverARMVIXL::EmitSwap(size_t index) {
-  TODO_VIXL32(FATAL);
+  MoveOperands* move = moves_[index];
+  Location source = move->GetSource();
+  Location destination = move->GetDestination();
+  UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
+
+  if (source.IsRegister() && destination.IsRegister()) {
+    vixl32::Register temp = temps.Acquire();
+    DCHECK(!RegisterFrom(source).Is(temp));
+    DCHECK(!RegisterFrom(destination).Is(temp));
+    __ Mov(temp, RegisterFrom(destination));
+    __ Mov(RegisterFrom(destination), RegisterFrom(source));
+    __ Mov(RegisterFrom(source), temp);
+  } else if (source.IsRegister() && destination.IsStackSlot()) {
+    Exchange(RegisterFrom(source), destination.GetStackIndex());
+  } else if (source.IsStackSlot() && destination.IsRegister()) {
+    Exchange(RegisterFrom(destination), source.GetStackIndex());
+  } else if (source.IsStackSlot() && destination.IsStackSlot()) {
+    TODO_VIXL32(FATAL);
+  } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
+    TODO_VIXL32(FATAL);
+  } else if (source.IsRegisterPair() && destination.IsRegisterPair()) {
+    vixl32::DRegister temp = temps.AcquireD();
+    __ Vmov(temp, LowRegisterFrom(source), HighRegisterFrom(source));
+    __ Mov(LowRegisterFrom(source), LowRegisterFrom(destination));
+    __ Mov(HighRegisterFrom(source), HighRegisterFrom(destination));
+    __ Vmov(LowRegisterFrom(destination), HighRegisterFrom(destination), temp);
+  } else if (source.IsRegisterPair() || destination.IsRegisterPair()) {
+    vixl32::Register low_reg = LowRegisterFrom(source.IsRegisterPair() ? source : destination);
+    int mem = source.IsRegisterPair() ? destination.GetStackIndex() : source.GetStackIndex();
+    DCHECK(ExpectedPairLayout(source.IsRegisterPair() ? source : destination));
+    vixl32::DRegister temp = temps.AcquireD();
+    __ Vmov(temp, low_reg, vixl32::Register(low_reg.GetCode() + 1));
+    GetAssembler()->LoadFromOffset(kLoadWordPair, low_reg, sp, mem);
+    GetAssembler()->StoreDToOffset(temp, sp, mem);
+  } else if (source.IsFpuRegisterPair() && destination.IsFpuRegisterPair()) {
+    vixl32::DRegister first = DRegisterFrom(source);
+    vixl32::DRegister second = DRegisterFrom(destination);
+    vixl32::DRegister temp = temps.AcquireD();
+    __ Vmov(temp, first);
+    __ Vmov(first, second);
+    __ Vmov(second, temp);
+  } else if (source.IsFpuRegisterPair() || destination.IsFpuRegisterPair()) {
+    TODO_VIXL32(FATAL);
+  } else if (source.IsFpuRegister() || destination.IsFpuRegister()) {
+    TODO_VIXL32(FATAL);
+  } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) {
+    vixl32::DRegister temp1 = temps.AcquireD();
+    vixl32::DRegister temp2 = temps.AcquireD();
+    __ Vldr(temp1, MemOperand(sp, source.GetStackIndex()));
+    __ Vldr(temp2, MemOperand(sp, destination.GetStackIndex()));
+    __ Vstr(temp1, MemOperand(sp, destination.GetStackIndex()));
+    __ Vstr(temp2, MemOperand(sp, source.GetStackIndex()));
+  } else {
+    LOG(FATAL) << "Unimplemented" << source << " <-> " << destination;
+  }
 }
 
 void ParallelMoveResolverARMVIXL::SpillScratch(int reg ATTRIBUTE_UNUSED) {
@@ -2133,9 +4923,1237 @@
   TODO_VIXL32(FATAL);
 }
 
+// Check if the desired_class_load_kind is supported. If it is, return it,
+// otherwise return a fall-back kind that should be used instead.
+HLoadClass::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadClassKind(
+      HLoadClass::LoadKind desired_class_load_kind ATTRIBUTE_UNUSED) {
+  // TODO(VIXL): Implement optimized code paths.
+  return HLoadClass::LoadKind::kDexCacheViaMethod;
+}
 
-// TODO: Remove when codegen complete.
-#pragma GCC diagnostic pop
+void LocationsBuilderARMVIXL::VisitLoadClass(HLoadClass* cls) {
+  if (cls->NeedsAccessCheck()) {
+    InvokeRuntimeCallingConventionARMVIXL calling_convention;
+    CodeGenerator::CreateLoadClassLocationSummary(
+        cls,
+        LocationFrom(calling_convention.GetRegisterAt(0)),
+        LocationFrom(r0),
+        /* code_generator_supports_read_barrier */ true);
+    return;
+  }
+
+  // TODO(VIXL): read barrier code.
+  LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || kEmitCompilerReadBarrier)
+      ? LocationSummary::kCallOnSlowPath
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind);
+  HLoadClass::LoadKind load_kind = cls->GetLoadKind();
+  if (load_kind == HLoadClass::LoadKind::kReferrersClass ||
+      load_kind == HLoadClass::LoadKind::kDexCacheViaMethod ||
+      load_kind == HLoadClass::LoadKind::kDexCachePcRelative) {
+    locations->SetInAt(0, Location::RequiresRegister());
+  }
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitLoadClass(HLoadClass* cls) {
+  LocationSummary* locations = cls->GetLocations();
+  if (cls->NeedsAccessCheck()) {
+    codegen_->MoveConstant(locations->GetTemp(0), cls->GetTypeIndex());
+    codegen_->InvokeRuntime(kQuickInitializeTypeAndVerifyAccess, cls, cls->GetDexPc());
+    CheckEntrypointTypes<kQuickInitializeTypeAndVerifyAccess, void*, uint32_t>();
+    return;
+  }
+
+  Location out_loc = locations->Out();
+  vixl32::Register out = OutputRegister(cls);
+
+  // TODO(VIXL): read barrier code.
+  bool generate_null_check = false;
+  switch (cls->GetLoadKind()) {
+    case HLoadClass::LoadKind::kReferrersClass: {
+      DCHECK(!cls->CanCallRuntime());
+      DCHECK(!cls->MustGenerateClinitCheck());
+      // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
+      vixl32::Register current_method = InputRegisterAt(cls, 0);
+      GenerateGcRootFieldLoad(cls,
+                              out_loc,
+                              current_method,
+                              ArtMethod::DeclaringClassOffset().Int32Value(),
+                              kEmitCompilerReadBarrier);
+      break;
+    }
+    case HLoadClass::LoadKind::kDexCacheViaMethod: {
+      // /* GcRoot<mirror::Class>[] */ out =
+      //        current_method.ptr_sized_fields_->dex_cache_resolved_types_
+      vixl32::Register current_method = InputRegisterAt(cls, 0);
+      const int32_t resolved_types_offset =
+          ArtMethod::DexCacheResolvedTypesOffset(kArmPointerSize).Int32Value();
+      GetAssembler()->LoadFromOffset(kLoadWord, out, current_method, resolved_types_offset);
+      // /* GcRoot<mirror::Class> */ out = out[type_index]
+      size_t offset = CodeGenerator::GetCacheOffset(cls->GetTypeIndex());
+      GenerateGcRootFieldLoad(cls, out_loc, out, offset, kEmitCompilerReadBarrier);
+      generate_null_check = !cls->IsInDexCache();
+      break;
+    }
+    default:
+      TODO_VIXL32(FATAL);
+  }
+
+  if (generate_null_check || cls->MustGenerateClinitCheck()) {
+    DCHECK(cls->CanCallRuntime());
+    LoadClassSlowPathARMVIXL* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathARMVIXL(
+        cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
+    codegen_->AddSlowPath(slow_path);
+    if (generate_null_check) {
+      __ Cbz(out, slow_path->GetEntryLabel());
+    }
+    if (cls->MustGenerateClinitCheck()) {
+      GenerateClassInitializationCheck(slow_path, out);
+    } else {
+      __ Bind(slow_path->GetExitLabel());
+    }
+  }
+}
+
+void LocationsBuilderARMVIXL::VisitClinitCheck(HClinitCheck* check) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
+  locations->SetInAt(0, Location::RequiresRegister());
+  if (check->HasUses()) {
+    locations->SetOut(Location::SameAsFirstInput());
+  }
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitClinitCheck(HClinitCheck* check) {
+  // We assume the class is not null.
+  LoadClassSlowPathARMVIXL* slow_path =
+      new (GetGraph()->GetArena()) LoadClassSlowPathARMVIXL(check->GetLoadClass(),
+                                                            check,
+                                                            check->GetDexPc(),
+                                                            /* do_clinit */ true);
+  codegen_->AddSlowPath(slow_path);
+  GenerateClassInitializationCheck(slow_path, InputRegisterAt(check, 0));
+}
+
+void InstructionCodeGeneratorARMVIXL::GenerateClassInitializationCheck(
+    LoadClassSlowPathARMVIXL* slow_path, vixl32::Register class_reg) {
+  UseScratchRegisterScope temps(GetVIXLAssembler());
+  vixl32::Register temp = temps.Acquire();
+  GetAssembler()->LoadFromOffset(kLoadWord,
+                                 temp,
+                                 class_reg,
+                                 mirror::Class::StatusOffset().Int32Value());
+  __ Cmp(temp, mirror::Class::kStatusInitialized);
+  __ B(lt, slow_path->GetEntryLabel());
+  // Even if the initialized flag is set, we may be in a situation where caches are not synced
+  // properly. Therefore, we do a memory fence.
+  __ Dmb(ISH);
+  __ Bind(slow_path->GetExitLabel());
+}
+
+// Check if the desired_string_load_kind is supported. If it is, return it,
+// otherwise return a fall-back kind that should be used instead.
+HLoadString::LoadKind CodeGeneratorARMVIXL::GetSupportedLoadStringKind(
+      HLoadString::LoadKind desired_string_load_kind ATTRIBUTE_UNUSED) {
+  // TODO(VIXL): Implement optimized code paths. For now we always use the simpler fallback code.
+  return HLoadString::LoadKind::kDexCacheViaMethod;
+}
+
+void LocationsBuilderARMVIXL::VisitLoadString(HLoadString* load) {
+  LocationSummary::CallKind call_kind = load->NeedsEnvironment()
+      ? LocationSummary::kCallOnMainOnly
+      : LocationSummary::kNoCall;
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
+
+  // TODO(VIXL): Implement optimized code paths.
+  // See InstructionCodeGeneratorARMVIXL::VisitLoadString.
+  HLoadString::LoadKind load_kind = load->GetLoadKind();
+  if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) {
+    locations->SetInAt(0, Location::RequiresRegister());
+    // TODO(VIXL): Use InvokeRuntimeCallingConventionARMVIXL instead.
+    locations->SetOut(LocationFrom(r0));
+  } else {
+    locations->SetOut(Location::RequiresRegister());
+  }
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitLoadString(HLoadString* load) {
+  // TODO(VIXL): Implement optimized code paths.
+  // We implemented the simplest solution to get first ART tests passing, we deferred the
+  // optimized path until later, we should implement it using ARM64 implementation as a
+  // reference. The same related to LocationsBuilderARMVIXL::VisitLoadString.
+
+  // TODO: Re-add the compiler code to do string dex cache lookup again.
+  DCHECK_EQ(load->GetLoadKind(), HLoadString::LoadKind::kDexCacheViaMethod);
+  InvokeRuntimeCallingConventionARMVIXL calling_convention;
+  __ Mov(calling_convention.GetRegisterAt(0), load->GetStringIndex());
+  codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
+  CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
+}
+
+static int32_t GetExceptionTlsOffset() {
+  return Thread::ExceptionOffset<kArmPointerSize>().Int32Value();
+}
+
+void LocationsBuilderARMVIXL::VisitLoadException(HLoadException* load) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kNoCall);
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitLoadException(HLoadException* load) {
+  vixl32::Register out = OutputRegister(load);
+  GetAssembler()->LoadFromOffset(kLoadWord, out, tr, GetExceptionTlsOffset());
+}
+
+
+void LocationsBuilderARMVIXL::VisitClearException(HClearException* clear) {
+  new (GetGraph()->GetArena()) LocationSummary(clear, LocationSummary::kNoCall);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) {
+  UseScratchRegisterScope temps(GetVIXLAssembler());
+  vixl32::Register temp = temps.Acquire();
+  __ Mov(temp, 0);
+  GetAssembler()->StoreToOffset(kStoreWord, temp, tr, GetExceptionTlsOffset());
+}
+
+void LocationsBuilderARMVIXL::VisitThrow(HThrow* instruction) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
+  InvokeRuntimeCallingConventionARMVIXL calling_convention;
+  locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitThrow(HThrow* instruction) {
+  codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
+  CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
+}
+
+static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
+  return kEmitCompilerReadBarrier &&
+      (kUseBakerReadBarrier ||
+       type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+       type_check_kind == TypeCheckKind::kArrayObjectCheck);
+}
+
+
+void LocationsBuilderARMVIXL::VisitInstanceOf(HInstanceOf* instruction) {
+  LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+  bool baker_read_barrier_slow_path = false;
+  switch (type_check_kind) {
+    case TypeCheckKind::kExactCheck:
+    case TypeCheckKind::kAbstractClassCheck:
+    case TypeCheckKind::kClassHierarchyCheck:
+    case TypeCheckKind::kArrayObjectCheck:
+      call_kind =
+          kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
+      baker_read_barrier_slow_path = kUseBakerReadBarrier;
+      break;
+    case TypeCheckKind::kArrayCheck:
+    case TypeCheckKind::kUnresolvedCheck:
+    case TypeCheckKind::kInterfaceCheck:
+      call_kind = LocationSummary::kCallOnSlowPath;
+      break;
+  }
+
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+  if (baker_read_barrier_slow_path) {
+    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
+  }
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  // The "out" register is used as a temporary, so it overlaps with the inputs.
+  // Note that TypeCheckSlowPathARM uses this register too.
+  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+  // When read barriers are enabled, we need a temporary register for
+  // some cases.
+  if (TypeCheckNeedsATemporary(type_check_kind)) {
+    locations->AddTemp(Location::RequiresRegister());
+  }
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitInstanceOf(HInstanceOf* instruction) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+  LocationSummary* locations = instruction->GetLocations();
+  Location obj_loc = locations->InAt(0);
+  vixl32::Register obj = InputRegisterAt(instruction, 0);
+  vixl32::Register cls = InputRegisterAt(instruction, 1);
+  Location out_loc = locations->Out();
+  vixl32::Register out = OutputRegister(instruction);
+  Location maybe_temp_loc = TypeCheckNeedsATemporary(type_check_kind) ?
+      locations->GetTemp(0) :
+      Location::NoLocation();
+  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+  uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+  uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+  uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+  vixl32::Label done, zero;
+  SlowPathCodeARMVIXL* slow_path = nullptr;
+
+  // Return 0 if `obj` is null.
+  // avoid null check if we know obj is not null.
+  if (instruction->MustDoNullCheck()) {
+    __ Cbz(obj, &zero);
+  }
+
+  switch (type_check_kind) {
+    case TypeCheckKind::kExactCheck: {
+      // /* HeapReference<Class> */ out = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp_loc);
+      __ Cmp(out, cls);
+      // Classes must be equal for the instanceof to succeed.
+      __ B(ne, &zero);
+      __ Mov(out, 1);
+      __ B(&done);
+      break;
+    }
+
+    case TypeCheckKind::kAbstractClassCheck: {
+      // /* HeapReference<Class> */ out = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp_loc);
+      // If the class is abstract, we eagerly fetch the super class of the
+      // object to avoid doing a comparison we know will fail.
+      vixl32::Label loop;
+      __ Bind(&loop);
+      // /* HeapReference<Class> */ out = out->super_class_
+      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc);
+      // If `out` is null, we use it for the result, and jump to `done`.
+      __ Cbz(out, &done);
+      __ Cmp(out, cls);
+      __ B(ne, &loop);
+      __ Mov(out, 1);
+      if (zero.IsReferenced()) {
+        __ B(&done);
+      }
+      break;
+    }
+
+    case TypeCheckKind::kClassHierarchyCheck: {
+      // /* HeapReference<Class> */ out = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp_loc);
+      // Walk over the class hierarchy to find a match.
+      vixl32::Label loop, success;
+      __ Bind(&loop);
+      __ Cmp(out, cls);
+      __ B(eq, &success);
+      // /* HeapReference<Class> */ out = out->super_class_
+      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc);
+      __ Cbnz(out, &loop);
+      // If `out` is null, we use it for the result, and jump to `done`.
+      __ B(&done);
+      __ Bind(&success);
+      __ Mov(out, 1);
+      if (zero.IsReferenced()) {
+        __ B(&done);
+      }
+      break;
+    }
+
+    case TypeCheckKind::kArrayObjectCheck: {
+      // /* HeapReference<Class> */ out = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp_loc);
+      // Do an exact check.
+      vixl32::Label exact_check;
+      __ Cmp(out, cls);
+      __ B(eq, &exact_check);
+      // Otherwise, we need to check that the object's class is a non-primitive array.
+      // /* HeapReference<Class> */ out = out->component_type_
+      GenerateReferenceLoadOneRegister(instruction, out_loc, component_offset, maybe_temp_loc);
+      // If `out` is null, we use it for the result, and jump to `done`.
+      __ Cbz(out, &done);
+      GetAssembler()->LoadFromOffset(kLoadUnsignedHalfword, out, out, primitive_offset);
+      static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+      __ Cbnz(out, &zero);
+      __ Bind(&exact_check);
+      __ Mov(out, 1);
+      __ B(&done);
+      break;
+    }
+
+    case TypeCheckKind::kArrayCheck: {
+      // /* HeapReference<Class> */ out = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        maybe_temp_loc);
+      __ Cmp(out, cls);
+      DCHECK(locations->OnlyCallsOnSlowPath());
+      slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARMVIXL(instruction,
+                                                                        /* is_fatal */ false);
+      codegen_->AddSlowPath(slow_path);
+      __ B(ne, slow_path->GetEntryLabel());
+      __ Mov(out, 1);
+      if (zero.IsReferenced()) {
+        __ B(&done);
+      }
+      break;
+    }
+
+    case TypeCheckKind::kUnresolvedCheck:
+    case TypeCheckKind::kInterfaceCheck: {
+      // Note that we indeed only call on slow path, but we always go
+      // into the slow path for the unresolved and interface check
+      // cases.
+      //
+      // We cannot directly call the InstanceofNonTrivial runtime
+      // entry point without resorting to a type checking slow path
+      // here (i.e. by calling InvokeRuntime directly), as it would
+      // require to assign fixed registers for the inputs of this
+      // HInstanceOf instruction (following the runtime calling
+      // convention), which might be cluttered by the potential first
+      // read barrier emission at the beginning of this method.
+      //
+      // TODO: Introduce a new runtime entry point taking the object
+      // to test (instead of its class) as argument, and let it deal
+      // with the read barrier issues. This will let us refactor this
+      // case of the `switch` code as it was previously (with a direct
+      // call to the runtime not using a type checking slow path).
+      // This should also be beneficial for the other cases above.
+      DCHECK(locations->OnlyCallsOnSlowPath());
+      slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathARMVIXL(instruction,
+                                                                        /* is_fatal */ false);
+      codegen_->AddSlowPath(slow_path);
+      __ B(slow_path->GetEntryLabel());
+      if (zero.IsReferenced()) {
+        __ B(&done);
+      }
+      break;
+    }
+  }
+
+  if (zero.IsReferenced()) {
+    __ Bind(&zero);
+    __ Mov(out, 0);
+  }
+
+  if (done.IsReferenced()) {
+    __ Bind(&done);
+  }
+
+  if (slow_path != nullptr) {
+    __ Bind(slow_path->GetExitLabel());
+  }
+}
+
+void LocationsBuilderARMVIXL::VisitCheckCast(HCheckCast* instruction) {
+  LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
+  bool throws_into_catch = instruction->CanThrowIntoCatchBlock();
+
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+  switch (type_check_kind) {
+    case TypeCheckKind::kExactCheck:
+    case TypeCheckKind::kAbstractClassCheck:
+    case TypeCheckKind::kClassHierarchyCheck:
+    case TypeCheckKind::kArrayObjectCheck:
+      call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ?
+          LocationSummary::kCallOnSlowPath :
+          LocationSummary::kNoCall;  // In fact, call on a fatal (non-returning) slow path.
+      break;
+    case TypeCheckKind::kArrayCheck:
+    case TypeCheckKind::kUnresolvedCheck:
+    case TypeCheckKind::kInterfaceCheck:
+      call_kind = LocationSummary::kCallOnSlowPath;
+      break;
+  }
+
+  LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  // Note that TypeCheckSlowPathARM uses this "temp" register too.
+  locations->AddTemp(Location::RequiresRegister());
+  // When read barriers are enabled, we need an additional temporary
+  // register for some cases.
+  if (TypeCheckNeedsATemporary(type_check_kind)) {
+    locations->AddTemp(Location::RequiresRegister());
+  }
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitCheckCast(HCheckCast* instruction) {
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+  LocationSummary* locations = instruction->GetLocations();
+  Location obj_loc = locations->InAt(0);
+  vixl32::Register obj = InputRegisterAt(instruction, 0);
+  vixl32::Register cls = InputRegisterAt(instruction, 1);
+  Location temp_loc = locations->GetTemp(0);
+  vixl32::Register temp = RegisterFrom(temp_loc);
+  Location maybe_temp2_loc = TypeCheckNeedsATemporary(type_check_kind) ?
+      locations->GetTemp(1) :
+      Location::NoLocation();
+  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+  uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+  uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+  uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+
+  bool is_type_check_slow_path_fatal =
+      (type_check_kind == TypeCheckKind::kExactCheck ||
+       type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+       type_check_kind == TypeCheckKind::kArrayObjectCheck) &&
+      !instruction->CanThrowIntoCatchBlock();
+  SlowPathCodeARMVIXL* type_check_slow_path =
+      new (GetGraph()->GetArena()) TypeCheckSlowPathARMVIXL(instruction,
+                                                            is_type_check_slow_path_fatal);
+  codegen_->AddSlowPath(type_check_slow_path);
+
+  vixl32::Label done;
+  // Avoid null check if we know obj is not null.
+  if (instruction->MustDoNullCheck()) {
+    __ Cbz(obj, &done);
+  }
+
+  // /* HeapReference<Class> */ temp = obj->klass_
+  GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset, maybe_temp2_loc);
+
+  switch (type_check_kind) {
+    case TypeCheckKind::kExactCheck:
+    case TypeCheckKind::kArrayCheck: {
+      __ Cmp(temp, cls);
+      // Jump to slow path for throwing the exception or doing a
+      // more involved array check.
+      __ B(ne, type_check_slow_path->GetEntryLabel());
+      break;
+    }
+
+    case TypeCheckKind::kAbstractClassCheck: {
+      // If the class is abstract, we eagerly fetch the super class of the
+      // object to avoid doing a comparison we know will fail.
+      vixl32::Label loop;
+      __ Bind(&loop);
+      // /* HeapReference<Class> */ temp = temp->super_class_
+      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc);
+
+      // If the class reference currently in `temp` is null, jump to the slow path to throw the
+      // exception.
+      __ Cbz(temp, type_check_slow_path->GetEntryLabel());
+
+      // Otherwise, compare the classes.
+      __ Cmp(temp, cls);
+      __ B(ne, &loop);
+      break;
+    }
+
+    case TypeCheckKind::kClassHierarchyCheck: {
+      // Walk over the class hierarchy to find a match.
+      vixl32::Label loop;
+      __ Bind(&loop);
+      __ Cmp(temp, cls);
+      __ B(eq, &done);
+
+      // /* HeapReference<Class> */ temp = temp->super_class_
+      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc);
+
+      // If the class reference currently in `temp` is null, jump to the slow path to throw the
+      // exception.
+      __ Cbz(temp, type_check_slow_path->GetEntryLabel());
+      // Otherwise, jump to the beginning of the loop.
+      __ B(&loop);
+      break;
+    }
+
+    case TypeCheckKind::kArrayObjectCheck:  {
+      // Do an exact check.
+      __ Cmp(temp, cls);
+      __ B(eq, &done);
+
+      // Otherwise, we need to check that the object's class is a non-primitive array.
+      // /* HeapReference<Class> */ temp = temp->component_type_
+      GenerateReferenceLoadOneRegister(instruction, temp_loc, component_offset, maybe_temp2_loc);
+      // If the component type is null, jump to the slow path to throw the exception.
+      __ Cbz(temp, type_check_slow_path->GetEntryLabel());
+      // Otherwise,the object is indeed an array, jump to label `check_non_primitive_component_type`
+      // to further check that this component type is not a primitive type.
+      GetAssembler()->LoadFromOffset(kLoadUnsignedHalfword, temp, temp, primitive_offset);
+      static_assert(Primitive::kPrimNot == 0, "Expected 0 for art::Primitive::kPrimNot");
+      __ Cbnz(temp, type_check_slow_path->GetEntryLabel());
+      break;
+    }
+
+    case TypeCheckKind::kUnresolvedCheck:
+    case TypeCheckKind::kInterfaceCheck:
+      // We always go into the type check slow path for the unresolved
+      // and interface check cases.
+      //
+      // We cannot directly call the CheckCast runtime entry point
+      // without resorting to a type checking slow path here (i.e. by
+      // calling InvokeRuntime directly), as it would require to
+      // assign fixed registers for the inputs of this HInstanceOf
+      // instruction (following the runtime calling convention), which
+      // might be cluttered by the potential first read barrier
+      // emission at the beginning of this method.
+      __ B(type_check_slow_path->GetEntryLabel());
+      break;
+  }
+  __ Bind(&done);
+
+  __ Bind(type_check_slow_path->GetExitLabel());
+}
+
+void LocationsBuilderARMVIXL::VisitMonitorOperation(HMonitorOperation* instruction) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
+  InvokeRuntimeCallingConventionARMVIXL calling_convention;
+  locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitMonitorOperation(HMonitorOperation* instruction) {
+  codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject : kQuickUnlockObject,
+                          instruction,
+                          instruction->GetDexPc());
+  if (instruction->IsEnter()) {
+    CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
+  } else {
+    CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
+  }
+}
+
+void LocationsBuilderARMVIXL::VisitAnd(HAnd* instruction) {
+  HandleBitwiseOperation(instruction, AND);
+}
+
+void LocationsBuilderARMVIXL::VisitOr(HOr* instruction) {
+  HandleBitwiseOperation(instruction, ORR);
+}
+
+void LocationsBuilderARMVIXL::VisitXor(HXor* instruction) {
+  HandleBitwiseOperation(instruction, EOR);
+}
+
+void LocationsBuilderARMVIXL::HandleBitwiseOperation(HBinaryOperation* instruction, Opcode opcode) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  DCHECK(instruction->GetResultType() == Primitive::kPrimInt
+         || instruction->GetResultType() == Primitive::kPrimLong);
+  // Note: GVN reorders commutative operations to have the constant on the right hand side.
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, ArmEncodableConstantOrRegister(instruction->InputAt(1), opcode));
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitAnd(HAnd* instruction) {
+  HandleBitwiseOperation(instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitOr(HOr* instruction) {
+  HandleBitwiseOperation(instruction);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitXor(HXor* instruction) {
+  HandleBitwiseOperation(instruction);
+}
+
+void LocationsBuilderARMVIXL::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instruction) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
+  DCHECK(instruction->GetResultType() == Primitive::kPrimInt
+         || instruction->GetResultType() == Primitive::kPrimLong);
+
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitBitwiseNegatedRight(HBitwiseNegatedRight* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  Location first = locations->InAt(0);
+  Location second = locations->InAt(1);
+  Location out = locations->Out();
+
+  if (instruction->GetResultType() == Primitive::kPrimInt) {
+    vixl32::Register first_reg = RegisterFrom(first);
+    vixl32::Register second_reg = RegisterFrom(second);
+    vixl32::Register out_reg = RegisterFrom(out);
+
+    switch (instruction->GetOpKind()) {
+      case HInstruction::kAnd:
+        __ Bic(out_reg, first_reg, second_reg);
+        break;
+      case HInstruction::kOr:
+        __ Orn(out_reg, first_reg, second_reg);
+        break;
+      // There is no EON on arm.
+      case HInstruction::kXor:
+      default:
+        LOG(FATAL) << "Unexpected instruction " << instruction->DebugName();
+        UNREACHABLE();
+    }
+    return;
+
+  } else {
+    DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
+    vixl32::Register first_low = LowRegisterFrom(first);
+    vixl32::Register first_high = HighRegisterFrom(first);
+    vixl32::Register second_low = LowRegisterFrom(second);
+    vixl32::Register second_high = HighRegisterFrom(second);
+    vixl32::Register out_low = LowRegisterFrom(out);
+    vixl32::Register out_high = HighRegisterFrom(out);
+
+    switch (instruction->GetOpKind()) {
+      case HInstruction::kAnd:
+        __ Bic(out_low, first_low, second_low);
+        __ Bic(out_high, first_high, second_high);
+        break;
+      case HInstruction::kOr:
+        __ Orn(out_low, first_low, second_low);
+        __ Orn(out_high, first_high, second_high);
+        break;
+      // There is no EON on arm.
+      case HInstruction::kXor:
+      default:
+        LOG(FATAL) << "Unexpected instruction " << instruction->DebugName();
+        UNREACHABLE();
+    }
+  }
+}
+
+// TODO(VIXL): Remove optimizations in the helper when they are implemented in vixl.
+void InstructionCodeGeneratorARMVIXL::GenerateAndConst(vixl32::Register out,
+                                                       vixl32::Register first,
+                                                       uint32_t value) {
+  // Optimize special cases for individual halfs of `and-long` (`and` is simplified earlier).
+  if (value == 0xffffffffu) {
+    if (!out.Is(first)) {
+      __ Mov(out, first);
+    }
+    return;
+  }
+  if (value == 0u) {
+    __ Mov(out, 0);
+    return;
+  }
+  if (GetAssembler()->ShifterOperandCanHold(AND, value)) {
+  __ And(out, first, value);
+  } else {
+    DCHECK(GetAssembler()->ShifterOperandCanHold(BIC, ~value));
+  __ Bic(out, first, ~value);
+  }
+}
+
+// TODO(VIXL): Remove optimizations in the helper when they are implemented in vixl.
+void InstructionCodeGeneratorARMVIXL::GenerateOrrConst(vixl32::Register out,
+                                                       vixl32::Register first,
+                                                       uint32_t value) {
+  // Optimize special cases for individual halfs of `or-long` (`or` is simplified earlier).
+  if (value == 0u) {
+    if (!out.Is(first)) {
+      __ Mov(out, first);
+    }
+    return;
+  }
+  if (value == 0xffffffffu) {
+    __ Mvn(out, 0);
+    return;
+  }
+  if (GetAssembler()->ShifterOperandCanHold(ORR, value)) {
+    __ Orr(out, first, value);
+  } else {
+    DCHECK(GetAssembler()->ShifterOperandCanHold(ORN, ~value));
+    __ Orn(out, first, ~value);
+  }
+}
+
+// TODO(VIXL): Remove optimizations in the helper when they are implemented in vixl.
+void InstructionCodeGeneratorARMVIXL::GenerateEorConst(vixl32::Register out,
+                                                       vixl32::Register first,
+                                                       uint32_t value) {
+  // Optimize special case for individual halfs of `xor-long` (`xor` is simplified earlier).
+  if (value == 0u) {
+    if (!out.Is(first)) {
+      __ Mov(out, first);
+    }
+    return;
+  }
+  __ Eor(out, first, value);
+}
+
+void InstructionCodeGeneratorARMVIXL::HandleBitwiseOperation(HBinaryOperation* instruction) {
+  LocationSummary* locations = instruction->GetLocations();
+  Location first = locations->InAt(0);
+  Location second = locations->InAt(1);
+  Location out = locations->Out();
+
+  if (second.IsConstant()) {
+    uint64_t value = static_cast<uint64_t>(Int64FromConstant(second.GetConstant()));
+    uint32_t value_low = Low32Bits(value);
+    if (instruction->GetResultType() == Primitive::kPrimInt) {
+      vixl32::Register first_reg = InputRegisterAt(instruction, 0);
+      vixl32::Register out_reg = OutputRegister(instruction);
+      if (instruction->IsAnd()) {
+        GenerateAndConst(out_reg, first_reg, value_low);
+      } else if (instruction->IsOr()) {
+        GenerateOrrConst(out_reg, first_reg, value_low);
+      } else {
+        DCHECK(instruction->IsXor());
+        GenerateEorConst(out_reg, first_reg, value_low);
+      }
+    } else {
+      DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
+      uint32_t value_high = High32Bits(value);
+      vixl32::Register first_low = LowRegisterFrom(first);
+      vixl32::Register first_high = HighRegisterFrom(first);
+      vixl32::Register out_low = LowRegisterFrom(out);
+      vixl32::Register out_high = HighRegisterFrom(out);
+      if (instruction->IsAnd()) {
+        GenerateAndConst(out_low, first_low, value_low);
+        GenerateAndConst(out_high, first_high, value_high);
+      } else if (instruction->IsOr()) {
+        GenerateOrrConst(out_low, first_low, value_low);
+        GenerateOrrConst(out_high, first_high, value_high);
+      } else {
+        DCHECK(instruction->IsXor());
+        GenerateEorConst(out_low, first_low, value_low);
+        GenerateEorConst(out_high, first_high, value_high);
+      }
+    }
+    return;
+  }
+
+  if (instruction->GetResultType() == Primitive::kPrimInt) {
+    vixl32::Register first_reg = InputRegisterAt(instruction, 0);
+    vixl32::Register second_reg = InputRegisterAt(instruction, 1);
+    vixl32::Register out_reg = OutputRegister(instruction);
+    if (instruction->IsAnd()) {
+      __ And(out_reg, first_reg, second_reg);
+    } else if (instruction->IsOr()) {
+      __ Orr(out_reg, first_reg, second_reg);
+    } else {
+      DCHECK(instruction->IsXor());
+      __ Eor(out_reg, first_reg, second_reg);
+    }
+  } else {
+    DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
+    vixl32::Register first_low = LowRegisterFrom(first);
+    vixl32::Register first_high = HighRegisterFrom(first);
+    vixl32::Register second_low = LowRegisterFrom(second);
+    vixl32::Register second_high = HighRegisterFrom(second);
+    vixl32::Register out_low = LowRegisterFrom(out);
+    vixl32::Register out_high = HighRegisterFrom(out);
+    if (instruction->IsAnd()) {
+      __ And(out_low, first_low, second_low);
+      __ And(out_high, first_high, second_high);
+    } else if (instruction->IsOr()) {
+      __ Orr(out_low, first_low, second_low);
+      __ Orr(out_high, first_high, second_high);
+    } else {
+      DCHECK(instruction->IsXor());
+      __ Eor(out_low, first_low, second_low);
+      __ Eor(out_high, first_high, second_high);
+    }
+  }
+}
+
+void InstructionCodeGeneratorARMVIXL::GenerateReferenceLoadOneRegister(
+    HInstruction* instruction ATTRIBUTE_UNUSED,
+    Location out,
+    uint32_t offset,
+    Location maybe_temp ATTRIBUTE_UNUSED) {
+  vixl32::Register out_reg = RegisterFrom(out);
+  if (kEmitCompilerReadBarrier) {
+    TODO_VIXL32(FATAL);
+  } else {
+    // Plain load with no read barrier.
+    // /* HeapReference<Object> */ out = *(out + offset)
+    GetAssembler()->LoadFromOffset(kLoadWord, out_reg, out_reg, offset);
+    GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
+  }
+}
+
+void InstructionCodeGeneratorARMVIXL::GenerateReferenceLoadTwoRegisters(
+    HInstruction* instruction ATTRIBUTE_UNUSED,
+    Location out,
+    Location obj,
+    uint32_t offset,
+    Location maybe_temp ATTRIBUTE_UNUSED) {
+  vixl32::Register out_reg = RegisterFrom(out);
+  vixl32::Register obj_reg = RegisterFrom(obj);
+  if (kEmitCompilerReadBarrier) {
+    TODO_VIXL32(FATAL);
+  } else {
+    // Plain load with no read barrier.
+    // /* HeapReference<Object> */ out = *(obj + offset)
+    GetAssembler()->LoadFromOffset(kLoadWord, out_reg, obj_reg, offset);
+    GetAssembler()->MaybeUnpoisonHeapReference(out_reg);
+  }
+}
+
+void InstructionCodeGeneratorARMVIXL::GenerateGcRootFieldLoad(
+    HInstruction* instruction ATTRIBUTE_UNUSED,
+    Location root,
+    vixl32::Register obj,
+    uint32_t offset,
+    bool requires_read_barrier) {
+  vixl32::Register root_reg = RegisterFrom(root);
+  if (requires_read_barrier) {
+    TODO_VIXL32(FATAL);
+  } else {
+    // Plain GC root load with no read barrier.
+    // /* GcRoot<mirror::Object> */ root = *(obj + offset)
+    GetAssembler()->LoadFromOffset(kLoadWord, root_reg, obj, offset);
+    // Note that GC roots are not affected by heap poisoning, thus we
+    // do not have to unpoison `root_reg` here.
+  }
+}
+
+void CodeGeneratorARMVIXL::GenerateFieldLoadWithBakerReadBarrier(
+    HInstruction* instruction ATTRIBUTE_UNUSED,
+    Location ref ATTRIBUTE_UNUSED,
+    vixl::aarch32::Register obj ATTRIBUTE_UNUSED,
+    uint32_t offset ATTRIBUTE_UNUSED,
+    Location temp ATTRIBUTE_UNUSED,
+    bool needs_null_check ATTRIBUTE_UNUSED) {
+  TODO_VIXL32(FATAL);
+}
+
+void CodeGeneratorARMVIXL::GenerateReferenceLoadWithBakerReadBarrier(
+    HInstruction* instruction ATTRIBUTE_UNUSED,
+    Location ref ATTRIBUTE_UNUSED,
+    vixl::aarch32::Register obj ATTRIBUTE_UNUSED,
+    uint32_t offset ATTRIBUTE_UNUSED,
+    Location index ATTRIBUTE_UNUSED,
+    ScaleFactor scale_factor ATTRIBUTE_UNUSED,
+    Location temp ATTRIBUTE_UNUSED,
+    bool needs_null_check ATTRIBUTE_UNUSED,
+    bool always_update_field ATTRIBUTE_UNUSED,
+    vixl::aarch32::Register* temp2 ATTRIBUTE_UNUSED) {
+  TODO_VIXL32(FATAL);
+}
+
+void CodeGeneratorARMVIXL::GenerateReadBarrierSlow(HInstruction* instruction ATTRIBUTE_UNUSED,
+                                                   Location out ATTRIBUTE_UNUSED,
+                                                   Location ref ATTRIBUTE_UNUSED,
+                                                   Location obj ATTRIBUTE_UNUSED,
+                                                   uint32_t offset ATTRIBUTE_UNUSED,
+                                                   Location index ATTRIBUTE_UNUSED) {
+  TODO_VIXL32(FATAL);
+}
+
+void CodeGeneratorARMVIXL::MaybeGenerateReadBarrierSlow(HInstruction* instruction ATTRIBUTE_UNUSED,
+                                                        Location out,
+                                                        Location ref ATTRIBUTE_UNUSED,
+                                                        Location obj ATTRIBUTE_UNUSED,
+                                                        uint32_t offset ATTRIBUTE_UNUSED,
+                                                        Location index ATTRIBUTE_UNUSED) {
+  if (kEmitCompilerReadBarrier) {
+    DCHECK(!kUseBakerReadBarrier);
+    TODO_VIXL32(FATAL);
+  } else if (kPoisonHeapReferences) {
+    GetAssembler()->UnpoisonHeapReference(RegisterFrom(out));
+  }
+}
+
+// Check if the desired_dispatch_info is supported. If it is, return it,
+// otherwise return a fall-back info that should be used instead.
+HInvokeStaticOrDirect::DispatchInfo CodeGeneratorARMVIXL::GetSupportedInvokeStaticOrDirectDispatch(
+      const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info ATTRIBUTE_UNUSED,
+      HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) {
+  // TODO(VIXL): Implement optimized code paths.
+  return {
+    HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod,
+    HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod,
+    0u,
+    0u
+  };
+}
+
+vixl32::Register CodeGeneratorARMVIXL::GetInvokeStaticOrDirectExtraParameter(
+    HInvokeStaticOrDirect* invoke, vixl32::Register temp) {
+  DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
+  Location location = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
+  if (!invoke->GetLocations()->Intrinsified()) {
+    return RegisterFrom(location);
+  }
+  // For intrinsics we allow any location, so it may be on the stack.
+  if (!location.IsRegister()) {
+    GetAssembler()->LoadFromOffset(kLoadWord, temp, sp, location.GetStackIndex());
+    return temp;
+  }
+  // For register locations, check if the register was saved. If so, get it from the stack.
+  // Note: There is a chance that the register was saved but not overwritten, so we could
+  // save one load. However, since this is just an intrinsic slow path we prefer this
+  // simple and more robust approach rather that trying to determine if that's the case.
+  SlowPathCode* slow_path = GetCurrentSlowPath();
+  DCHECK(slow_path != nullptr);  // For intrinsified invokes the call is emitted on the slow path.
+  if (slow_path->IsCoreRegisterSaved(RegisterFrom(location).GetCode())) {
+    int stack_offset = slow_path->GetStackOffsetOfCoreRegister(RegisterFrom(location).GetCode());
+    GetAssembler()->LoadFromOffset(kLoadWord, temp, sp, stack_offset);
+    return temp;
+  }
+  return RegisterFrom(location);
+}
+
+void CodeGeneratorARMVIXL::GenerateStaticOrDirectCall(
+    HInvokeStaticOrDirect* invoke, Location temp) {
+  Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
+  vixl32::Register temp_reg = RegisterFrom(temp);
+
+  switch (invoke->GetMethodLoadKind()) {
+    case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: {
+      uint32_t offset =
+          GetThreadOffset<kArmPointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
+      // temp = thread->string_init_entrypoint
+      GetAssembler()->LoadFromOffset(kLoadWord, temp_reg, tr, offset);
+      break;
+    }
+    case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
+      Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
+      vixl32::Register method_reg;
+      if (current_method.IsRegister()) {
+        method_reg = RegisterFrom(current_method);
+      } else {
+        DCHECK(invoke->GetLocations()->Intrinsified());
+        DCHECK(!current_method.IsValid());
+        method_reg = temp_reg;
+        GetAssembler()->LoadFromOffset(kLoadWord, temp_reg, sp, kCurrentMethodStackOffset);
+      }
+      // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_;
+      GetAssembler()->LoadFromOffset(
+          kLoadWord,
+          temp_reg,
+          method_reg,
+          ArtMethod::DexCacheResolvedMethodsOffset(kArmPointerSize).Int32Value());
+      // temp = temp[index_in_cache];
+      // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file.
+      uint32_t index_in_cache = invoke->GetDexMethodIndex();
+      GetAssembler()->LoadFromOffset(
+          kLoadWord, temp_reg, temp_reg, CodeGenerator::GetCachePointerOffset(index_in_cache));
+      break;
+    }
+    default:
+      TODO_VIXL32(FATAL);
+  }
+
+  // TODO(VIXL): Support `CodePtrLocation` values other than `kCallArtMethod`.
+  if (invoke->GetCodePtrLocation() != HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod) {
+    TODO_VIXL32(FATAL);
+  }
+
+  // LR = callee_method->entry_point_from_quick_compiled_code_
+  GetAssembler()->LoadFromOffset(
+      kLoadWord,
+      lr,
+      RegisterFrom(callee_method),
+      ArtMethod::EntryPointFromQuickCompiledCodeOffset(kArmPointerSize).Int32Value());
+  // LR()
+  __ Blx(lr);
+
+  DCHECK(!IsLeafMethod());
+}
+
+void CodeGeneratorARMVIXL::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_location) {
+  vixl32::Register temp = RegisterFrom(temp_location);
+  uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
+      invoke->GetVTableIndex(), kArmPointerSize).Uint32Value();
+
+  // Use the calling convention instead of the location of the receiver, as
+  // intrinsics may have put the receiver in a different register. In the intrinsics
+  // slow path, the arguments have been moved to the right place, so here we are
+  // guaranteed that the receiver is the first register of the calling convention.
+  InvokeDexCallingConventionARMVIXL calling_convention;
+  vixl32::Register receiver = calling_convention.GetRegisterAt(0);
+  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+  // /* HeapReference<Class> */ temp = receiver->klass_
+  GetAssembler()->LoadFromOffset(kLoadWord, temp, receiver, class_offset);
+  MaybeRecordImplicitNullCheck(invoke);
+  // Instead of simply (possibly) unpoisoning `temp` here, we should
+  // emit a read barrier for the previous class reference load.
+  // However this is not required in practice, as this is an
+  // intermediate/temporary reference and because the current
+  // concurrent copying collector keeps the from-space memory
+  // intact/accessible until the end of the marking phase (the
+  // concurrent copying collector may not in the future).
+  GetAssembler()->MaybeUnpoisonHeapReference(temp);
+
+  // temp = temp->GetMethodAt(method_offset);
+  uint32_t entry_point = ArtMethod::EntryPointFromQuickCompiledCodeOffset(
+      kArmPointerSize).Int32Value();
+  GetAssembler()->LoadFromOffset(kLoadWord, temp, temp, method_offset);
+  // LR = temp->GetEntryPoint();
+  GetAssembler()->LoadFromOffset(kLoadWord, lr, temp, entry_point);
+  // LR();
+  __ Blx(lr);
+}
+
+void LocationsBuilderARMVIXL::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(instr, LocationSummary::kNoCall);
+  locations->SetInAt(HMultiplyAccumulate::kInputAccumulatorIndex,
+                     Location::RequiresRegister());
+  locations->SetInAt(HMultiplyAccumulate::kInputMulLeftIndex, Location::RequiresRegister());
+  locations->SetInAt(HMultiplyAccumulate::kInputMulRightIndex, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitMultiplyAccumulate(HMultiplyAccumulate* instr) {
+  vixl32::Register res = OutputRegister(instr);
+  vixl32::Register accumulator =
+      InputRegisterAt(instr, HMultiplyAccumulate::kInputAccumulatorIndex);
+  vixl32::Register mul_left =
+      InputRegisterAt(instr, HMultiplyAccumulate::kInputMulLeftIndex);
+  vixl32::Register mul_right =
+      InputRegisterAt(instr, HMultiplyAccumulate::kInputMulRightIndex);
+
+  if (instr->GetOpKind() == HInstruction::kAdd) {
+    __ Mla(res, mul_left, mul_right, accumulator);
+  } else {
+    __ Mls(res, mul_left, mul_right, accumulator);
+  }
+}
+
+void LocationsBuilderARMVIXL::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
+  // Nothing to do, this should be removed during prepare for register allocator.
+  LOG(FATAL) << "Unreachable";
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
+  // Nothing to do, this should be removed during prepare for register allocator.
+  LOG(FATAL) << "Unreachable";
+}
+
+// Simple implementation of packed switch - generate cascaded compare/jumps.
+void LocationsBuilderARMVIXL::VisitPackedSwitch(HPackedSwitch* switch_instr) {
+  LocationSummary* locations =
+      new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall);
+  locations->SetInAt(0, Location::RequiresRegister());
+  if (switch_instr->GetNumEntries() > kPackedSwitchCompareJumpThreshold &&
+      codegen_->GetAssembler()->GetVIXLAssembler()->IsUsingT32()) {
+    locations->AddTemp(Location::RequiresRegister());  // We need a temp for the table base.
+    if (switch_instr->GetStartValue() != 0) {
+      locations->AddTemp(Location::RequiresRegister());  // We need a temp for the bias.
+    }
+  }
+}
+
+// TODO(VIXL): Investigate and reach the parity with old arm codegen.
+void InstructionCodeGeneratorARMVIXL::VisitPackedSwitch(HPackedSwitch* switch_instr) {
+  int32_t lower_bound = switch_instr->GetStartValue();
+  uint32_t num_entries = switch_instr->GetNumEntries();
+  LocationSummary* locations = switch_instr->GetLocations();
+  vixl32::Register value_reg = InputRegisterAt(switch_instr, 0);
+  HBasicBlock* default_block = switch_instr->GetDefaultBlock();
+
+  if (num_entries <= kPackedSwitchCompareJumpThreshold ||
+      !codegen_->GetAssembler()->GetVIXLAssembler()->IsUsingT32()) {
+    // Create a series of compare/jumps.
+    UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
+    vixl32::Register temp_reg = temps.Acquire();
+    // Note: It is fine for the below AddConstantSetFlags() using IP register to temporarily store
+    // the immediate, because IP is used as the destination register. For the other
+    // AddConstantSetFlags() and GenerateCompareWithImmediate(), the immediate values are constant,
+    // and they can be encoded in the instruction without making use of IP register.
+    __ Adds(temp_reg, value_reg, -lower_bound);
+
+    const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
+    // Jump to successors[0] if value == lower_bound.
+    __ B(eq, codegen_->GetLabelOf(successors[0]));
+    int32_t last_index = 0;
+    for (; num_entries - last_index > 2; last_index += 2) {
+      __ Adds(temp_reg, temp_reg, -2);
+      // Jump to successors[last_index + 1] if value < case_value[last_index + 2].
+      __ B(lo, codegen_->GetLabelOf(successors[last_index + 1]));
+      // Jump to successors[last_index + 2] if value == case_value[last_index + 2].
+      __ B(eq, codegen_->GetLabelOf(successors[last_index + 2]));
+    }
+    if (num_entries - last_index == 2) {
+      // The last missing case_value.
+      __ Cmp(temp_reg, 1);
+      __ B(eq, codegen_->GetLabelOf(successors[last_index + 1]));
+    }
+
+    // And the default for any other value.
+    if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
+      __ B(codegen_->GetLabelOf(default_block));
+    }
+  } else {
+    // Create a table lookup.
+    vixl32::Register table_base = RegisterFrom(locations->GetTemp(0));
+
+    JumpTableARMVIXL* jump_table = codegen_->CreateJumpTable(switch_instr);
+
+    // Remove the bias.
+    vixl32::Register key_reg;
+    if (lower_bound != 0) {
+      key_reg = RegisterFrom(locations->GetTemp(1));
+      __ Sub(key_reg, value_reg, lower_bound);
+    } else {
+      key_reg = value_reg;
+    }
+
+    // Check whether the value is in the table, jump to default block if not.
+    __ Cmp(key_reg, num_entries - 1);
+    __ B(hi, codegen_->GetLabelOf(default_block));
+
+    UseScratchRegisterScope temps(GetAssembler()->GetVIXLAssembler());
+    vixl32::Register jump_offset = temps.Acquire();
+
+    // Load jump offset from the table.
+    __ Adr(table_base, jump_table->GetTableStartLabel());
+    __ Ldr(jump_offset, MemOperand(table_base, key_reg, vixl32::LSL, 2));
+
+    // Jump to target block by branching to table_base(pc related) + offset.
+    vixl32::Register target_address = table_base;
+    __ Add(target_address, table_base, jump_offset);
+    __ Bx(target_address);
+  }
+}
+
+// Copy the result of a call into the given target.
+void CodeGeneratorARMVIXL::MoveFromReturnRegister(Location trg, Primitive::Type type) {
+  if (!trg.IsValid()) {
+    DCHECK_EQ(type, Primitive::kPrimVoid);
+    return;
+  }
+
+  DCHECK_NE(type, Primitive::kPrimVoid);
+
+  Location return_loc = InvokeDexCallingConventionVisitorARM().GetReturnLocation(type);
+  if (return_loc.Equals(trg)) {
+    return;
+  }
+
+  // TODO: Consider pairs in the parallel move resolver, then this could be nicely merged
+  //       with the last branch.
+  if (type == Primitive::kPrimLong) {
+    TODO_VIXL32(FATAL);
+  } else if (type == Primitive::kPrimDouble) {
+    TODO_VIXL32(FATAL);
+  } else {
+    // Let the parallel move resolver take care of all of this.
+    HParallelMove parallel_move(GetGraph()->GetArena());
+    parallel_move.AddMove(return_loc, trg, type, nullptr);
+    GetMoveResolver()->EmitNativeCode(&parallel_move);
+  }
+}
+
+void LocationsBuilderARMVIXL::VisitClassTableGet(
+    HClassTableGet* instruction ATTRIBUTE_UNUSED) {
+  TODO_VIXL32(FATAL);
+}
+
+void InstructionCodeGeneratorARMVIXL::VisitClassTableGet(
+    HClassTableGet* instruction ATTRIBUTE_UNUSED) {
+  TODO_VIXL32(FATAL);
+}
+
 
 #undef __
 #undef QUICK_ENTRY_POINT
diff --git a/compiler/optimizing/code_generator_arm_vixl.h b/compiler/optimizing/code_generator_arm_vixl.h
index 7b7118c..e8bc2a9 100644
--- a/compiler/optimizing/code_generator_arm_vixl.h
+++ b/compiler/optimizing/code_generator_arm_vixl.h
@@ -18,6 +18,7 @@
 #define ART_COMPILER_OPTIMIZING_CODE_GENERATOR_ARM_VIXL_H_
 
 #include "code_generator_arm.h"
+#include "common_arm.h"
 #include "utils/arm/assembler_arm_vixl.h"
 
 // TODO(VIXL): make vixl clean wrt -Wshadow.
@@ -29,7 +30,7 @@
 #pragma GCC diagnostic pop
 
 // True if VIXL32 should be used for codegen on ARM.
-#ifdef USE_VIXL_ARM_BACKEND
+#ifdef ART_USE_VIXL_ARM_BACKEND
 static constexpr bool kArmUseVIXL32 = true;
 #else
 static constexpr bool kArmUseVIXL32 = false;
@@ -38,48 +39,80 @@
 namespace art {
 namespace arm {
 
+static const vixl::aarch32::Register kParameterCoreRegistersVIXL[] = {
+    vixl::aarch32::r1,
+    vixl::aarch32::r2,
+    vixl::aarch32::r3
+};
+static const size_t kParameterCoreRegistersLengthVIXL = arraysize(kParameterCoreRegisters);
+static const vixl::aarch32::SRegister kParameterFpuRegistersVIXL[] = {
+    vixl::aarch32::s0,
+    vixl::aarch32::s1,
+    vixl::aarch32::s2,
+    vixl::aarch32::s3,
+    vixl::aarch32::s4,
+    vixl::aarch32::s5,
+    vixl::aarch32::s6,
+    vixl::aarch32::s7,
+    vixl::aarch32::s8,
+    vixl::aarch32::s9,
+    vixl::aarch32::s10,
+    vixl::aarch32::s11,
+    vixl::aarch32::s12,
+    vixl::aarch32::s13,
+    vixl::aarch32::s14,
+    vixl::aarch32::s15
+};
+static const size_t kParameterFpuRegistersLengthVIXL = arraysize(kParameterFpuRegisters);
+
 static const vixl::aarch32::Register kMethodRegister = vixl::aarch32::r0;
+
 static const vixl::aarch32::Register kCoreAlwaysSpillRegister = vixl::aarch32::r5;
-static const vixl::aarch32::RegisterList kCoreCalleeSaves = vixl::aarch32::RegisterList(
-    (1 << R5) | (1 << R6) | (1 << R7) | (1 << R8) | (1 << R10) | (1 << R11) | (1 << LR));
-// Callee saves s16 to s31 inc.
+
+// Callee saves core registers r5, r6, r7, r8, r10, r11, and lr.
+static const vixl::aarch32::RegisterList kCoreCalleeSaves = vixl::aarch32::RegisterList::Union(
+    vixl::aarch32::RegisterList(vixl::aarch32::r5,
+                                vixl::aarch32::r6,
+                                vixl::aarch32::r7,
+                                vixl::aarch32::r8),
+    vixl::aarch32::RegisterList(vixl::aarch32::r10,
+                                vixl::aarch32::r11,
+                                vixl::aarch32::lr));
+
+// Callee saves FP registers s16 to s31 inclusive.
 static const vixl::aarch32::SRegisterList kFpuCalleeSaves =
     vixl::aarch32::SRegisterList(vixl::aarch32::s16, 16);
 
+static const vixl::aarch32::Register kRuntimeParameterCoreRegistersVIXL[] = {
+    vixl::aarch32::r0,
+    vixl::aarch32::r1,
+    vixl::aarch32::r2,
+    vixl::aarch32::r3
+};
+static const size_t kRuntimeParameterCoreRegistersLengthVIXL =
+    arraysize(kRuntimeParameterCoreRegisters);
+static const vixl::aarch32::SRegister kRuntimeParameterFpuRegistersVIXL[] = {
+    vixl::aarch32::s0,
+    vixl::aarch32::s1,
+    vixl::aarch32::s2,
+    vixl::aarch32::s3
+};
+static const size_t kRuntimeParameterFpuRegistersLengthVIXL =
+    arraysize(kRuntimeParameterFpuRegisters);
+
+class LoadClassSlowPathARMVIXL;
+
 #define FOR_EACH_IMPLEMENTED_INSTRUCTION(M)     \
   M(Above)                                      \
   M(AboveOrEqual)                               \
   M(Add)                                        \
-  M(Below)                                      \
-  M(BelowOrEqual)                               \
-  M(Div)                                        \
-  M(DivZeroCheck)                               \
-  M(Equal)                                      \
-  M(Exit)                                       \
-  M(Goto)                                       \
-  M(GreaterThan)                                \
-  M(GreaterThanOrEqual)                         \
-  M(If)                                         \
-  M(IntConstant)                                \
-  M(LessThan)                                   \
-  M(LessThanOrEqual)                            \
-  M(LongConstant)                               \
-  M(MemoryBarrier)                              \
-  M(Mul)                                        \
-  M(Not)                                        \
-  M(NotEqual)                                   \
-  M(ParallelMove)                               \
-  M(Return)                                     \
-  M(ReturnVoid)                                 \
-  M(Sub)                                        \
-  M(TypeConversion)                             \
-
-// TODO: Remove once the VIXL32 backend is implemented completely.
-#define FOR_EACH_UNIMPLEMENTED_INSTRUCTION(M)   \
   M(And)                                        \
   M(ArrayGet)                                   \
   M(ArrayLength)                                \
   M(ArraySet)                                   \
+  M(Below)                                      \
+  M(BelowOrEqual)                               \
+  M(BitwiseNegatedRight)                        \
   M(BooleanNot)                                 \
   M(BoundsCheck)                                \
   M(BoundType)                                  \
@@ -90,39 +123,62 @@
   M(Compare)                                    \
   M(CurrentMethod)                              \
   M(Deoptimize)                                 \
+  M(Div)                                        \
+  M(DivZeroCheck)                               \
   M(DoubleConstant)                             \
+  M(Equal)                                      \
+  M(Exit)                                       \
   M(FloatConstant)                              \
+  M(Goto)                                       \
+  M(GreaterThan)                                \
+  M(GreaterThanOrEqual)                         \
+  M(If)                                         \
   M(InstanceFieldGet)                           \
   M(InstanceFieldSet)                           \
   M(InstanceOf)                                 \
+  M(IntConstant)                                \
+  M(IntermediateAddress)                        \
   M(InvokeInterface)                            \
   M(InvokeStaticOrDirect)                       \
   M(InvokeUnresolved)                           \
   M(InvokeVirtual)                              \
+  M(LessThan)                                   \
+  M(LessThanOrEqual)                            \
   M(LoadClass)                                  \
   M(LoadException)                              \
   M(LoadString)                                 \
+  M(LongConstant)                               \
+  M(MemoryBarrier)                              \
   M(MonitorOperation)                           \
+  M(Mul)                                        \
+  M(MultiplyAccumulate)                         \
   M(NativeDebugInfo)                            \
   M(Neg)                                        \
   M(NewArray)                                   \
   M(NewInstance)                                \
+  M(Not)                                        \
+  M(NotEqual)                                   \
   M(NullCheck)                                  \
   M(NullConstant)                               \
   M(Or)                                         \
   M(PackedSwitch)                               \
+  M(ParallelMove)                               \
   M(ParameterValue)                             \
   M(Phi)                                        \
   M(Rem)                                        \
+  M(Return)                                     \
+  M(ReturnVoid)                                 \
   M(Ror)                                        \
   M(Select)                                     \
   M(Shl)                                        \
   M(Shr)                                        \
   M(StaticFieldGet)                             \
   M(StaticFieldSet)                             \
+  M(Sub)                                        \
   M(SuspendCheck)                               \
   M(Throw)                                      \
   M(TryBoundary)                                \
+  M(TypeConversion)                             \
   M(UnresolvedInstanceFieldGet)                 \
   M(UnresolvedInstanceFieldSet)                 \
   M(UnresolvedStaticFieldGet)                   \
@@ -130,8 +186,88 @@
   M(UShr)                                       \
   M(Xor)                                        \
 
+// TODO: Remove once the VIXL32 backend is implemented completely.
+#define FOR_EACH_UNIMPLEMENTED_INSTRUCTION(M)   \
+  M(ArmDexCacheArraysBase)                      \
+
 class CodeGeneratorARMVIXL;
 
+class JumpTableARMVIXL : public DeletableArenaObject<kArenaAllocSwitchTable> {
+ public:
+  explicit JumpTableARMVIXL(HPackedSwitch* switch_instr)
+      : switch_instr_(switch_instr), table_start_() {}
+
+  vixl::aarch32::Label* GetTableStartLabel() { return &table_start_; }
+
+  void EmitTable(CodeGeneratorARMVIXL* codegen);
+
+ private:
+  HPackedSwitch* const switch_instr_;
+  vixl::aarch32::Label table_start_;
+
+  DISALLOW_COPY_AND_ASSIGN(JumpTableARMVIXL);
+};
+
+class InvokeRuntimeCallingConventionARMVIXL
+    : public CallingConvention<vixl::aarch32::Register, vixl::aarch32::SRegister> {
+ public:
+  InvokeRuntimeCallingConventionARMVIXL()
+      : CallingConvention(kRuntimeParameterCoreRegistersVIXL,
+                          kRuntimeParameterCoreRegistersLengthVIXL,
+                          kRuntimeParameterFpuRegistersVIXL,
+                          kRuntimeParameterFpuRegistersLengthVIXL,
+                          kArmPointerSize) {}
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(InvokeRuntimeCallingConventionARMVIXL);
+};
+
+class InvokeDexCallingConventionARMVIXL
+    : public CallingConvention<vixl::aarch32::Register, vixl::aarch32::SRegister> {
+ public:
+  InvokeDexCallingConventionARMVIXL()
+      : CallingConvention(kParameterCoreRegistersVIXL,
+                          kParameterCoreRegistersLengthVIXL,
+                          kParameterFpuRegistersVIXL,
+                          kParameterFpuRegistersLengthVIXL,
+                          kArmPointerSize) {}
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(InvokeDexCallingConventionARMVIXL);
+};
+
+class FieldAccessCallingConventionARMVIXL : public FieldAccessCallingConvention {
+ public:
+  FieldAccessCallingConventionARMVIXL() {}
+
+  Location GetObjectLocation() const OVERRIDE {
+    return helpers::LocationFrom(vixl::aarch32::r1);
+  }
+  Location GetFieldIndexLocation() const OVERRIDE {
+    return helpers::LocationFrom(vixl::aarch32::r0);
+  }
+  Location GetReturnLocation(Primitive::Type type) const OVERRIDE {
+    return Primitive::Is64BitType(type)
+        ? helpers::LocationFrom(vixl::aarch32::r0, vixl::aarch32::r1)
+        : helpers::LocationFrom(vixl::aarch32::r0);
+  }
+  Location GetSetValueLocation(Primitive::Type type, bool is_instance) const OVERRIDE {
+    return Primitive::Is64BitType(type)
+        ? helpers::LocationFrom(vixl::aarch32::r2, vixl::aarch32::r3)
+        : (is_instance
+            ? helpers::LocationFrom(vixl::aarch32::r2)
+            : helpers::LocationFrom(vixl::aarch32::r1));
+  }
+  Location GetFpuLocation(Primitive::Type type) const OVERRIDE {
+    return Primitive::Is64BitType(type)
+        ? helpers::LocationFrom(vixl::aarch32::s0, vixl::aarch32::s1)
+        : helpers::LocationFrom(vixl::aarch32::s0);
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(FieldAccessCallingConventionARMVIXL);
+};
+
 class SlowPathCodeARMVIXL : public SlowPathCode {
  public:
   explicit SlowPathCodeARMVIXL(HInstruction* instruction)
@@ -163,7 +299,7 @@
   ArmVIXLAssembler* GetAssembler() const;
 
  private:
-  void Exchange(Register reg, int mem);
+  void Exchange(vixl32::Register reg, int mem);
   void Exchange(int mem1, int mem2);
 
   CodeGeneratorARMVIXL* const codegen_;
@@ -192,7 +328,19 @@
     LOG(FATAL) << "Unimplemented Instruction: " << instruction->DebugName();
   }
 
+  void HandleInvoke(HInvoke* invoke);
+  void HandleBitwiseOperation(HBinaryOperation* operation, Opcode opcode);
   void HandleCondition(HCondition* condition);
+  void HandleIntegerRotate(LocationSummary* locations);
+  void HandleLongRotate(LocationSummary* locations);
+  void HandleShift(HBinaryOperation* operation);
+  void HandleFieldSet(HInstruction* instruction, const FieldInfo& field_info);
+  void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
+
+  Location ArithmeticZeroOrFpuRegister(HInstruction* input);
+  Location ArmEncodableConstantOrRegister(HInstruction* constant, Opcode opcode);
+  bool CanEncodeConstantAsImmediate(HConstant* input_cst, Opcode opcode);
+  bool CanEncodeConstantAsImmediate(uint32_t value, Opcode opcode, SetCc set_cc = kCcDontCare);
 
   CodeGeneratorARMVIXL* const codegen_;
   InvokeDexCallingConventionVisitorARM parameter_visitor_;
@@ -216,9 +364,78 @@
     LOG(FATAL) << "Unimplemented Instruction: " << instruction->DebugName();
   }
 
+  // Generate code for the given suspend check. If not null, `successor`
+  // is the block to branch to if the suspend check is not needed, and after
+  // the suspend call.
   void GenerateSuspendCheck(HSuspendCheck* instruction, HBasicBlock* successor);
+  void GenerateClassInitializationCheck(LoadClassSlowPathARMVIXL* slow_path,
+                                        vixl32::Register class_reg);
   void HandleGoto(HInstruction* got, HBasicBlock* successor);
+  void GenerateAndConst(vixl::aarch32::Register out, vixl::aarch32::Register first, uint32_t value);
+  void GenerateOrrConst(vixl::aarch32::Register out, vixl::aarch32::Register first, uint32_t value);
+  void GenerateEorConst(vixl::aarch32::Register out, vixl::aarch32::Register first, uint32_t value);
+  void HandleBitwiseOperation(HBinaryOperation* operation);
   void HandleCondition(HCondition* condition);
+  void HandleIntegerRotate(HRor* ror);
+  void HandleLongRotate(HRor* ror);
+  void HandleShift(HBinaryOperation* operation);
+
+  void GenerateWideAtomicStore(vixl::aarch32::Register addr,
+                               uint32_t offset,
+                               vixl::aarch32::Register value_lo,
+                               vixl::aarch32::Register value_hi,
+                               vixl::aarch32::Register temp1,
+                               vixl::aarch32::Register temp2,
+                               HInstruction* instruction);
+  void GenerateWideAtomicLoad(vixl::aarch32::Register addr,
+                              uint32_t offset,
+                              vixl::aarch32::Register out_lo,
+                              vixl::aarch32::Register out_hi);
+
+  void HandleFieldSet(HInstruction* instruction,
+                      const FieldInfo& field_info,
+                      bool value_can_be_null);
+  void HandleFieldGet(HInstruction* instruction, const FieldInfo& field_info);
+
+  // Generate a heap reference load using one register `out`:
+  //
+  //   out <- *(out + offset)
+  //
+  // while honoring heap poisoning and/or read barriers (if any).
+  //
+  // Location `maybe_temp` is used when generating a read barrier and
+  // shall be a register in that case; it may be an invalid location
+  // otherwise.
+  void GenerateReferenceLoadOneRegister(HInstruction* instruction,
+                                        Location out,
+                                        uint32_t offset,
+                                        Location maybe_temp);
+  // Generate a heap reference load using two different registers
+  // `out` and `obj`:
+  //
+  //   out <- *(obj + offset)
+  //
+  // while honoring heap poisoning and/or read barriers (if any).
+  //
+  // Location `maybe_temp` is used when generating a Baker's (fast
+  // path) read barrier and shall be a register in that case; it may
+  // be an invalid location otherwise.
+  void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
+                                         Location out,
+                                         Location obj,
+                                         uint32_t offset,
+                                         Location maybe_temp);
+
+  // Generate a GC root reference load:
+  //
+  //   root <- *(obj + offset)
+  //
+  // while honoring read barriers if `requires_read_barrier` is true.
+  void GenerateGcRootFieldLoad(HInstruction* instruction,
+                               Location root,
+                               vixl::aarch32::Register obj,
+                               uint32_t offset,
+                               bool requires_read_barrier);
   void GenerateTestAndBranch(HInstruction* instruction,
                              size_t condition_input_index,
                              vixl::aarch32::Label* true_target,
@@ -259,7 +476,14 @@
 
   void GenerateFrameEntry() OVERRIDE;
   void GenerateFrameExit() OVERRIDE;
+
   void Bind(HBasicBlock* block) OVERRIDE;
+
+  vixl::aarch32::Label* GetLabelOf(HBasicBlock* block) {
+    block = FirstNonEmptyBlock(block);
+    return &(block_labels_[block->GetBlockId()]);
+  }
+
   void MoveConstant(Location destination, int32_t value) OVERRIDE;
   void MoveLocation(Location dst, Location src, Primitive::Type dst_type) OVERRIDE;
   void AddLocationAsTemp(Location location, LocationSummary* locations) OVERRIDE;
@@ -274,24 +498,45 @@
 
   size_t GetFloatingPointSpillSlotSize() const OVERRIDE { return vixl::aarch32::kRegSizeInBytes; }
 
+  uintptr_t GetAddressOf(HBasicBlock* block) OVERRIDE {
+    vixl::aarch32::Label* block_entry_label = GetLabelOf(block);
+    DCHECK(block_entry_label->IsBound());
+    return block_entry_label->GetLocation();
+  }
+
+  JumpTableARMVIXL* CreateJumpTable(HPackedSwitch* switch_instr) {
+    jump_tables_.emplace_back(new (GetGraph()->GetArena()) JumpTableARMVIXL(switch_instr));
+    return jump_tables_.back().get();
+  }
+
   HGraphVisitor* GetLocationBuilder() OVERRIDE { return &location_builder_; }
 
   HGraphVisitor* GetInstructionVisitor() OVERRIDE { return &instruction_visitor_; }
 
-  uintptr_t GetAddressOf(HBasicBlock* block) OVERRIDE;
-
+  void EmitJumpTables();
   void GenerateMemoryBarrier(MemBarrierKind kind);
   void Finalize(CodeAllocator* allocator) OVERRIDE;
   void SetupBlockedRegisters() const OVERRIDE;
 
-  // Blocks all register pairs made out of blocked core registers.
-  void UpdateBlockedPairRegisters() const;
-
   void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
   void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE;
 
   InstructionSet GetInstructionSet() const OVERRIDE { return InstructionSet::kThumb2; }
 
+  // Helper method to move a 32-bit value between two locations.
+  void Move32(Location destination, Location source);
+
+  void LoadFromShiftedRegOffset(Primitive::Type type,
+                                Location out_loc,
+                                vixl::aarch32::Register base,
+                                vixl::aarch32::Register reg_index,
+                                vixl::aarch32::Condition cond = vixl::aarch32::al);
+  void StoreToShiftedRegOffset(Primitive::Type type,
+                               Location out_loc,
+                               vixl::aarch32::Register base,
+                               vixl::aarch32::Register reg_index,
+                               vixl::aarch32::Condition cond = vixl::aarch32::al);
+
   const ArmInstructionSetFeatures& GetInstructionSetFeatures() const { return isa_features_; }
 
   vixl::aarch32::Label* GetFrameEntryLabel() { return &frame_entry_label_; }
@@ -316,11 +561,7 @@
     return 0;
   }
 
-  size_t RestoreFloatingPointRegister(size_t stack_index ATTRIBUTE_UNUSED,
-                                      uint32_t reg_id ATTRIBUTE_UNUSED) OVERRIDE {
-    UNIMPLEMENTED(INFO) << "TODO: RestoreFloatingPointRegister";
-    return 0;
-  }
+  size_t RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) OVERRIDE;
 
   bool NeedsTwoRegisters(Primitive::Type type) const OVERRIDE {
     return type == Primitive::kPrimDouble || type == Primitive::kPrimLong;
@@ -349,6 +590,79 @@
 
   void GenerateInvokeRuntime(int32_t entry_point_offset);
 
+  // Emit a write barrier.
+  void MarkGCCard(vixl::aarch32::Register temp,
+                  vixl::aarch32::Register card,
+                  vixl::aarch32::Register object,
+                  vixl::aarch32::Register value,
+                  bool can_be_null);
+
+  // Fast path implementation of ReadBarrier::Barrier for a heap
+  // reference field load when Baker's read barriers are used.
+  void GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
+                                             Location ref,
+                                             vixl::aarch32::Register obj,
+                                             uint32_t offset,
+                                             Location temp,
+                                             bool needs_null_check);
+
+  // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier,
+  // GenerateArrayLoadWithBakerReadBarrier and some intrinsics.
+  //
+  // Load the object reference located at the address
+  // `obj + offset + (index << scale_factor)`, held by object `obj`, into
+  // `ref`, and mark it if needed.
+  //
+  // If `always_update_field` is true, the value of the reference is
+  // atomically updated in the holder (`obj`).  This operation
+  // requires an extra temporary register, which must be provided as a
+  // non-null pointer (`temp2`).
+  void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
+                                                 Location ref,
+                                                 vixl::aarch32::Register obj,
+                                                 uint32_t offset,
+                                                 Location index,
+                                                 ScaleFactor scale_factor,
+                                                 Location temp,
+                                                 bool needs_null_check,
+                                                 bool always_update_field = false,
+                                                 vixl::aarch32::Register* temp2 = nullptr);
+
+  // Generate a read barrier for a heap reference within `instruction`
+  // using a slow path.
+  //
+  // A read barrier for an object reference read from the heap is
+  // implemented as a call to the artReadBarrierSlow runtime entry
+  // point, which is passed the values in locations `ref`, `obj`, and
+  // `offset`:
+  //
+  //   mirror::Object* artReadBarrierSlow(mirror::Object* ref,
+  //                                      mirror::Object* obj,
+  //                                      uint32_t offset);
+  //
+  // The `out` location contains the value returned by
+  // artReadBarrierSlow.
+  //
+  // When `index` is provided (i.e. for array accesses), the offset
+  // value passed to artReadBarrierSlow is adjusted to take `index`
+  // into account.
+  void GenerateReadBarrierSlow(HInstruction* instruction,
+                               Location out,
+                               Location ref,
+                               Location obj,
+                               uint32_t offset,
+                               Location index = Location::NoLocation());
+
+  // If read barriers are enabled, generate a read barrier for a heap
+  // reference using a slow path. If heap poisoning is enabled, also
+  // unpoison the reference in `out`.
+  void MaybeGenerateReadBarrierSlow(HInstruction* instruction,
+                                    Location out,
+                                    Location ref,
+                                    Location obj,
+                                    uint32_t offset,
+                                    Location index = Location::NoLocation());
+
   // Check if the desired_string_load_kind is supported. If it is, return it,
   // otherwise return a fall-back kind that should be used instead.
   HLoadString::LoadKind GetSupportedLoadStringKind(
@@ -372,17 +686,16 @@
 
   void GenerateNop() OVERRIDE;
 
-  vixl::aarch32::Label* GetLabelOf(HBasicBlock* block) {
-    block = FirstNonEmptyBlock(block);
-    return &(block_labels_[block->GetBlockId()]);
-  }
-
  private:
+  vixl::aarch32::Register GetInvokeStaticOrDirectExtraParameter(HInvokeStaticOrDirect* invoke,
+                                                                vixl::aarch32::Register temp);
+
   // Labels for each block that will be compiled.
   // We use a deque so that the `vixl::aarch32::Label` objects do not move in memory.
   ArenaDeque<vixl::aarch32::Label> block_labels_;  // Indexed by block id.
   vixl::aarch32::Label frame_entry_label_;
 
+  ArenaVector<std::unique_ptr<JumpTableARMVIXL>> jump_tables_;
   LocationsBuilderARMVIXL location_builder_;
   InstructionCodeGeneratorARMVIXL instruction_visitor_;
   ParallelMoveResolverARMVIXL move_resolver_;
diff --git a/compiler/optimizing/code_generator_mips.cc b/compiler/optimizing/code_generator_mips.cc
index 990bbcc..573bb50 100644
--- a/compiler/optimizing/code_generator_mips.cc
+++ b/compiler/optimizing/code_generator_mips.cc
@@ -378,7 +378,6 @@
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
-    Location object_class = instruction_->IsCheckCast() ? locations->GetTemp(0) : locations->Out();
     uint32_t dex_pc = instruction_->GetDexPc();
     DCHECK(instruction_->IsCheckCast()
            || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
@@ -390,24 +389,22 @@
     // We're moving two locations to locations that could overlap, so we need a parallel
     // move resolver.
     InvokeRuntimeCallingConvention calling_convention;
-    codegen->EmitParallelMoves(locations->InAt(1),
+    codegen->EmitParallelMoves(locations->InAt(0),
                                Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
                                Primitive::kPrimNot,
-                               object_class,
+                               locations->InAt(1),
                                Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
                                Primitive::kPrimNot);
-
     if (instruction_->IsInstanceOf()) {
       mips_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this);
-      CheckEntrypointTypes<
-          kQuickInstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*>();
+      CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
       Primitive::Type ret_type = instruction_->GetType();
       Location ret_loc = calling_convention.GetReturnLocation(ret_type);
       mips_codegen->MoveLocation(locations->Out(), ret_loc, ret_type);
     } else {
       DCHECK(instruction_->IsCheckCast());
-      mips_codegen->InvokeRuntime(kQuickCheckCast, instruction_, dex_pc, this);
-      CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>();
+      mips_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this);
+      CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
     }
 
     RestoreLiveRegisters(codegen, locations);
@@ -563,8 +560,7 @@
     DCHECK_EQ(type, Primitive::kPrimFloat);  // Can only swap a float.
     FRegister f1 = loc1.IsFpuRegister() ? loc1.AsFpuRegister<FRegister>()
                                         : loc2.AsFpuRegister<FRegister>();
-    Register r2 = loc1.IsRegister() ? loc1.AsRegister<Register>()
-                                    : loc2.AsRegister<Register>();
+    Register r2 = loc1.IsRegister() ? loc1.AsRegister<Register>() : loc2.AsRegister<Register>();
     __ Move(TMP, r2);
     __ Mfc1(r2, f1);
     __ Mtc1(TMP, f1);
@@ -605,10 +601,8 @@
     Exchange(loc1.GetStackIndex(), loc2.GetStackIndex(), /* double_slot */ true);
   } else if ((loc1.IsRegister() && loc2.IsStackSlot()) ||
              (loc1.IsStackSlot() && loc2.IsRegister())) {
-    Register reg = loc1.IsRegister() ? loc1.AsRegister<Register>()
-                                     : loc2.AsRegister<Register>();
-    intptr_t offset = loc1.IsStackSlot() ? loc1.GetStackIndex()
-                                         : loc2.GetStackIndex();
+    Register reg = loc1.IsRegister() ? loc1.AsRegister<Register>() : loc2.AsRegister<Register>();
+    intptr_t offset = loc1.IsStackSlot() ? loc1.GetStackIndex() : loc2.GetStackIndex();
     __ Move(TMP, reg);
     __ LoadFromOffset(kLoadWord, reg, SP, offset);
     __ StoreToOffset(kStoreWord, TMP, SP, offset);
@@ -618,8 +612,7 @@
                                            : loc2.AsRegisterPairLow<Register>();
     Register reg_h = loc1.IsRegisterPair() ? loc1.AsRegisterPairHigh<Register>()
                                            : loc2.AsRegisterPairHigh<Register>();
-    intptr_t offset_l = loc1.IsDoubleStackSlot() ? loc1.GetStackIndex()
-                                                 : loc2.GetStackIndex();
+    intptr_t offset_l = loc1.IsDoubleStackSlot() ? loc1.GetStackIndex() : loc2.GetStackIndex();
     intptr_t offset_h = loc1.IsDoubleStackSlot() ? loc1.GetHighStackIndex(kMipsWordSize)
                                                  : loc2.GetHighStackIndex(kMipsWordSize);
     __ Move(TMP, reg_l);
@@ -628,6 +621,20 @@
     __ Move(TMP, reg_h);
     __ LoadFromOffset(kLoadWord, reg_h, SP, offset_h);
     __ StoreToOffset(kStoreWord, TMP, SP, offset_h);
+  } else if (loc1.IsFpuRegister() || loc2.IsFpuRegister()) {
+    FRegister reg = loc1.IsFpuRegister() ? loc1.AsFpuRegister<FRegister>()
+                                         : loc2.AsFpuRegister<FRegister>();
+    intptr_t offset = loc1.IsFpuRegister() ? loc2.GetStackIndex() : loc1.GetStackIndex();
+    if (type == Primitive::kPrimFloat) {
+      __ MovS(FTMP, reg);
+      __ LoadSFromOffset(reg, SP, offset);
+      __ StoreSToOffset(FTMP, SP, offset);
+    } else {
+      DCHECK_EQ(type, Primitive::kPrimDouble);
+      __ MovD(FTMP, reg);
+      __ LoadDFromOffset(reg, SP, offset);
+      __ StoreDToOffset(FTMP, SP, offset);
+    }
   } else {
     LOG(FATAL) << "Swap between " << loc1 << " and " << loc2 << " is unsupported";
   }
@@ -743,9 +750,12 @@
     // TODO: __ cfi().RelOffset(DWARFReg(reg), ofs);
   }
 
-  // Store the current method pointer.
-  // TODO: can we not do this if RequiresCurrentMethod() returns false?
-  __ StoreToOffset(kStoreWord, kMethodRegisterArgument, SP, kCurrentMethodStackOffset);
+  // Save the current method if we need it. Note that we do not
+  // do this in HCurrentMethod, as the instruction might have been removed
+  // in the SSA graph.
+  if (RequiresCurrentMethod()) {
+    __ StoreToOffset(kStoreWord, kMethodRegisterArgument, SP, kCurrentMethodStackOffset);
+  }
 }
 
 void CodeGeneratorMIPS::GenerateFrameExit() {
@@ -1165,9 +1175,6 @@
 }
 
 void CodeGeneratorMIPS::SetupBlockedRegisters() const {
-  // Don't allocate the dalvik style register pair passing.
-  blocked_register_pairs_[A1_A2] = true;
-
   // ZERO, K0, K1, GP, SP, RA are always reserved and can't be allocated.
   blocked_core_registers_[ZERO] = true;
   blocked_core_registers_[K0] = true;
@@ -1202,19 +1209,6 @@
       blocked_fpu_registers_[kFpuCalleeSaves[i]] = true;
     }
   }
-
-  UpdateBlockedPairRegisters();
-}
-
-void CodeGeneratorMIPS::UpdateBlockedPairRegisters() const {
-  for (int i = 0; i < kNumberOfRegisterPairs; i++) {
-    MipsManagedRegister current =
-        MipsManagedRegister::FromRegisterPair(static_cast<RegisterPair>(i));
-    if (blocked_core_registers_[current.AsRegisterPairLow()]
-        || blocked_core_registers_[current.AsRegisterPairHigh()]) {
-      blocked_register_pairs_[i] = true;
-    }
-  }
 }
 
 size_t CodeGeneratorMIPS::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
@@ -2252,6 +2246,11 @@
     case Primitive::kPrimShort:
     case Primitive::kPrimChar:
     case Primitive::kPrimInt:
+      locations->SetInAt(0, Location::RequiresRegister());
+      locations->SetInAt(1, Location::RequiresRegister());
+      locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+      break;
+
     case Primitive::kPrimLong:
       locations->SetInAt(0, Location::RequiresRegister());
       locations->SetInAt(1, Location::RequiresRegister());
@@ -2820,19 +2819,36 @@
   switch (cond) {
     case kCondEQ:
     case kCondNE:
-      if (use_imm && IsUint<16>(rhs_imm)) {
-        __ Xori(dst, lhs, rhs_imm);
-      } else {
-        if (use_imm) {
-          rhs_reg = TMP;
-          __ LoadConst32(rhs_reg, rhs_imm);
+      if (use_imm && IsInt<16>(-rhs_imm)) {
+        if (rhs_imm == 0) {
+          if (cond == kCondEQ) {
+            __ Sltiu(dst, lhs, 1);
+          } else {
+            __ Sltu(dst, ZERO, lhs);
+          }
+        } else {
+          __ Addiu(dst, lhs, -rhs_imm);
+          if (cond == kCondEQ) {
+            __ Sltiu(dst, dst, 1);
+          } else {
+            __ Sltu(dst, ZERO, dst);
+          }
         }
-        __ Xor(dst, lhs, rhs_reg);
-      }
-      if (cond == kCondEQ) {
-        __ Sltiu(dst, dst, 1);
       } else {
-        __ Sltu(dst, ZERO, dst);
+        if (use_imm && IsUint<16>(rhs_imm)) {
+          __ Xori(dst, lhs, rhs_imm);
+        } else {
+          if (use_imm) {
+            rhs_reg = TMP;
+            __ LoadConst32(rhs_reg, rhs_imm);
+          }
+          __ Xor(dst, lhs, rhs_reg);
+        }
+        if (cond == kCondEQ) {
+          __ Sltiu(dst, dst, 1);
+        } else {
+          __ Sltu(dst, ZERO, dst);
+        }
       }
       break;
 
@@ -2932,13 +2948,111 @@
   }
 }
 
+bool InstructionCodeGeneratorMIPS::MaterializeIntCompare(IfCondition cond,
+                                                         LocationSummary* input_locations,
+                                                         Register dst) {
+  Register lhs = input_locations->InAt(0).AsRegister<Register>();
+  Location rhs_location = input_locations->InAt(1);
+  Register rhs_reg = ZERO;
+  int64_t rhs_imm = 0;
+  bool use_imm = rhs_location.IsConstant();
+  if (use_imm) {
+    rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant());
+  } else {
+    rhs_reg = rhs_location.AsRegister<Register>();
+  }
+
+  switch (cond) {
+    case kCondEQ:
+    case kCondNE:
+      if (use_imm && IsInt<16>(-rhs_imm)) {
+        __ Addiu(dst, lhs, -rhs_imm);
+      } else if (use_imm && IsUint<16>(rhs_imm)) {
+        __ Xori(dst, lhs, rhs_imm);
+      } else {
+        if (use_imm) {
+          rhs_reg = TMP;
+          __ LoadConst32(rhs_reg, rhs_imm);
+        }
+        __ Xor(dst, lhs, rhs_reg);
+      }
+      return (cond == kCondEQ);
+
+    case kCondLT:
+    case kCondGE:
+      if (use_imm && IsInt<16>(rhs_imm)) {
+        __ Slti(dst, lhs, rhs_imm);
+      } else {
+        if (use_imm) {
+          rhs_reg = TMP;
+          __ LoadConst32(rhs_reg, rhs_imm);
+        }
+        __ Slt(dst, lhs, rhs_reg);
+      }
+      return (cond == kCondGE);
+
+    case kCondLE:
+    case kCondGT:
+      if (use_imm && IsInt<16>(rhs_imm + 1)) {
+        // Simulate lhs <= rhs via lhs < rhs + 1.
+        __ Slti(dst, lhs, rhs_imm + 1);
+        return (cond == kCondGT);
+      } else {
+        if (use_imm) {
+          rhs_reg = TMP;
+          __ LoadConst32(rhs_reg, rhs_imm);
+        }
+        __ Slt(dst, rhs_reg, lhs);
+        return (cond == kCondLE);
+      }
+
+    case kCondB:
+    case kCondAE:
+      if (use_imm && IsInt<16>(rhs_imm)) {
+        // Sltiu sign-extends its 16-bit immediate operand before
+        // the comparison and thus lets us compare directly with
+        // unsigned values in the ranges [0, 0x7fff] and
+        // [0xffff8000, 0xffffffff].
+        __ Sltiu(dst, lhs, rhs_imm);
+      } else {
+        if (use_imm) {
+          rhs_reg = TMP;
+          __ LoadConst32(rhs_reg, rhs_imm);
+        }
+        __ Sltu(dst, lhs, rhs_reg);
+      }
+      return (cond == kCondAE);
+
+    case kCondBE:
+    case kCondA:
+      if (use_imm && (rhs_imm != -1) && IsInt<16>(rhs_imm + 1)) {
+        // Simulate lhs <= rhs via lhs < rhs + 1.
+        // Note that this only works if rhs + 1 does not overflow
+        // to 0, hence the check above.
+        // Sltiu sign-extends its 16-bit immediate operand before
+        // the comparison and thus lets us compare directly with
+        // unsigned values in the ranges [0, 0x7fff] and
+        // [0xffff8000, 0xffffffff].
+        __ Sltiu(dst, lhs, rhs_imm + 1);
+        return (cond == kCondA);
+      } else {
+        if (use_imm) {
+          rhs_reg = TMP;
+          __ LoadConst32(rhs_reg, rhs_imm);
+        }
+        __ Sltu(dst, rhs_reg, lhs);
+        return (cond == kCondBE);
+      }
+  }
+}
+
 void InstructionCodeGeneratorMIPS::GenerateIntCompareAndBranch(IfCondition cond,
                                                                LocationSummary* locations,
                                                                MipsLabel* label) {
   Register lhs = locations->InAt(0).AsRegister<Register>();
   Location rhs_location = locations->InAt(1);
   Register rhs_reg = ZERO;
-  int32_t rhs_imm = 0;
+  int64_t rhs_imm = 0;
   bool use_imm = rhs_location.IsConstant();
   if (use_imm) {
     rhs_imm = CodeGenerator::GetInt32ValueOf(rhs_location.GetConstant());
@@ -2975,42 +3089,136 @@
         break;
     }
   } else {
-    if (use_imm) {
-      // TODO: more efficient comparison with 16-bit constants without loading them into TMP.
-      rhs_reg = TMP;
-      __ LoadConst32(rhs_reg, rhs_imm);
-    }
-    switch (cond) {
-      case kCondEQ:
-        __ Beq(lhs, rhs_reg, label);
-        break;
-      case kCondNE:
-        __ Bne(lhs, rhs_reg, label);
-        break;
-      case kCondLT:
-        __ Blt(lhs, rhs_reg, label);
-        break;
-      case kCondGE:
-        __ Bge(lhs, rhs_reg, label);
-        break;
-      case kCondLE:
-        __ Bge(rhs_reg, lhs, label);
-        break;
-      case kCondGT:
-        __ Blt(rhs_reg, lhs, label);
-        break;
-      case kCondB:
-        __ Bltu(lhs, rhs_reg, label);
-        break;
-      case kCondAE:
-        __ Bgeu(lhs, rhs_reg, label);
-        break;
-      case kCondBE:
-        __ Bgeu(rhs_reg, lhs, label);
-        break;
-      case kCondA:
-        __ Bltu(rhs_reg, lhs, label);
-        break;
+    bool isR6 = codegen_->GetInstructionSetFeatures().IsR6();
+    if (isR6 || !use_imm) {
+      if (use_imm) {
+        rhs_reg = TMP;
+        __ LoadConst32(rhs_reg, rhs_imm);
+      }
+      switch (cond) {
+        case kCondEQ:
+          __ Beq(lhs, rhs_reg, label);
+          break;
+        case kCondNE:
+          __ Bne(lhs, rhs_reg, label);
+          break;
+        case kCondLT:
+          __ Blt(lhs, rhs_reg, label);
+          break;
+        case kCondGE:
+          __ Bge(lhs, rhs_reg, label);
+          break;
+        case kCondLE:
+          __ Bge(rhs_reg, lhs, label);
+          break;
+        case kCondGT:
+          __ Blt(rhs_reg, lhs, label);
+          break;
+        case kCondB:
+          __ Bltu(lhs, rhs_reg, label);
+          break;
+        case kCondAE:
+          __ Bgeu(lhs, rhs_reg, label);
+          break;
+        case kCondBE:
+          __ Bgeu(rhs_reg, lhs, label);
+          break;
+        case kCondA:
+          __ Bltu(rhs_reg, lhs, label);
+          break;
+      }
+    } else {
+      // Special cases for more efficient comparison with constants on R2.
+      switch (cond) {
+        case kCondEQ:
+          __ LoadConst32(TMP, rhs_imm);
+          __ Beq(lhs, TMP, label);
+          break;
+        case kCondNE:
+          __ LoadConst32(TMP, rhs_imm);
+          __ Bne(lhs, TMP, label);
+          break;
+        case kCondLT:
+          if (IsInt<16>(rhs_imm)) {
+            __ Slti(TMP, lhs, rhs_imm);
+            __ Bnez(TMP, label);
+          } else {
+            __ LoadConst32(TMP, rhs_imm);
+            __ Blt(lhs, TMP, label);
+          }
+          break;
+        case kCondGE:
+          if (IsInt<16>(rhs_imm)) {
+            __ Slti(TMP, lhs, rhs_imm);
+            __ Beqz(TMP, label);
+          } else {
+            __ LoadConst32(TMP, rhs_imm);
+            __ Bge(lhs, TMP, label);
+          }
+          break;
+        case kCondLE:
+          if (IsInt<16>(rhs_imm + 1)) {
+            // Simulate lhs <= rhs via lhs < rhs + 1.
+            __ Slti(TMP, lhs, rhs_imm + 1);
+            __ Bnez(TMP, label);
+          } else {
+            __ LoadConst32(TMP, rhs_imm);
+            __ Bge(TMP, lhs, label);
+          }
+          break;
+        case kCondGT:
+          if (IsInt<16>(rhs_imm + 1)) {
+            // Simulate lhs > rhs via !(lhs < rhs + 1).
+            __ Slti(TMP, lhs, rhs_imm + 1);
+            __ Beqz(TMP, label);
+          } else {
+            __ LoadConst32(TMP, rhs_imm);
+            __ Blt(TMP, lhs, label);
+          }
+          break;
+        case kCondB:
+          if (IsInt<16>(rhs_imm)) {
+            __ Sltiu(TMP, lhs, rhs_imm);
+            __ Bnez(TMP, label);
+          } else {
+            __ LoadConst32(TMP, rhs_imm);
+            __ Bltu(lhs, TMP, label);
+          }
+          break;
+        case kCondAE:
+          if (IsInt<16>(rhs_imm)) {
+            __ Sltiu(TMP, lhs, rhs_imm);
+            __ Beqz(TMP, label);
+          } else {
+            __ LoadConst32(TMP, rhs_imm);
+            __ Bgeu(lhs, TMP, label);
+          }
+          break;
+        case kCondBE:
+          if ((rhs_imm != -1) && IsInt<16>(rhs_imm + 1)) {
+            // Simulate lhs <= rhs via lhs < rhs + 1.
+            // Note that this only works if rhs + 1 does not overflow
+            // to 0, hence the check above.
+            __ Sltiu(TMP, lhs, rhs_imm + 1);
+            __ Bnez(TMP, label);
+          } else {
+            __ LoadConst32(TMP, rhs_imm);
+            __ Bgeu(TMP, lhs, label);
+          }
+          break;
+        case kCondA:
+          if ((rhs_imm != -1) && IsInt<16>(rhs_imm + 1)) {
+            // Simulate lhs > rhs via !(lhs < rhs + 1).
+            // Note that this only works if rhs + 1 does not overflow
+            // to 0, hence the check above.
+            __ Sltiu(TMP, lhs, rhs_imm + 1);
+            __ Beqz(TMP, label);
+          } else {
+            __ LoadConst32(TMP, rhs_imm);
+            __ Bltu(TMP, lhs, label);
+          }
+          break;
+      }
     }
   }
 }
@@ -3452,6 +3660,190 @@
   }
 }
 
+bool InstructionCodeGeneratorMIPS::MaterializeFpCompareR2(IfCondition cond,
+                                                          bool gt_bias,
+                                                          Primitive::Type type,
+                                                          LocationSummary* input_locations,
+                                                          int cc) {
+  FRegister lhs = input_locations->InAt(0).AsFpuRegister<FRegister>();
+  FRegister rhs = input_locations->InAt(1).AsFpuRegister<FRegister>();
+  CHECK(!codegen_->GetInstructionSetFeatures().IsR6());
+  if (type == Primitive::kPrimFloat) {
+    switch (cond) {
+      case kCondEQ:
+        __ CeqS(cc, lhs, rhs);
+        return false;
+      case kCondNE:
+        __ CeqS(cc, lhs, rhs);
+        return true;
+      case kCondLT:
+        if (gt_bias) {
+          __ ColtS(cc, lhs, rhs);
+        } else {
+          __ CultS(cc, lhs, rhs);
+        }
+        return false;
+      case kCondLE:
+        if (gt_bias) {
+          __ ColeS(cc, lhs, rhs);
+        } else {
+          __ CuleS(cc, lhs, rhs);
+        }
+        return false;
+      case kCondGT:
+        if (gt_bias) {
+          __ CultS(cc, rhs, lhs);
+        } else {
+          __ ColtS(cc, rhs, lhs);
+        }
+        return false;
+      case kCondGE:
+        if (gt_bias) {
+          __ CuleS(cc, rhs, lhs);
+        } else {
+          __ ColeS(cc, rhs, lhs);
+        }
+        return false;
+      default:
+        LOG(FATAL) << "Unexpected non-floating-point condition";
+        UNREACHABLE();
+    }
+  } else {
+    DCHECK_EQ(type, Primitive::kPrimDouble);
+    switch (cond) {
+      case kCondEQ:
+        __ CeqD(cc, lhs, rhs);
+        return false;
+      case kCondNE:
+        __ CeqD(cc, lhs, rhs);
+        return true;
+      case kCondLT:
+        if (gt_bias) {
+          __ ColtD(cc, lhs, rhs);
+        } else {
+          __ CultD(cc, lhs, rhs);
+        }
+        return false;
+      case kCondLE:
+        if (gt_bias) {
+          __ ColeD(cc, lhs, rhs);
+        } else {
+          __ CuleD(cc, lhs, rhs);
+        }
+        return false;
+      case kCondGT:
+        if (gt_bias) {
+          __ CultD(cc, rhs, lhs);
+        } else {
+          __ ColtD(cc, rhs, lhs);
+        }
+        return false;
+      case kCondGE:
+        if (gt_bias) {
+          __ CuleD(cc, rhs, lhs);
+        } else {
+          __ ColeD(cc, rhs, lhs);
+        }
+        return false;
+      default:
+        LOG(FATAL) << "Unexpected non-floating-point condition";
+        UNREACHABLE();
+    }
+  }
+}
+
+bool InstructionCodeGeneratorMIPS::MaterializeFpCompareR6(IfCondition cond,
+                                                          bool gt_bias,
+                                                          Primitive::Type type,
+                                                          LocationSummary* input_locations,
+                                                          FRegister dst) {
+  FRegister lhs = input_locations->InAt(0).AsFpuRegister<FRegister>();
+  FRegister rhs = input_locations->InAt(1).AsFpuRegister<FRegister>();
+  CHECK(codegen_->GetInstructionSetFeatures().IsR6());
+  if (type == Primitive::kPrimFloat) {
+    switch (cond) {
+      case kCondEQ:
+        __ CmpEqS(dst, lhs, rhs);
+        return false;
+      case kCondNE:
+        __ CmpEqS(dst, lhs, rhs);
+        return true;
+      case kCondLT:
+        if (gt_bias) {
+          __ CmpLtS(dst, lhs, rhs);
+        } else {
+          __ CmpUltS(dst, lhs, rhs);
+        }
+        return false;
+      case kCondLE:
+        if (gt_bias) {
+          __ CmpLeS(dst, lhs, rhs);
+        } else {
+          __ CmpUleS(dst, lhs, rhs);
+        }
+        return false;
+      case kCondGT:
+        if (gt_bias) {
+          __ CmpUltS(dst, rhs, lhs);
+        } else {
+          __ CmpLtS(dst, rhs, lhs);
+        }
+        return false;
+      case kCondGE:
+        if (gt_bias) {
+          __ CmpUleS(dst, rhs, lhs);
+        } else {
+          __ CmpLeS(dst, rhs, lhs);
+        }
+        return false;
+      default:
+        LOG(FATAL) << "Unexpected non-floating-point condition";
+        UNREACHABLE();
+    }
+  } else {
+    DCHECK_EQ(type, Primitive::kPrimDouble);
+    switch (cond) {
+      case kCondEQ:
+        __ CmpEqD(dst, lhs, rhs);
+        return false;
+      case kCondNE:
+        __ CmpEqD(dst, lhs, rhs);
+        return true;
+      case kCondLT:
+        if (gt_bias) {
+          __ CmpLtD(dst, lhs, rhs);
+        } else {
+          __ CmpUltD(dst, lhs, rhs);
+        }
+        return false;
+      case kCondLE:
+        if (gt_bias) {
+          __ CmpLeD(dst, lhs, rhs);
+        } else {
+          __ CmpUleD(dst, lhs, rhs);
+        }
+        return false;
+      case kCondGT:
+        if (gt_bias) {
+          __ CmpUltD(dst, rhs, lhs);
+        } else {
+          __ CmpLtD(dst, rhs, lhs);
+        }
+        return false;
+      case kCondGE:
+        if (gt_bias) {
+          __ CmpUleD(dst, rhs, lhs);
+        } else {
+          __ CmpLeD(dst, rhs, lhs);
+        }
+        return false;
+      default:
+        LOG(FATAL) << "Unexpected non-floating-point condition";
+        UNREACHABLE();
+    }
+  }
+}
+
 void InstructionCodeGeneratorMIPS::GenerateFpCompareAndBranch(IfCondition cond,
                                                               bool gt_bias,
                                                               Primitive::Type type,
@@ -3505,6 +3897,7 @@
           break;
         default:
           LOG(FATAL) << "Unexpected non-floating-point condition";
+          UNREACHABLE();
       }
     } else {
       switch (cond) {
@@ -3550,6 +3943,7 @@
           break;
         default:
           LOG(FATAL) << "Unexpected non-floating-point condition";
+          UNREACHABLE();
       }
     }
   } else {
@@ -3598,6 +3992,7 @@
           break;
         default:
           LOG(FATAL) << "Unexpected non-floating-point condition";
+          UNREACHABLE();
       }
     } else {
       switch (cond) {
@@ -3643,6 +4038,7 @@
           break;
         default:
           LOG(FATAL) << "Unexpected non-floating-point condition";
+          UNREACHABLE();
       }
     }
   }
@@ -3759,30 +4155,562 @@
                         /* false_target */ nullptr);
 }
 
+// This function returns true if a conditional move can be generated for HSelect.
+// Otherwise it returns false and HSelect must be implemented in terms of conditonal
+// branches and regular moves.
+//
+// If `locations_to_set` isn't nullptr, its inputs and outputs are set for HSelect.
+//
+// While determining feasibility of a conditional move and setting inputs/outputs
+// are two distinct tasks, this function does both because they share quite a bit
+// of common logic.
+static bool CanMoveConditionally(HSelect* select, bool is_r6, LocationSummary* locations_to_set) {
+  bool materialized = IsBooleanValueOrMaterializedCondition(select->GetCondition());
+  HInstruction* cond = select->InputAt(/* condition_input_index */ 2);
+  HCondition* condition = cond->AsCondition();
+
+  Primitive::Type cond_type = materialized ? Primitive::kPrimInt : condition->InputAt(0)->GetType();
+  Primitive::Type dst_type = select->GetType();
+
+  HConstant* cst_true_value = select->GetTrueValue()->AsConstant();
+  HConstant* cst_false_value = select->GetFalseValue()->AsConstant();
+  bool is_true_value_zero_constant =
+      (cst_true_value != nullptr && cst_true_value->IsZeroBitPattern());
+  bool is_false_value_zero_constant =
+      (cst_false_value != nullptr && cst_false_value->IsZeroBitPattern());
+
+  bool can_move_conditionally = false;
+  bool use_const_for_false_in = false;
+  bool use_const_for_true_in = false;
+
+  if (!cond->IsConstant()) {
+    switch (cond_type) {
+      default:
+        switch (dst_type) {
+          default:
+            // Moving int on int condition.
+            if (is_r6) {
+              if (is_true_value_zero_constant) {
+                // seleqz out_reg, false_reg, cond_reg
+                can_move_conditionally = true;
+                use_const_for_true_in = true;
+              } else if (is_false_value_zero_constant) {
+                // selnez out_reg, true_reg, cond_reg
+                can_move_conditionally = true;
+                use_const_for_false_in = true;
+              } else if (materialized) {
+                // Not materializing unmaterialized int conditions
+                // to keep the instruction count low.
+                // selnez AT, true_reg, cond_reg
+                // seleqz TMP, false_reg, cond_reg
+                // or out_reg, AT, TMP
+                can_move_conditionally = true;
+              }
+            } else {
+              // movn out_reg, true_reg/ZERO, cond_reg
+              can_move_conditionally = true;
+              use_const_for_true_in = is_true_value_zero_constant;
+            }
+            break;
+          case Primitive::kPrimLong:
+            // Moving long on int condition.
+            if (is_r6) {
+              if (is_true_value_zero_constant) {
+                // seleqz out_reg_lo, false_reg_lo, cond_reg
+                // seleqz out_reg_hi, false_reg_hi, cond_reg
+                can_move_conditionally = true;
+                use_const_for_true_in = true;
+              } else if (is_false_value_zero_constant) {
+                // selnez out_reg_lo, true_reg_lo, cond_reg
+                // selnez out_reg_hi, true_reg_hi, cond_reg
+                can_move_conditionally = true;
+                use_const_for_false_in = true;
+              }
+              // Other long conditional moves would generate 6+ instructions,
+              // which is too many.
+            } else {
+              // movn out_reg_lo, true_reg_lo/ZERO, cond_reg
+              // movn out_reg_hi, true_reg_hi/ZERO, cond_reg
+              can_move_conditionally = true;
+              use_const_for_true_in = is_true_value_zero_constant;
+            }
+            break;
+          case Primitive::kPrimFloat:
+          case Primitive::kPrimDouble:
+            // Moving float/double on int condition.
+            if (is_r6) {
+              if (materialized) {
+                // Not materializing unmaterialized int conditions
+                // to keep the instruction count low.
+                can_move_conditionally = true;
+                if (is_true_value_zero_constant) {
+                  // sltu TMP, ZERO, cond_reg
+                  // mtc1 TMP, temp_cond_reg
+                  // seleqz.fmt out_reg, false_reg, temp_cond_reg
+                  use_const_for_true_in = true;
+                } else if (is_false_value_zero_constant) {
+                  // sltu TMP, ZERO, cond_reg
+                  // mtc1 TMP, temp_cond_reg
+                  // selnez.fmt out_reg, true_reg, temp_cond_reg
+                  use_const_for_false_in = true;
+                } else {
+                  // sltu TMP, ZERO, cond_reg
+                  // mtc1 TMP, temp_cond_reg
+                  // sel.fmt temp_cond_reg, false_reg, true_reg
+                  // mov.fmt out_reg, temp_cond_reg
+                }
+              }
+            } else {
+              // movn.fmt out_reg, true_reg, cond_reg
+              can_move_conditionally = true;
+            }
+            break;
+        }
+        break;
+      case Primitive::kPrimLong:
+        // We don't materialize long comparison now
+        // and use conditional branches instead.
+        break;
+      case Primitive::kPrimFloat:
+      case Primitive::kPrimDouble:
+        switch (dst_type) {
+          default:
+            // Moving int on float/double condition.
+            if (is_r6) {
+              if (is_true_value_zero_constant) {
+                // mfc1 TMP, temp_cond_reg
+                // seleqz out_reg, false_reg, TMP
+                can_move_conditionally = true;
+                use_const_for_true_in = true;
+              } else if (is_false_value_zero_constant) {
+                // mfc1 TMP, temp_cond_reg
+                // selnez out_reg, true_reg, TMP
+                can_move_conditionally = true;
+                use_const_for_false_in = true;
+              } else {
+                // mfc1 TMP, temp_cond_reg
+                // selnez AT, true_reg, TMP
+                // seleqz TMP, false_reg, TMP
+                // or out_reg, AT, TMP
+                can_move_conditionally = true;
+              }
+            } else {
+              // movt out_reg, true_reg/ZERO, cc
+              can_move_conditionally = true;
+              use_const_for_true_in = is_true_value_zero_constant;
+            }
+            break;
+          case Primitive::kPrimLong:
+            // Moving long on float/double condition.
+            if (is_r6) {
+              if (is_true_value_zero_constant) {
+                // mfc1 TMP, temp_cond_reg
+                // seleqz out_reg_lo, false_reg_lo, TMP
+                // seleqz out_reg_hi, false_reg_hi, TMP
+                can_move_conditionally = true;
+                use_const_for_true_in = true;
+              } else if (is_false_value_zero_constant) {
+                // mfc1 TMP, temp_cond_reg
+                // selnez out_reg_lo, true_reg_lo, TMP
+                // selnez out_reg_hi, true_reg_hi, TMP
+                can_move_conditionally = true;
+                use_const_for_false_in = true;
+              }
+              // Other long conditional moves would generate 6+ instructions,
+              // which is too many.
+            } else {
+              // movt out_reg_lo, true_reg_lo/ZERO, cc
+              // movt out_reg_hi, true_reg_hi/ZERO, cc
+              can_move_conditionally = true;
+              use_const_for_true_in = is_true_value_zero_constant;
+            }
+            break;
+          case Primitive::kPrimFloat:
+          case Primitive::kPrimDouble:
+            // Moving float/double on float/double condition.
+            if (is_r6) {
+              can_move_conditionally = true;
+              if (is_true_value_zero_constant) {
+                // seleqz.fmt out_reg, false_reg, temp_cond_reg
+                use_const_for_true_in = true;
+              } else if (is_false_value_zero_constant) {
+                // selnez.fmt out_reg, true_reg, temp_cond_reg
+                use_const_for_false_in = true;
+              } else {
+                // sel.fmt temp_cond_reg, false_reg, true_reg
+                // mov.fmt out_reg, temp_cond_reg
+              }
+            } else {
+              // movt.fmt out_reg, true_reg, cc
+              can_move_conditionally = true;
+            }
+            break;
+        }
+        break;
+    }
+  }
+
+  if (can_move_conditionally) {
+    DCHECK(!use_const_for_false_in || !use_const_for_true_in);
+  } else {
+    DCHECK(!use_const_for_false_in);
+    DCHECK(!use_const_for_true_in);
+  }
+
+  if (locations_to_set != nullptr) {
+    if (use_const_for_false_in) {
+      locations_to_set->SetInAt(0, Location::ConstantLocation(cst_false_value));
+    } else {
+      locations_to_set->SetInAt(0,
+                                Primitive::IsFloatingPointType(dst_type)
+                                    ? Location::RequiresFpuRegister()
+                                    : Location::RequiresRegister());
+    }
+    if (use_const_for_true_in) {
+      locations_to_set->SetInAt(1, Location::ConstantLocation(cst_true_value));
+    } else {
+      locations_to_set->SetInAt(1,
+                                Primitive::IsFloatingPointType(dst_type)
+                                    ? Location::RequiresFpuRegister()
+                                    : Location::RequiresRegister());
+    }
+    if (materialized) {
+      locations_to_set->SetInAt(2, Location::RequiresRegister());
+    }
+    // On R6 we don't require the output to be the same as the
+    // first input for conditional moves unlike on R2.
+    bool is_out_same_as_first_in = !can_move_conditionally || !is_r6;
+    if (is_out_same_as_first_in) {
+      locations_to_set->SetOut(Location::SameAsFirstInput());
+    } else {
+      locations_to_set->SetOut(Primitive::IsFloatingPointType(dst_type)
+                                   ? Location::RequiresFpuRegister()
+                                   : Location::RequiresRegister());
+    }
+  }
+
+  return can_move_conditionally;
+}
+
+void InstructionCodeGeneratorMIPS::GenConditionalMoveR2(HSelect* select) {
+  LocationSummary* locations = select->GetLocations();
+  Location dst = locations->Out();
+  Location src = locations->InAt(1);
+  Register src_reg = ZERO;
+  Register src_reg_high = ZERO;
+  HInstruction* cond = select->InputAt(/* condition_input_index */ 2);
+  Register cond_reg = TMP;
+  int cond_cc = 0;
+  Primitive::Type cond_type = Primitive::kPrimInt;
+  bool cond_inverted = false;
+  Primitive::Type dst_type = select->GetType();
+
+  if (IsBooleanValueOrMaterializedCondition(cond)) {
+    cond_reg = locations->InAt(/* condition_input_index */ 2).AsRegister<Register>();
+  } else {
+    HCondition* condition = cond->AsCondition();
+    LocationSummary* cond_locations = cond->GetLocations();
+    IfCondition if_cond = condition->GetCondition();
+    cond_type = condition->InputAt(0)->GetType();
+    switch (cond_type) {
+      default:
+        DCHECK_NE(cond_type, Primitive::kPrimLong);
+        cond_inverted = MaterializeIntCompare(if_cond, cond_locations, cond_reg);
+        break;
+      case Primitive::kPrimFloat:
+      case Primitive::kPrimDouble:
+        cond_inverted = MaterializeFpCompareR2(if_cond,
+                                               condition->IsGtBias(),
+                                               cond_type,
+                                               cond_locations,
+                                               cond_cc);
+        break;
+    }
+  }
+
+  DCHECK(dst.Equals(locations->InAt(0)));
+  if (src.IsRegister()) {
+    src_reg = src.AsRegister<Register>();
+  } else if (src.IsRegisterPair()) {
+    src_reg = src.AsRegisterPairLow<Register>();
+    src_reg_high = src.AsRegisterPairHigh<Register>();
+  } else if (src.IsConstant()) {
+    DCHECK(src.GetConstant()->IsZeroBitPattern());
+  }
+
+  switch (cond_type) {
+    default:
+      switch (dst_type) {
+        default:
+          if (cond_inverted) {
+            __ Movz(dst.AsRegister<Register>(), src_reg, cond_reg);
+          } else {
+            __ Movn(dst.AsRegister<Register>(), src_reg, cond_reg);
+          }
+          break;
+        case Primitive::kPrimLong:
+          if (cond_inverted) {
+            __ Movz(dst.AsRegisterPairLow<Register>(), src_reg, cond_reg);
+            __ Movz(dst.AsRegisterPairHigh<Register>(), src_reg_high, cond_reg);
+          } else {
+            __ Movn(dst.AsRegisterPairLow<Register>(), src_reg, cond_reg);
+            __ Movn(dst.AsRegisterPairHigh<Register>(), src_reg_high, cond_reg);
+          }
+          break;
+        case Primitive::kPrimFloat:
+          if (cond_inverted) {
+            __ MovzS(dst.AsFpuRegister<FRegister>(), src.AsFpuRegister<FRegister>(), cond_reg);
+          } else {
+            __ MovnS(dst.AsFpuRegister<FRegister>(), src.AsFpuRegister<FRegister>(), cond_reg);
+          }
+          break;
+        case Primitive::kPrimDouble:
+          if (cond_inverted) {
+            __ MovzD(dst.AsFpuRegister<FRegister>(), src.AsFpuRegister<FRegister>(), cond_reg);
+          } else {
+            __ MovnD(dst.AsFpuRegister<FRegister>(), src.AsFpuRegister<FRegister>(), cond_reg);
+          }
+          break;
+      }
+      break;
+    case Primitive::kPrimLong:
+      LOG(FATAL) << "Unreachable";
+      UNREACHABLE();
+    case Primitive::kPrimFloat:
+    case Primitive::kPrimDouble:
+      switch (dst_type) {
+        default:
+          if (cond_inverted) {
+            __ Movf(dst.AsRegister<Register>(), src_reg, cond_cc);
+          } else {
+            __ Movt(dst.AsRegister<Register>(), src_reg, cond_cc);
+          }
+          break;
+        case Primitive::kPrimLong:
+          if (cond_inverted) {
+            __ Movf(dst.AsRegisterPairLow<Register>(), src_reg, cond_cc);
+            __ Movf(dst.AsRegisterPairHigh<Register>(), src_reg_high, cond_cc);
+          } else {
+            __ Movt(dst.AsRegisterPairLow<Register>(), src_reg, cond_cc);
+            __ Movt(dst.AsRegisterPairHigh<Register>(), src_reg_high, cond_cc);
+          }
+          break;
+        case Primitive::kPrimFloat:
+          if (cond_inverted) {
+            __ MovfS(dst.AsFpuRegister<FRegister>(), src.AsFpuRegister<FRegister>(), cond_cc);
+          } else {
+            __ MovtS(dst.AsFpuRegister<FRegister>(), src.AsFpuRegister<FRegister>(), cond_cc);
+          }
+          break;
+        case Primitive::kPrimDouble:
+          if (cond_inverted) {
+            __ MovfD(dst.AsFpuRegister<FRegister>(), src.AsFpuRegister<FRegister>(), cond_cc);
+          } else {
+            __ MovtD(dst.AsFpuRegister<FRegister>(), src.AsFpuRegister<FRegister>(), cond_cc);
+          }
+          break;
+      }
+      break;
+  }
+}
+
+void InstructionCodeGeneratorMIPS::GenConditionalMoveR6(HSelect* select) {
+  LocationSummary* locations = select->GetLocations();
+  Location dst = locations->Out();
+  Location false_src = locations->InAt(0);
+  Location true_src = locations->InAt(1);
+  HInstruction* cond = select->InputAt(/* condition_input_index */ 2);
+  Register cond_reg = TMP;
+  FRegister fcond_reg = FTMP;
+  Primitive::Type cond_type = Primitive::kPrimInt;
+  bool cond_inverted = false;
+  Primitive::Type dst_type = select->GetType();
+
+  if (IsBooleanValueOrMaterializedCondition(cond)) {
+    cond_reg = locations->InAt(/* condition_input_index */ 2).AsRegister<Register>();
+  } else {
+    HCondition* condition = cond->AsCondition();
+    LocationSummary* cond_locations = cond->GetLocations();
+    IfCondition if_cond = condition->GetCondition();
+    cond_type = condition->InputAt(0)->GetType();
+    switch (cond_type) {
+      default:
+        DCHECK_NE(cond_type, Primitive::kPrimLong);
+        cond_inverted = MaterializeIntCompare(if_cond, cond_locations, cond_reg);
+        break;
+      case Primitive::kPrimFloat:
+      case Primitive::kPrimDouble:
+        cond_inverted = MaterializeFpCompareR6(if_cond,
+                                               condition->IsGtBias(),
+                                               cond_type,
+                                               cond_locations,
+                                               fcond_reg);
+        break;
+    }
+  }
+
+  if (true_src.IsConstant()) {
+    DCHECK(true_src.GetConstant()->IsZeroBitPattern());
+  }
+  if (false_src.IsConstant()) {
+    DCHECK(false_src.GetConstant()->IsZeroBitPattern());
+  }
+
+  switch (dst_type) {
+    default:
+      if (Primitive::IsFloatingPointType(cond_type)) {
+        __ Mfc1(cond_reg, fcond_reg);
+      }
+      if (true_src.IsConstant()) {
+        if (cond_inverted) {
+          __ Selnez(dst.AsRegister<Register>(), false_src.AsRegister<Register>(), cond_reg);
+        } else {
+          __ Seleqz(dst.AsRegister<Register>(), false_src.AsRegister<Register>(), cond_reg);
+        }
+      } else if (false_src.IsConstant()) {
+        if (cond_inverted) {
+          __ Seleqz(dst.AsRegister<Register>(), true_src.AsRegister<Register>(), cond_reg);
+        } else {
+          __ Selnez(dst.AsRegister<Register>(), true_src.AsRegister<Register>(), cond_reg);
+        }
+      } else {
+        DCHECK_NE(cond_reg, AT);
+        if (cond_inverted) {
+          __ Seleqz(AT, true_src.AsRegister<Register>(), cond_reg);
+          __ Selnez(TMP, false_src.AsRegister<Register>(), cond_reg);
+        } else {
+          __ Selnez(AT, true_src.AsRegister<Register>(), cond_reg);
+          __ Seleqz(TMP, false_src.AsRegister<Register>(), cond_reg);
+        }
+        __ Or(dst.AsRegister<Register>(), AT, TMP);
+      }
+      break;
+    case Primitive::kPrimLong: {
+      if (Primitive::IsFloatingPointType(cond_type)) {
+        __ Mfc1(cond_reg, fcond_reg);
+      }
+      Register dst_lo = dst.AsRegisterPairLow<Register>();
+      Register dst_hi = dst.AsRegisterPairHigh<Register>();
+      if (true_src.IsConstant()) {
+        Register src_lo = false_src.AsRegisterPairLow<Register>();
+        Register src_hi = false_src.AsRegisterPairHigh<Register>();
+        if (cond_inverted) {
+          __ Selnez(dst_lo, src_lo, cond_reg);
+          __ Selnez(dst_hi, src_hi, cond_reg);
+        } else {
+          __ Seleqz(dst_lo, src_lo, cond_reg);
+          __ Seleqz(dst_hi, src_hi, cond_reg);
+        }
+      } else {
+        DCHECK(false_src.IsConstant());
+        Register src_lo = true_src.AsRegisterPairLow<Register>();
+        Register src_hi = true_src.AsRegisterPairHigh<Register>();
+        if (cond_inverted) {
+          __ Seleqz(dst_lo, src_lo, cond_reg);
+          __ Seleqz(dst_hi, src_hi, cond_reg);
+        } else {
+          __ Selnez(dst_lo, src_lo, cond_reg);
+          __ Selnez(dst_hi, src_hi, cond_reg);
+        }
+      }
+      break;
+    }
+    case Primitive::kPrimFloat: {
+      if (!Primitive::IsFloatingPointType(cond_type)) {
+        // sel*.fmt tests bit 0 of the condition register, account for that.
+        __ Sltu(TMP, ZERO, cond_reg);
+        __ Mtc1(TMP, fcond_reg);
+      }
+      FRegister dst_reg = dst.AsFpuRegister<FRegister>();
+      if (true_src.IsConstant()) {
+        FRegister src_reg = false_src.AsFpuRegister<FRegister>();
+        if (cond_inverted) {
+          __ SelnezS(dst_reg, src_reg, fcond_reg);
+        } else {
+          __ SeleqzS(dst_reg, src_reg, fcond_reg);
+        }
+      } else if (false_src.IsConstant()) {
+        FRegister src_reg = true_src.AsFpuRegister<FRegister>();
+        if (cond_inverted) {
+          __ SeleqzS(dst_reg, src_reg, fcond_reg);
+        } else {
+          __ SelnezS(dst_reg, src_reg, fcond_reg);
+        }
+      } else {
+        if (cond_inverted) {
+          __ SelS(fcond_reg,
+                  true_src.AsFpuRegister<FRegister>(),
+                  false_src.AsFpuRegister<FRegister>());
+        } else {
+          __ SelS(fcond_reg,
+                  false_src.AsFpuRegister<FRegister>(),
+                  true_src.AsFpuRegister<FRegister>());
+        }
+        __ MovS(dst_reg, fcond_reg);
+      }
+      break;
+    }
+    case Primitive::kPrimDouble: {
+      if (!Primitive::IsFloatingPointType(cond_type)) {
+        // sel*.fmt tests bit 0 of the condition register, account for that.
+        __ Sltu(TMP, ZERO, cond_reg);
+        __ Mtc1(TMP, fcond_reg);
+      }
+      FRegister dst_reg = dst.AsFpuRegister<FRegister>();
+      if (true_src.IsConstant()) {
+        FRegister src_reg = false_src.AsFpuRegister<FRegister>();
+        if (cond_inverted) {
+          __ SelnezD(dst_reg, src_reg, fcond_reg);
+        } else {
+          __ SeleqzD(dst_reg, src_reg, fcond_reg);
+        }
+      } else if (false_src.IsConstant()) {
+        FRegister src_reg = true_src.AsFpuRegister<FRegister>();
+        if (cond_inverted) {
+          __ SeleqzD(dst_reg, src_reg, fcond_reg);
+        } else {
+          __ SelnezD(dst_reg, src_reg, fcond_reg);
+        }
+      } else {
+        if (cond_inverted) {
+          __ SelD(fcond_reg,
+                  true_src.AsFpuRegister<FRegister>(),
+                  false_src.AsFpuRegister<FRegister>());
+        } else {
+          __ SelD(fcond_reg,
+                  false_src.AsFpuRegister<FRegister>(),
+                  true_src.AsFpuRegister<FRegister>());
+        }
+        __ MovD(dst_reg, fcond_reg);
+      }
+      break;
+    }
+  }
+}
+
 void LocationsBuilderMIPS::VisitSelect(HSelect* select) {
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select);
-  if (Primitive::IsFloatingPointType(select->GetType())) {
-    locations->SetInAt(0, Location::RequiresFpuRegister());
-    locations->SetInAt(1, Location::RequiresFpuRegister());
-  } else {
-    locations->SetInAt(0, Location::RequiresRegister());
-    locations->SetInAt(1, Location::RequiresRegister());
-  }
-  if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
-    locations->SetInAt(2, Location::RequiresRegister());
-  }
-  locations->SetOut(Location::SameAsFirstInput());
+  CanMoveConditionally(select, codegen_->GetInstructionSetFeatures().IsR6(), locations);
 }
 
 void InstructionCodeGeneratorMIPS::VisitSelect(HSelect* select) {
-  LocationSummary* locations = select->GetLocations();
-  MipsLabel false_target;
-  GenerateTestAndBranch(select,
-                        /* condition_input_index */ 2,
-                        /* true_target */ nullptr,
-                        &false_target);
-  codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
-  __ Bind(&false_target);
+  bool is_r6 = codegen_->GetInstructionSetFeatures().IsR6();
+  if (CanMoveConditionally(select, is_r6, /* locations_to_set */ nullptr)) {
+    if (is_r6) {
+      GenConditionalMoveR6(select);
+    } else {
+      GenConditionalMoveR2(select);
+    }
+  } else {
+    LocationSummary* locations = select->GetLocations();
+    MipsLabel false_target;
+    GenerateTestAndBranch(select,
+                          /* condition_input_index */ 2,
+                          /* true_target */ nullptr,
+                          &false_target);
+    codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
+    __ Bind(&false_target);
+  }
 }
 
 void LocationsBuilderMIPS::VisitNativeDebugInfo(HNativeDebugInfo* info) {
@@ -4273,10 +5201,6 @@
       break;
     case HLoadString::LoadKind::kBootImageAddress:
       break;
-    case HLoadString::LoadKind::kDexCacheAddress:
-      DCHECK(Runtime::Current()->UseJitCompilation());
-      fallback_load = false;
-      break;
     case HLoadString::LoadKind::kBssEntry:
       DCHECK(!Runtime::Current()->UseJitCompilation());
       break;
@@ -4774,7 +5698,12 @@
     default:
       break;
   }
-  locations->SetOut(Location::RequiresRegister());
+  if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod) {
+    InvokeRuntimeCallingConvention calling_convention;
+    locations->SetOut(calling_convention.GetReturnLocation(load->GetType()));
+  } else {
+    locations->SetOut(Location::RequiresRegister());
+  }
 }
 
 void InstructionCodeGeneratorMIPS::VisitLoadString(HLoadString* load) {
diff --git a/compiler/optimizing/code_generator_mips.h b/compiler/optimizing/code_generator_mips.h
index 0e8d8d4..e132819 100644
--- a/compiler/optimizing/code_generator_mips.h
+++ b/compiler/optimizing/code_generator_mips.h
@@ -247,6 +247,12 @@
                                Register obj,
                                uint32_t offset);
   void GenerateIntCompare(IfCondition cond, LocationSummary* locations);
+  // When the function returns `false` it means that the condition holds if `dst` is non-zero
+  // and doesn't hold if `dst` is zero. If it returns `true`, the roles of zero and non-zero
+  // `dst` are exchanged.
+  bool MaterializeIntCompare(IfCondition cond,
+                             LocationSummary* input_locations,
+                             Register dst);
   void GenerateIntCompareAndBranch(IfCondition cond,
                                    LocationSummary* locations,
                                    MipsLabel* label);
@@ -257,6 +263,22 @@
                          bool gt_bias,
                          Primitive::Type type,
                          LocationSummary* locations);
+  // When the function returns `false` it means that the condition holds if the condition
+  // code flag `cc` is non-zero and doesn't hold if `cc` is zero. If it returns `true`,
+  // the roles of zero and non-zero values of the `cc` flag are exchanged.
+  bool MaterializeFpCompareR2(IfCondition cond,
+                              bool gt_bias,
+                              Primitive::Type type,
+                              LocationSummary* input_locations,
+                              int cc);
+  // When the function returns `false` it means that the condition holds if `dst` is non-zero
+  // and doesn't hold if `dst` is zero. If it returns `true`, the roles of zero and non-zero
+  // `dst` are exchanged.
+  bool MaterializeFpCompareR6(IfCondition cond,
+                              bool gt_bias,
+                              Primitive::Type type,
+                              LocationSummary* input_locations,
+                              FRegister dst);
   void GenerateFpCompareAndBranch(IfCondition cond,
                                   bool gt_bias,
                                   Primitive::Type type,
@@ -283,6 +305,8 @@
                                  uint32_t num_entries,
                                  HBasicBlock* switch_block,
                                  HBasicBlock* default_block);
+  void GenConditionalMoveR2(HSelect* select);
+  void GenConditionalMoveR6(HSelect* select);
 
   MipsAssembler* const assembler_;
   CodeGeneratorMIPS* const codegen_;
@@ -342,9 +366,6 @@
   void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
   void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE;
 
-  // Blocks all register pairs made out of blocked core registers.
-  void UpdateBlockedPairRegisters() const;
-
   InstructionSet GetInstructionSet() const OVERRIDE { return InstructionSet::kMips; }
 
   const MipsInstructionSetFeatures& GetInstructionSetFeatures() const {
diff --git a/compiler/optimizing/code_generator_mips64.cc b/compiler/optimizing/code_generator_mips64.cc
index 02576bd..1a54935 100644
--- a/compiler/optimizing/code_generator_mips64.cc
+++ b/compiler/optimizing/code_generator_mips64.cc
@@ -322,7 +322,7 @@
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
-    Location object_class = instruction_->IsCheckCast() ? locations->GetTemp(0) : locations->Out();
+
     uint32_t dex_pc = instruction_->GetDexPc();
     DCHECK(instruction_->IsCheckCast()
            || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
@@ -334,24 +334,22 @@
     // We're moving two locations to locations that could overlap, so we need a parallel
     // move resolver.
     InvokeRuntimeCallingConvention calling_convention;
-    codegen->EmitParallelMoves(locations->InAt(1),
+    codegen->EmitParallelMoves(locations->InAt(0),
                                Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
                                Primitive::kPrimNot,
-                               object_class,
+                               locations->InAt(1),
                                Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
                                Primitive::kPrimNot);
-
     if (instruction_->IsInstanceOf()) {
       mips64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this);
-      CheckEntrypointTypes<
-          kQuickInstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*>();
+      CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
       Primitive::Type ret_type = instruction_->GetType();
       Location ret_loc = calling_convention.GetReturnLocation(ret_type);
       mips64_codegen->MoveLocation(locations->Out(), ret_loc, ret_type);
     } else {
       DCHECK(instruction_->IsCheckCast());
-      mips64_codegen->InvokeRuntime(kQuickCheckCast, instruction_, dex_pc, this);
-      CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>();
+      mips64_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this);
+      CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
     }
 
     RestoreLiveRegisters(codegen, locations);
@@ -556,9 +554,14 @@
 
   __ IncreaseFrameSize(GetFrameSize() - FrameEntrySpillSize());
 
-  static_assert(IsInt<16>(kCurrentMethodStackOffset),
-                "kCurrentMethodStackOffset must fit into int16_t");
-  __ Sd(kMethodRegisterArgument, SP, kCurrentMethodStackOffset);
+  // Save the current method if we need it. Note that we do not
+  // do this in HCurrentMethod, as the instruction might have been removed
+  // in the SSA graph.
+  if (RequiresCurrentMethod()) {
+    static_assert(IsInt<16>(kCurrentMethodStackOffset),
+                  "kCurrentMethodStackOffset must fit into int16_t");
+    __ Sd(kMethodRegisterArgument, SP, kCurrentMethodStackOffset);
+  }
 }
 
 void CodeGeneratorMIPS64::GenerateFrameExit() {
diff --git a/compiler/optimizing/code_generator_x86.cc b/compiler/optimizing/code_generator_x86.cc
index 0b23599..7e4ad26 100644
--- a/compiler/optimizing/code_generator_x86.cc
+++ b/compiler/optimizing/code_generator_x86.cc
@@ -151,7 +151,7 @@
       }
       __ movl(length_loc.AsRegister<Register>(), array_len);
       if (mirror::kUseStringCompression) {
-        __ andl(length_loc.AsRegister<Register>(), Immediate(INT32_MAX));
+        __ shrl(length_loc.AsRegister<Register>(), Immediate(1));
       }
     }
     x86_codegen->EmitParallelMoves(
@@ -312,8 +312,6 @@
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
-    Location object_class = instruction_->IsCheckCast() ? locations->GetTemp(0)
-                                                        : locations->Out();
     DCHECK(instruction_->IsCheckCast()
            || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
 
@@ -327,25 +325,25 @@
     // We're moving two locations to locations that could overlap, so we need a parallel
     // move resolver.
     InvokeRuntimeCallingConvention calling_convention;
-    x86_codegen->EmitParallelMoves(
-        locations->InAt(1),
-        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
-        Primitive::kPrimNot,
-        object_class,
-        Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
-        Primitive::kPrimNot);
-
+    x86_codegen->EmitParallelMoves(locations->InAt(0),
+                                   Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+                                   Primitive::kPrimNot,
+                                   locations->InAt(1),
+                                   Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+                                   Primitive::kPrimNot);
     if (instruction_->IsInstanceOf()) {
       x86_codegen->InvokeRuntime(kQuickInstanceofNonTrivial,
                                  instruction_,
                                  instruction_->GetDexPc(),
                                  this);
-      CheckEntrypointTypes<
-          kQuickInstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*>();
+      CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
     } else {
       DCHECK(instruction_->IsCheckCast());
-      x86_codegen->InvokeRuntime(kQuickCheckCast, instruction_, instruction_->GetDexPc(), this);
-      CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>();
+      x86_codegen->InvokeRuntime(kQuickCheckInstanceOf,
+                                 instruction_,
+                                 instruction_->GetDexPc(),
+                                 this);
+      CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
     }
 
     if (!is_fatal_) {
@@ -426,11 +424,25 @@
   DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86);
 };
 
-// Slow path marking an object during a read barrier.
+// Slow path marking an object reference `ref` during a read
+// barrier. The field `obj.field` in the object `obj` holding this
+// reference does not get updated by this slow path after marking (see
+// ReadBarrierMarkAndUpdateFieldSlowPathX86 below for that).
+//
+// This means that after the execution of this slow path, `ref` will
+// always be up-to-date, but `obj.field` may not; i.e., after the
+// flip, `ref` will be a to-space reference, but `obj.field` will
+// probably still be a from-space reference (unless it gets updated by
+// another thread, or if another thread installed another object
+// reference (different from `ref`) in `obj.field`).
 class ReadBarrierMarkSlowPathX86 : public SlowPathCode {
  public:
-  ReadBarrierMarkSlowPathX86(HInstruction* instruction, Location obj, bool unpoison)
-      : SlowPathCode(instruction), obj_(obj), unpoison_(unpoison) {
+  ReadBarrierMarkSlowPathX86(HInstruction* instruction,
+                             Location ref,
+                             bool unpoison_ref_before_marking)
+      : SlowPathCode(instruction),
+        ref_(ref),
+        unpoison_ref_before_marking_(unpoison_ref_before_marking) {
     DCHECK(kEmitCompilerReadBarrier);
   }
 
@@ -438,9 +450,9 @@
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
-    Register reg = obj_.AsRegister<Register>();
+    Register ref_reg = ref_.AsRegister<Register>();
     DCHECK(locations->CanCall());
-    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg));
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
     DCHECK(instruction_->IsInstanceFieldGet() ||
            instruction_->IsStaticFieldGet() ||
            instruction_->IsArrayGet() ||
@@ -455,44 +467,211 @@
         << instruction_->DebugName();
 
     __ Bind(GetEntryLabel());
-    if (unpoison_) {
+    if (unpoison_ref_before_marking_) {
       // Object* ref = ref_addr->AsMirrorPtr()
-      __ MaybeUnpoisonHeapReference(reg);
+      __ MaybeUnpoisonHeapReference(ref_reg);
     }
     // No need to save live registers; it's taken care of by the
     // entrypoint. Also, there is no need to update the stack mask,
     // as this runtime call will not trigger a garbage collection.
     CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
-    DCHECK_NE(reg, ESP);
-    DCHECK(0 <= reg && reg < kNumberOfCpuRegisters) << reg;
+    DCHECK_NE(ref_reg, ESP);
+    DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
     // "Compact" slow path, saving two moves.
     //
     // Instead of using the standard runtime calling convention (input
     // and output in EAX):
     //
-    //   EAX <- obj
+    //   EAX <- ref
     //   EAX <- ReadBarrierMark(EAX)
-    //   obj <- EAX
+    //   ref <- EAX
     //
-    // we just use rX (the register holding `obj`) as input and output
+    // we just use rX (the register containing `ref`) as input and output
     // of a dedicated entrypoint:
     //
     //   rX <- ReadBarrierMarkRegX(rX)
     //
     int32_t entry_point_offset =
-        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86PointerSize>(reg);
+        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86PointerSize>(ref_reg);
     // This runtime call does not require a stack map.
     x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
     __ jmp(GetExitLabel());
   }
 
  private:
-  const Location obj_;
-  const bool unpoison_;
+  // The location (register) of the marked object reference.
+  const Location ref_;
+  // Should the reference in `ref_` be unpoisoned prior to marking it?
+  const bool unpoison_ref_before_marking_;
 
   DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86);
 };
 
+// Slow path marking an object reference `ref` during a read barrier,
+// and if needed, atomically updating the field `obj.field` in the
+// object `obj` holding this reference after marking (contrary to
+// ReadBarrierMarkSlowPathX86 above, which never tries to update
+// `obj.field`).
+//
+// This means that after the execution of this slow path, both `ref`
+// and `obj.field` will be up-to-date; i.e., after the flip, both will
+// hold the same to-space reference (unless another thread installed
+// another object reference (different from `ref`) in `obj.field`).
+class ReadBarrierMarkAndUpdateFieldSlowPathX86 : public SlowPathCode {
+ public:
+  ReadBarrierMarkAndUpdateFieldSlowPathX86(HInstruction* instruction,
+                                           Location ref,
+                                           Register obj,
+                                           const Address& field_addr,
+                                           bool unpoison_ref_before_marking,
+                                           Register temp)
+      : SlowPathCode(instruction),
+        ref_(ref),
+        obj_(obj),
+        field_addr_(field_addr),
+        unpoison_ref_before_marking_(unpoison_ref_before_marking),
+        temp_(temp) {
+    DCHECK(kEmitCompilerReadBarrier);
+  }
+
+  const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkAndUpdateFieldSlowPathX86"; }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    Register ref_reg = ref_.AsRegister<Register>();
+    DCHECK(locations->CanCall());
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
+    // This slow path is only used by the UnsafeCASObject intrinsic.
+    DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
+        << "Unexpected instruction in read barrier marking and field updating slow path: "
+        << instruction_->DebugName();
+    DCHECK(instruction_->GetLocations()->Intrinsified());
+    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);
+
+    __ Bind(GetEntryLabel());
+    if (unpoison_ref_before_marking_) {
+      // Object* ref = ref_addr->AsMirrorPtr()
+      __ MaybeUnpoisonHeapReference(ref_reg);
+    }
+
+    // Save the old (unpoisoned) reference.
+    __ movl(temp_, ref_reg);
+
+    // No need to save live registers; it's taken care of by the
+    // entrypoint. Also, there is no need to update the stack mask,
+    // as this runtime call will not trigger a garbage collection.
+    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
+    DCHECK_NE(ref_reg, ESP);
+    DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
+    // "Compact" slow path, saving two moves.
+    //
+    // Instead of using the standard runtime calling convention (input
+    // and output in EAX):
+    //
+    //   EAX <- ref
+    //   EAX <- ReadBarrierMark(EAX)
+    //   ref <- EAX
+    //
+    // we just use rX (the register containing `ref`) as input and output
+    // of a dedicated entrypoint:
+    //
+    //   rX <- ReadBarrierMarkRegX(rX)
+    //
+    int32_t entry_point_offset =
+        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86PointerSize>(ref_reg);
+    // This runtime call does not require a stack map.
+    x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+
+    // If the new reference is different from the old reference,
+    // update the field in the holder (`*field_addr`).
+    //
+    // Note that this field could also hold a different object, if
+    // another thread had concurrently changed it. In that case, the
+    // LOCK CMPXCHGL instruction in the compare-and-set (CAS)
+    // operation below would abort the CAS, leaving the field as-is.
+    NearLabel done;
+    __ cmpl(temp_, ref_reg);
+    __ j(kEqual, &done);
+
+    // Update the the holder's field atomically.  This may fail if
+    // mutator updates before us, but it's OK.  This is achieved
+    // using a strong compare-and-set (CAS) operation with relaxed
+    // memory synchronization ordering, where the expected value is
+    // the old reference and the desired value is the new reference.
+    // This operation is implemented with a 32-bit LOCK CMPXLCHG
+    // instruction, which requires the expected value (the old
+    // reference) to be in EAX.  Save EAX beforehand, and move the
+    // expected value (stored in `temp_`) into EAX.
+    __ pushl(EAX);
+    __ movl(EAX, temp_);
+
+    // Convenience aliases.
+    Register base = obj_;
+    Register expected = EAX;
+    Register value = ref_reg;
+
+    bool base_equals_value = (base == value);
+    if (kPoisonHeapReferences) {
+      if (base_equals_value) {
+        // If `base` and `value` are the same register location, move
+        // `value` to a temporary register.  This way, poisoning
+        // `value` won't invalidate `base`.
+        value = temp_;
+        __ movl(value, base);
+      }
+
+      // Check that the register allocator did not assign the location
+      // of `expected` (EAX) to `value` nor to `base`, so that heap
+      // poisoning (when enabled) works as intended below.
+      // - If `value` were equal to `expected`, both references would
+      //   be poisoned twice, meaning they would not be poisoned at
+      //   all, as heap poisoning uses address negation.
+      // - If `base` were equal to `expected`, poisoning `expected`
+      //   would invalidate `base`.
+      DCHECK_NE(value, expected);
+      DCHECK_NE(base, expected);
+
+      __ PoisonHeapReference(expected);
+      __ PoisonHeapReference(value);
+    }
+
+    __ LockCmpxchgl(field_addr_, value);
+
+    // If heap poisoning is enabled, we need to unpoison the values
+    // that were poisoned earlier.
+    if (kPoisonHeapReferences) {
+      if (base_equals_value) {
+        // `value` has been moved to a temporary register, no need
+        // to unpoison it.
+      } else {
+        __ UnpoisonHeapReference(value);
+      }
+      // No need to unpoison `expected` (EAX), as it is be overwritten below.
+    }
+
+    // Restore EAX.
+    __ popl(EAX);
+
+    __ Bind(&done);
+    __ jmp(GetExitLabel());
+  }
+
+ private:
+  // The location (register) of the marked object reference.
+  const Location ref_;
+  // The register containing the object holding the marked object reference field.
+  const Register obj_;
+  // The address of the marked reference field.  The base of this address must be `obj_`.
+  const Address field_addr_;
+
+  // Should the reference in `ref_` be unpoisoned prior to marking it?
+  const bool unpoison_ref_before_marking_;
+
+  const Register temp_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathX86);
+};
+
 // Slow path generating a read barrier for a heap reference.
 class ReadBarrierForHeapReferenceSlowPathX86 : public SlowPathCode {
  public:
@@ -841,24 +1020,8 @@
 }
 
 void CodeGeneratorX86::SetupBlockedRegisters() const {
-  // Don't allocate the dalvik style register pair passing.
-  blocked_register_pairs_[ECX_EDX] = true;
-
   // Stack register is always reserved.
   blocked_core_registers_[ESP] = true;
-
-  UpdateBlockedPairRegisters();
-}
-
-void CodeGeneratorX86::UpdateBlockedPairRegisters() const {
-  for (int i = 0; i < kNumberOfRegisterPairs; i++) {
-    X86ManagedRegister current =
-        X86ManagedRegister::FromRegisterPair(static_cast<RegisterPair>(i));
-    if (blocked_core_registers_[current.AsRegisterPairLow()]
-        || blocked_core_registers_[current.AsRegisterPairHigh()]) {
-      blocked_register_pairs_[i] = true;
-    }
-  }
 }
 
 InstructionCodeGeneratorX86::InstructionCodeGeneratorX86(HGraph* graph, CodeGeneratorX86* codegen)
@@ -898,7 +1061,12 @@
   int adjust = GetFrameSize() - FrameEntrySpillSize();
   __ subl(ESP, Immediate(adjust));
   __ cfi().AdjustCFAOffset(adjust);
-  __ movl(Address(ESP, kCurrentMethodStackOffset), kMethodRegisterArgument);
+  // Save the current method if we need it. Note that we do not
+  // do this in HCurrentMethod, as the instruction might have been removed
+  // in the SSA graph.
+  if (RequiresCurrentMethod()) {
+    __ movl(Address(ESP, kCurrentMethodStackOffset), kMethodRegisterArgument);
+  }
 }
 
 void CodeGeneratorX86::GenerateFrameExit() {
@@ -5067,9 +5235,11 @@
         // Branch cases into compressed and uncompressed for each index's type.
         uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
         NearLabel done, not_compressed;
-        __ cmpl(Address(obj, count_offset), Immediate(0));
+        __ testl(Address(obj, count_offset), Immediate(1));
         codegen_->MaybeRecordImplicitNullCheck(instruction);
-        __ j(kGreaterEqual, &not_compressed);
+        static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
+                      "Expecting 0=compressed, 1=uncompressed");
+        __ j(kNotZero, &not_compressed);
         __ movzxb(out, CodeGeneratorX86::ArrayAddress(obj, index, TIMES_1, data_offset));
         __ jmp(&done);
         __ Bind(&not_compressed);
@@ -5419,7 +5589,7 @@
   codegen_->MaybeRecordImplicitNullCheck(instruction);
   // Mask out most significant bit in case the array is String's array of char.
   if (mirror::kUseStringCompression && instruction->IsStringLength()) {
-    __ andl(out, Immediate(INT32_MAX));
+    __ shrl(out, Immediate(1));
   }
 }
 
@@ -5478,10 +5648,12 @@
       Location array_loc = array_length->GetLocations()->InAt(0);
       Address array_len(array_loc.AsRegister<Register>(), len_offset);
       if (is_string_compressed_char_at) {
+        // TODO: if index_loc.IsConstant(), compare twice the index (to compensate for
+        // the string compression flag) with the in-memory length and avoid the temporary.
         Register length_reg = locations->GetTemp(0).AsRegister<Register>();
         __ movl(length_reg, array_len);
         codegen_->MaybeRecordImplicitNullCheck(array_length);
-        __ andl(length_reg, Immediate(INT32_MAX));
+        __ shrl(length_reg, Immediate(1));
         codegen_->GenerateIntCompare(length_reg, index_loc);
       } else {
         // Checking bounds for general case:
@@ -5897,7 +6069,9 @@
   Register out = out_loc.AsRegister<Register>();
 
   bool generate_null_check = false;
-  const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
+  const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
+      ? kWithoutReadBarrier
+      : kCompilerReadBarrierOption;
   switch (cls->GetLoadKind()) {
     case HLoadClass::LoadKind::kReferrersClass: {
       DCHECK(!cls->CanCallRuntime());
@@ -5908,25 +6082,25 @@
           cls,
           out_loc,
           Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()),
-          /*fixup_label*/ nullptr,
-          requires_read_barrier);
+          /* fixup_label */ nullptr,
+          read_barrier_option);
       break;
     }
     case HLoadClass::LoadKind::kBootImageLinkTimeAddress: {
-      DCHECK(!requires_read_barrier);
+      DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
       __ movl(out, Immediate(/* placeholder */ 0));
       codegen_->RecordTypePatch(cls);
       break;
     }
     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative: {
-      DCHECK(!requires_read_barrier);
+      DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
       Register method_address = locations->InAt(0).AsRegister<Register>();
       __ leal(out, Address(method_address, CodeGeneratorX86::kDummy32BitOffset));
       codegen_->RecordTypePatch(cls);
       break;
     }
     case HLoadClass::LoadKind::kBootImageAddress: {
-      DCHECK(!requires_read_barrier);
+      DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
       DCHECK_NE(cls->GetAddress(), 0u);
       uint32_t address = dchecked_integral_cast<uint32_t>(cls->GetAddress());
       __ movl(out, Immediate(address));
@@ -5940,8 +6114,8 @@
       GenerateGcRootFieldLoad(cls,
                               out_loc,
                               Address::Absolute(address),
-                              /*fixup_label*/ nullptr,
-                              requires_read_barrier);
+                              /* fixup_label */ nullptr,
+                              read_barrier_option);
       generate_null_check = !cls->IsInDexCache();
       break;
     }
@@ -5954,7 +6128,7 @@
                               out_loc,
                               Address(base_reg, CodeGeneratorX86::kDummy32BitOffset),
                               fixup_label,
-                              requires_read_barrier);
+                              read_barrier_option);
       generate_null_check = !cls->IsInDexCache();
       break;
     }
@@ -5968,8 +6142,8 @@
       GenerateGcRootFieldLoad(cls,
                               out_loc,
                               Address(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())),
-                              /*fixup_label*/ nullptr,
-                              requires_read_barrier);
+                              /* fixup_label */ nullptr,
+                              read_barrier_option);
       generate_null_check = !cls->IsInDexCache();
       break;
     }
@@ -6042,9 +6216,6 @@
       break;
     case HLoadString::LoadKind::kBootImageAddress:
       break;
-    case HLoadString::LoadKind::kDexCacheAddress:
-      DCHECK(Runtime::Current()->UseJitCompilation());
-      break;
     case HLoadString::LoadKind::kDexCacheViaMethod:
       break;
   }
@@ -6059,8 +6230,7 @@
       : LocationSummary::kNoCall;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
   HLoadString::LoadKind load_kind = load->GetLoadKind();
-  if (load_kind == HLoadString::LoadKind::kDexCacheViaMethod ||
-      load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative ||
+  if (load_kind == HLoadString::LoadKind::kBootImageLinkTimePcRelative ||
       load_kind == HLoadString::LoadKind::kBssEntry) {
     locations->SetInAt(0, Location::RequiresRegister());
   }
@@ -6068,6 +6238,17 @@
     locations->SetOut(Location::RegisterLocation(EAX));
   } else {
     locations->SetOut(Location::RequiresRegister());
+    if (load_kind == HLoadString::LoadKind::kBssEntry) {
+      if (!kUseReadBarrier || kUseBakerReadBarrier) {
+        // Rely on the pResolveString and/or marking to save everything.
+        RegisterSet caller_saves = RegisterSet::Empty();
+        InvokeRuntimeCallingConvention calling_convention;
+        caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
+        locations->SetCustomSlowPathCallerSaves(caller_saves);
+      } else {
+        // For non-Baker read barrier we have a temp-clobbering call.
+      }
+    }
   }
 }
 
@@ -6100,7 +6281,7 @@
       Address address = Address(method_address, CodeGeneratorX86::kDummy32BitOffset);
       Label* fixup_label = codegen_->NewStringBssEntryPatch(load);
       // /* GcRoot<mirror::Class> */ out = *address  /* PC-relative */
-      GenerateGcRootFieldLoad(load, out_loc, address, fixup_label);
+      GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
       SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86(load);
       codegen_->AddSlowPath(slow_path);
       __ testl(out, out);
@@ -6114,6 +6295,7 @@
 
   // TODO: Re-add the compiler code to do string dex cache lookup again.
   InvokeRuntimeCallingConvention calling_convention;
+  DCHECK_EQ(calling_convention.GetRegisterAt(0), out);
   __ movl(calling_convention.GetRegisterAt(0), Immediate(load->GetStringIndex()));
   codegen_->InvokeRuntime(kQuickResolveString, load, load->GetDexPc());
   CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
@@ -6153,12 +6335,26 @@
   CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
 }
 
-static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
-  return kEmitCompilerReadBarrier &&
+// Temp is used for read barrier.
+static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
+  if (kEmitCompilerReadBarrier &&
       !kUseBakerReadBarrier &&
       (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
        type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
-       type_check_kind == TypeCheckKind::kArrayObjectCheck);
+       type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
+    return 1;
+  }
+  return 0;
+}
+
+// Interface case has 3 temps, one for holding the number of interfaces, one for the current
+// interface pointer, one for loading the current interface.
+// The other checks have one temp for loading the object's class.
+static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
+  if (type_check_kind == TypeCheckKind::kInterfaceCheck && !kPoisonHeapReferences) {
+    return 2;
+  }
+  return 1 + NumberOfInstanceOfTemps(type_check_kind);
 }
 
 void LocationsBuilderX86::VisitInstanceOf(HInstanceOf* instruction) {
@@ -6189,11 +6385,8 @@
   locations->SetInAt(1, Location::Any());
   // Note that TypeCheckSlowPathX86 uses this "out" register too.
   locations->SetOut(Location::RequiresRegister());
-  // When read barriers are enabled, we need a temporary register for
-  // some cases.
-  if (TypeCheckNeedsATemporary(type_check_kind)) {
-    locations->AddTemp(Location::RequiresRegister());
-  }
+  // When read barriers are enabled, we need a temporary register for some cases.
+  locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
 }
 
 void InstructionCodeGeneratorX86::VisitInstanceOf(HInstanceOf* instruction) {
@@ -6204,9 +6397,9 @@
   Location cls = locations->InAt(1);
   Location out_loc = locations->Out();
   Register out = out_loc.AsRegister<Register>();
-  Location maybe_temp_loc = TypeCheckNeedsATemporary(type_check_kind) ?
-      locations->GetTemp(0) :
-      Location::NoLocation();
+  const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
+  DCHECK_LE(num_temps, 1u);
+  Location maybe_temp_loc = (num_temps >= 1) ? locations->GetTemp(0) : Location::NoLocation();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
   uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
   uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
@@ -6221,11 +6414,14 @@
     __ j(kEqual, &zero);
   }
 
-  // /* HeapReference<Class> */ out = obj->klass_
-  GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset);
-
   switch (type_check_kind) {
     case TypeCheckKind::kExactCheck: {
+      // /* HeapReference<Class> */ out = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        kCompilerReadBarrierOption);
       if (cls.IsRegister()) {
         __ cmpl(out, cls.AsRegister<Register>());
       } else {
@@ -6241,12 +6437,22 @@
     }
 
     case TypeCheckKind::kAbstractClassCheck: {
+      // /* HeapReference<Class> */ out = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        kCompilerReadBarrierOption);
       // If the class is abstract, we eagerly fetch the super class of the
       // object to avoid doing a comparison we know will fail.
       NearLabel loop;
       __ Bind(&loop);
       // /* HeapReference<Class> */ out = out->super_class_
-      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       out_loc,
+                                       super_offset,
+                                       maybe_temp_loc,
+                                       kCompilerReadBarrierOption);
       __ testl(out, out);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ j(kEqual, &done);
@@ -6265,6 +6471,12 @@
     }
 
     case TypeCheckKind::kClassHierarchyCheck: {
+      // /* HeapReference<Class> */ out = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        kCompilerReadBarrierOption);
       // Walk over the class hierarchy to find a match.
       NearLabel loop, success;
       __ Bind(&loop);
@@ -6276,7 +6488,11 @@
       }
       __ j(kEqual, &success);
       // /* HeapReference<Class> */ out = out->super_class_
-      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       out_loc,
+                                       super_offset,
+                                       maybe_temp_loc,
+                                       kCompilerReadBarrierOption);
       __ testl(out, out);
       __ j(kNotEqual, &loop);
       // If `out` is null, we use it for the result, and jump to `done`.
@@ -6290,6 +6506,12 @@
     }
 
     case TypeCheckKind::kArrayObjectCheck: {
+      // /* HeapReference<Class> */ out = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        kCompilerReadBarrierOption);
       // Do an exact check.
       NearLabel exact_check;
       if (cls.IsRegister()) {
@@ -6301,7 +6523,11 @@
       __ j(kEqual, &exact_check);
       // Otherwise, we need to check that the object's class is a non-primitive array.
       // /* HeapReference<Class> */ out = out->component_type_
-      GenerateReferenceLoadOneRegister(instruction, out_loc, component_offset, maybe_temp_loc);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       out_loc,
+                                       component_offset,
+                                       maybe_temp_loc,
+                                       kCompilerReadBarrierOption);
       __ testl(out, out);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ j(kEqual, &done);
@@ -6314,6 +6540,13 @@
     }
 
     case TypeCheckKind::kArrayCheck: {
+      // No read barrier since the slow path will retry upon failure.
+      // /* HeapReference<Class> */ out = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        kWithoutReadBarrier);
       if (cls.IsRegister()) {
         __ cmpl(out, cls.AsRegister<Register>());
       } else {
@@ -6378,35 +6611,43 @@
   }
 }
 
+static bool IsTypeCheckSlowPathFatal(TypeCheckKind type_check_kind, bool throws_into_catch) {
+  switch (type_check_kind) {
+  case TypeCheckKind::kExactCheck:
+  case TypeCheckKind::kAbstractClassCheck:
+  case TypeCheckKind::kClassHierarchyCheck:
+  case TypeCheckKind::kArrayObjectCheck:
+    return !throws_into_catch && !kEmitCompilerReadBarrier;
+  case TypeCheckKind::kInterfaceCheck:
+    return !throws_into_catch && !kEmitCompilerReadBarrier && !kPoisonHeapReferences;
+  case TypeCheckKind::kArrayCheck:
+  case TypeCheckKind::kUnresolvedCheck:
+    return false;
+  }
+  LOG(FATAL) << "Unreachable";
+  UNREACHABLE();
+}
+
 void LocationsBuilderX86::VisitCheckCast(HCheckCast* instruction) {
-  LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
   bool throws_into_catch = instruction->CanThrowIntoCatchBlock();
   TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
-  switch (type_check_kind) {
-    case TypeCheckKind::kExactCheck:
-    case TypeCheckKind::kAbstractClassCheck:
-    case TypeCheckKind::kClassHierarchyCheck:
-    case TypeCheckKind::kArrayObjectCheck:
-      call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ?
-          LocationSummary::kCallOnSlowPath :
-          LocationSummary::kNoCall;  // In fact, call on a fatal (non-returning) slow path.
-      break;
-    case TypeCheckKind::kArrayCheck:
-    case TypeCheckKind::kUnresolvedCheck:
-    case TypeCheckKind::kInterfaceCheck:
-      call_kind = LocationSummary::kCallOnSlowPath;
-      break;
-  }
+  LocationSummary::CallKind call_kind =
+      IsTypeCheckSlowPathFatal(type_check_kind, throws_into_catch)
+          ? LocationSummary::kNoCall
+          : LocationSummary::kCallOnSlowPath;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
   locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::Any());
+  if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
+    // Require a register for the interface check since there is a loop that compares the class to
+    // a memory address.
+    locations->SetInAt(1, Location::RequiresRegister());
+  } else {
+    locations->SetInAt(1, Location::Any());
+  }
   // Note that TypeCheckSlowPathX86 uses this "temp" register too.
   locations->AddTemp(Location::RequiresRegister());
-  // When read barriers are enabled, we need an additional temporary
-  // register for some cases.
-  if (TypeCheckNeedsATemporary(type_check_kind)) {
-    locations->AddTemp(Location::RequiresRegister());
-  }
+  // When read barriers are enabled, we need an additional temporary register for some cases.
+  locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
 }
 
 void InstructionCodeGeneratorX86::VisitCheckCast(HCheckCast* instruction) {
@@ -6417,20 +6658,25 @@
   Location cls = locations->InAt(1);
   Location temp_loc = locations->GetTemp(0);
   Register temp = temp_loc.AsRegister<Register>();
-  Location maybe_temp2_loc = TypeCheckNeedsATemporary(type_check_kind) ?
-      locations->GetTemp(1) :
-      Location::NoLocation();
-  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-  uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
-  uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
-  uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+  const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
+  DCHECK_GE(num_temps, 1u);
+  DCHECK_LE(num_temps, 2u);
+  Location maybe_temp2_loc = (num_temps >= 2) ? locations->GetTemp(1) : Location::NoLocation();
+  const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+  const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+  const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+  const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+  const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
+  const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
+  const uint32_t object_array_data_offset =
+      mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
 
+  // Always false for read barriers since we may need to go to the entrypoint for non-fatal cases
+  // from false negatives. The false negatives may come from avoiding read barriers below. Avoiding
+  // read barriers is done for performance and code size reasons.
   bool is_type_check_slow_path_fatal =
-      (type_check_kind == TypeCheckKind::kExactCheck ||
-       type_check_kind == TypeCheckKind::kAbstractClassCheck ||
-       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
-       type_check_kind == TypeCheckKind::kArrayObjectCheck) &&
-      !instruction->CanThrowIntoCatchBlock();
+      IsTypeCheckSlowPathFatal(type_check_kind, instruction->CanThrowIntoCatchBlock());
+
   SlowPathCode* type_check_slow_path =
       new (GetGraph()->GetArena()) TypeCheckSlowPathX86(instruction,
                                                         is_type_check_slow_path_fatal);
@@ -6443,12 +6689,16 @@
     __ j(kEqual, &done);
   }
 
-  // /* HeapReference<Class> */ temp = obj->klass_
-  GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
-
   switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
     case TypeCheckKind::kArrayCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        kWithoutReadBarrier);
+
       if (cls.IsRegister()) {
         __ cmpl(temp, cls.AsRegister<Register>());
       } else {
@@ -6462,28 +6712,30 @@
     }
 
     case TypeCheckKind::kAbstractClassCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        kWithoutReadBarrier);
+
       // If the class is abstract, we eagerly fetch the super class of the
       // object to avoid doing a comparison we know will fail.
-      NearLabel loop, compare_classes;
+      NearLabel loop;
       __ Bind(&loop);
       // /* HeapReference<Class> */ temp = temp->super_class_
-      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       temp_loc,
+                                       super_offset,
+                                       maybe_temp2_loc,
+                                       kWithoutReadBarrier);
 
-      // If the class reference currently in `temp` is not null, jump
-      // to the `compare_classes` label to compare it with the checked
-      // class.
+      // If the class reference currently in `temp` is null, jump to the slow path to throw the
+      // exception.
       __ testl(temp, temp);
-      __ j(kNotEqual, &compare_classes);
-      // Otherwise, jump to the slow path to throw the exception.
-      //
-      // But before, move back the object's class into `temp` before
-      // going into the slow path, as it has been overwritten in the
-      // meantime.
-      // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
-      __ jmp(type_check_slow_path->GetEntryLabel());
+      __ j(kZero, type_check_slow_path->GetEntryLabel());
 
-      __ Bind(&compare_classes);
+      // Otherwise, compare the classes
       if (cls.IsRegister()) {
         __ cmpl(temp, cls.AsRegister<Register>());
       } else {
@@ -6495,6 +6747,13 @@
     }
 
     case TypeCheckKind::kClassHierarchyCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        kWithoutReadBarrier);
+
       // Walk over the class hierarchy to find a match.
       NearLabel loop;
       __ Bind(&loop);
@@ -6507,26 +6766,30 @@
       __ j(kEqual, &done);
 
       // /* HeapReference<Class> */ temp = temp->super_class_
-      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       temp_loc,
+                                       super_offset,
+                                       maybe_temp2_loc,
+                                       kWithoutReadBarrier);
 
       // If the class reference currently in `temp` is not null, jump
       // back at the beginning of the loop.
       __ testl(temp, temp);
-      __ j(kNotEqual, &loop);
-      // Otherwise, jump to the slow path to throw the exception.
-      //
-      // But before, move back the object's class into `temp` before
-      // going into the slow path, as it has been overwritten in the
-      // meantime.
-      // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
+      __ j(kNotZero, &loop);
+      // Otherwise, jump to the slow path to throw the exception.;
       __ jmp(type_check_slow_path->GetEntryLabel());
       break;
     }
 
     case TypeCheckKind::kArrayObjectCheck: {
+      // /* HeapReference<Class> */ temp = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        kWithoutReadBarrier);
+
       // Do an exact check.
-      NearLabel check_non_primitive_component_type;
       if (cls.IsRegister()) {
         __ cmpl(temp, cls.AsRegister<Register>());
       } else {
@@ -6537,38 +6800,24 @@
 
       // Otherwise, we need to check that the object's class is a non-primitive array.
       // /* HeapReference<Class> */ temp = temp->component_type_
-      GenerateReferenceLoadOneRegister(instruction, temp_loc, component_offset, maybe_temp2_loc);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       temp_loc,
+                                       component_offset,
+                                       maybe_temp2_loc,
+                                       kWithoutReadBarrier);
 
-      // If the component type is not null (i.e. the object is indeed
-      // an array), jump to label `check_non_primitive_component_type`
-      // to further check that this component type is not a primitive
-      // type.
+      // If the component type is null (i.e. the object not an array),  jump to the slow path to
+      // throw the exception. Otherwise proceed with the check.
       __ testl(temp, temp);
-      __ j(kNotEqual, &check_non_primitive_component_type);
-      // Otherwise, jump to the slow path to throw the exception.
-      //
-      // But before, move back the object's class into `temp` before
-      // going into the slow path, as it has been overwritten in the
-      // meantime.
-      // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
-      __ jmp(type_check_slow_path->GetEntryLabel());
+      __ j(kZero, type_check_slow_path->GetEntryLabel());
 
-      __ Bind(&check_non_primitive_component_type);
       __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
-      __ j(kEqual, &done);
-      // Same comment as above regarding `temp` and the slow path.
-      // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
-      __ jmp(type_check_slow_path->GetEntryLabel());
+      __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
       break;
     }
 
     case TypeCheckKind::kUnresolvedCheck:
-    case TypeCheckKind::kInterfaceCheck:
-      // We always go into the type check slow path for the unresolved
-      // and interface check cases.
-      //
+      // We always go into the type check slow path for the unresolved check case.
       // We cannot directly call the CheckCast runtime entry point
       // without resorting to a type checking slow path here (i.e. by
       // calling InvokeRuntime directly), as it would require to
@@ -6576,15 +6825,50 @@
       // instruction (following the runtime calling convention), which
       // might be cluttered by the potential first read barrier
       // emission at the beginning of this method.
-      //
-      // TODO: Introduce a new runtime entry point taking the object
-      // to test (instead of its class) as argument, and let it deal
-      // with the read barrier issues. This will let us refactor this
-      // case of the `switch` code as it was previously (with a direct
-      // call to the runtime not using a type checking slow path).
-      // This should also be beneficial for the other cases above.
       __ jmp(type_check_slow_path->GetEntryLabel());
       break;
+
+    case TypeCheckKind::kInterfaceCheck: {
+      // Fast path for the interface check. Since we compare with a memory location in the inner
+      // loop we would need to have cls poisoned. However unpoisoning cls would reset the
+      // conditional flags and cause the conditional jump to be incorrect. Therefore we just jump
+      // to the slow path if we are running under poisoning.
+      if (!kPoisonHeapReferences) {
+        // Try to avoid read barriers to improve the fast path. We can not get false positives by
+        // doing this.
+        // /* HeapReference<Class> */ temp = obj->klass_
+        GenerateReferenceLoadTwoRegisters(instruction,
+                                          temp_loc,
+                                          obj_loc,
+                                          class_offset,
+                                          kWithoutReadBarrier);
+
+        // /* HeapReference<Class> */ temp = temp->iftable_
+        GenerateReferenceLoadTwoRegisters(instruction,
+                                          temp_loc,
+                                          temp_loc,
+                                          iftable_offset,
+                                          kWithoutReadBarrier);
+        // Iftable is never null.
+        __ movl(maybe_temp2_loc.AsRegister<Register>(), Address(temp, array_length_offset));
+        // Loop through the iftable and check if any class matches.
+        NearLabel start_loop;
+        __ Bind(&start_loop);
+        // Need to subtract first to handle the empty array case.
+        __ subl(maybe_temp2_loc.AsRegister<Register>(), Immediate(2));
+        __ j(kNegative, type_check_slow_path->GetEntryLabel());
+        // Go to next interface if the classes do not match.
+        __ cmpl(cls.AsRegister<Register>(),
+                CodeGeneratorX86::ArrayAddress(temp,
+                                               maybe_temp2_loc,
+                                               TIMES_4,
+                                               object_array_data_offset));
+        __ j(kNotEqual, &start_loop);
+      } else {
+        __ jmp(type_check_slow_path->GetEntryLabel());
+      }
+      break;
+    }
   }
   __ Bind(&done);
 
@@ -6743,12 +7027,15 @@
   }
 }
 
-void InstructionCodeGeneratorX86::GenerateReferenceLoadOneRegister(HInstruction* instruction,
-                                                                   Location out,
-                                                                   uint32_t offset,
-                                                                   Location maybe_temp) {
+void InstructionCodeGeneratorX86::GenerateReferenceLoadOneRegister(
+    HInstruction* instruction,
+    Location out,
+    uint32_t offset,
+    Location maybe_temp,
+    ReadBarrierOption read_barrier_option) {
   Register out_reg = out.AsRegister<Register>();
-  if (kEmitCompilerReadBarrier) {
+  if (read_barrier_option == kWithReadBarrier) {
+    CHECK(kEmitCompilerReadBarrier);
     if (kUseBakerReadBarrier) {
       // Load with fast path based Baker's read barrier.
       // /* HeapReference<Object> */ out = *(out + offset)
@@ -6773,13 +7060,16 @@
   }
 }
 
-void InstructionCodeGeneratorX86::GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
-                                                                    Location out,
-                                                                    Location obj,
-                                                                    uint32_t offset) {
+void InstructionCodeGeneratorX86::GenerateReferenceLoadTwoRegisters(
+    HInstruction* instruction,
+    Location out,
+    Location obj,
+    uint32_t offset,
+    ReadBarrierOption read_barrier_option) {
   Register out_reg = out.AsRegister<Register>();
   Register obj_reg = obj.AsRegister<Register>();
-  if (kEmitCompilerReadBarrier) {
+  if (read_barrier_option == kWithReadBarrier) {
+    CHECK(kEmitCompilerReadBarrier);
     if (kUseBakerReadBarrier) {
       // Load with fast path based Baker's read barrier.
       // /* HeapReference<Object> */ out = *(obj + offset)
@@ -6799,13 +7089,14 @@
   }
 }
 
-void InstructionCodeGeneratorX86::GenerateGcRootFieldLoad(HInstruction* instruction,
-                                                          Location root,
-                                                          const Address& address,
-                                                          Label* fixup_label,
-                                                          bool requires_read_barrier) {
+void InstructionCodeGeneratorX86::GenerateGcRootFieldLoad(
+    HInstruction* instruction,
+    Location root,
+    const Address& address,
+    Label* fixup_label,
+    ReadBarrierOption read_barrier_option) {
   Register root_reg = root.AsRegister<Register>();
-  if (requires_read_barrier) {
+  if (read_barrier_option == kWithReadBarrier) {
     DCHECK(kEmitCompilerReadBarrier);
     if (kUseBakerReadBarrier) {
       // Fast path implementation of art::ReadBarrier::BarrierForRoot when
@@ -6831,7 +7122,7 @@
 
       // Slow path marking the GC root `root`.
       SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86(
-          instruction, root, /* unpoison */ false);
+          instruction, root, /* unpoison_ref_before_marking */ false);
       codegen_->AddSlowPath(slow_path);
 
       __ fs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86PointerSize>().Int32Value()),
@@ -6896,7 +7187,9 @@
                                                                  Location ref,
                                                                  Register obj,
                                                                  const Address& src,
-                                                                 bool needs_null_check) {
+                                                                 bool needs_null_check,
+                                                                 bool always_update_field,
+                                                                 Register* temp) {
   DCHECK(kEmitCompilerReadBarrier);
   DCHECK(kUseBakerReadBarrier);
 
@@ -6910,7 +7203,7 @@
   //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
   //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
   //   HeapReference<Object> ref = *src;  // Original reference load.
-  //   bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+  //   bool is_gray = (rb_state == ReadBarrier::GrayState());
   //   if (is_gray) {
   //     ref = ReadBarrier::Mark(ref);  // Performed by runtime entrypoint slow path.
   //   }
@@ -6928,14 +7221,13 @@
   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
 
   // Given the numeric representation, it's enough to check the low bit of the rb_state.
-  static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
-  static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
-  static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+  static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+  static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
   constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
   constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
   constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
 
-  // if (rb_state == ReadBarrier::gray_ptr_)
+  // if (rb_state == ReadBarrier::GrayState())
   //   ref = ReadBarrier::Mark(ref);
   // At this point, just do the "if" and make sure that flags are preserved until the branch.
   __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value));
@@ -6953,8 +7245,15 @@
 
   // Note: Reference unpoisoning modifies the flags, so we need to delay it after the branch.
   // Slow path marking the object `ref` when it is gray.
-  SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86(
-      instruction, ref, /* unpoison */ true);
+  SlowPathCode* slow_path;
+  if (always_update_field) {
+    DCHECK(temp != nullptr);
+    slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkAndUpdateFieldSlowPathX86(
+        instruction, ref, obj, src, /* unpoison_ref_before_marking */ true, *temp);
+  } else {
+    slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86(
+        instruction, ref, /* unpoison_ref_before_marking */ true);
+  }
   AddSlowPath(slow_path);
 
   // We have done the "if" of the gray bit check above, now branch based on the flags.
@@ -7263,7 +7562,7 @@
     // The value to patch is the distance from the offset in the constant area
     // from the address computed by the HX86ComputeBaseMethodAddress instruction.
     int32_t constant_offset = codegen_->ConstantAreaStart() + offset_into_constant_area_;
-    int32_t relative_position = constant_offset - codegen_->GetMethodAddressOffset();;
+    int32_t relative_position = constant_offset - codegen_->GetMethodAddressOffset();
 
     // Patch in the right value.
     region.StoreUnaligned<int32_t>(pos - 4, relative_position);
diff --git a/compiler/optimizing/code_generator_x86.h b/compiler/optimizing/code_generator_x86.h
index 25f5c2a..164231b 100644
--- a/compiler/optimizing/code_generator_x86.h
+++ b/compiler/optimizing/code_generator_x86.h
@@ -240,7 +240,8 @@
   void GenerateReferenceLoadOneRegister(HInstruction* instruction,
                                         Location out,
                                         uint32_t offset,
-                                        Location maybe_temp);
+                                        Location maybe_temp,
+                                        ReadBarrierOption read_barrier_option);
   // Generate a heap reference load using two different registers
   // `out` and `obj`:
   //
@@ -254,17 +255,18 @@
   void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
                                          Location out,
                                          Location obj,
-                                         uint32_t offset);
+                                         uint32_t offset,
+                                         ReadBarrierOption read_barrier_option);
   // Generate a GC root reference load:
   //
   //   root <- *address
   //
-  // while honoring read barriers (if any).
+  // while honoring read barriers based on read_barrier_option.
   void GenerateGcRootFieldLoad(HInstruction* instruction,
                                Location root,
                                const Address& address,
-                               Label* fixup_label = nullptr,
-                               bool requires_read_barrier = kEmitCompilerReadBarrier);
+                               Label* fixup_label,
+                               ReadBarrierOption read_barrier_option);
 
   // Push value to FPU stack. `is_fp` specifies whether the value is floating point or not.
   // `is_wide` specifies whether it is long/double or not.
@@ -372,9 +374,6 @@
   void DumpCoreRegister(std::ostream& stream, int reg) const OVERRIDE;
   void DumpFloatingPointRegister(std::ostream& stream, int reg) const OVERRIDE;
 
-  // Blocks all register pairs made out of blocked core registers.
-  void UpdateBlockedPairRegisters() const;
-
   ParallelMoveResolverX86* GetMoveResolver() OVERRIDE {
     return &move_resolver_;
   }
@@ -502,13 +501,24 @@
                                              uint32_t data_offset,
                                              Location index,
                                              bool needs_null_check);
-  // Factored implementation used by GenerateFieldLoadWithBakerReadBarrier
-  // and GenerateArrayLoadWithBakerReadBarrier.
+  // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier,
+  // GenerateArrayLoadWithBakerReadBarrier and some intrinsics.
+  //
+  // Load the object reference located at address `src`, held by
+  // object `obj`, into `ref`, and mark it if needed.  The base of
+  // address `src` must be `obj`.
+  //
+  // If `always_update_field` is true, the value of the reference is
+  // atomically updated in the holder (`obj`).  This operation
+  // requires a temporary register, which must be provided as a
+  // non-null pointer (`temp`).
   void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
                                                  Location ref,
                                                  Register obj,
                                                  const Address& src,
-                                                 bool needs_null_check);
+                                                 bool needs_null_check,
+                                                 bool always_update_field = false,
+                                                 Register* temp = nullptr);
 
   // Generate a read barrier for a heap reference within `instruction`
   // using a slow path.
diff --git a/compiler/optimizing/code_generator_x86_64.cc b/compiler/optimizing/code_generator_x86_64.cc
index 28638d7..19b3019 100644
--- a/compiler/optimizing/code_generator_x86_64.cc
+++ b/compiler/optimizing/code_generator_x86_64.cc
@@ -199,7 +199,7 @@
       }
       __ movl(length_loc.AsRegister<CpuRegister>(), array_len);
       if (mirror::kUseStringCompression) {
-        __ andl(length_loc.AsRegister<CpuRegister>(), Immediate(INT32_MAX));
+        __ shrl(length_loc.AsRegister<CpuRegister>(), Immediate(1));
       }
     }
 
@@ -299,9 +299,9 @@
     __ Bind(GetEntryLabel());
     SaveLiveRegisters(codegen, locations);
 
-    InvokeRuntimeCallingConvention calling_convention;
     const uint32_t string_index = instruction_->AsLoadString()->GetStringIndex();
-    __ movl(CpuRegister(calling_convention.GetRegisterAt(0)), Immediate(string_index));
+    // Custom calling convention: RAX serves as both input and output.
+    __ movl(CpuRegister(RAX), Immediate(string_index));
     x86_64_codegen->InvokeRuntime(kQuickResolveString,
                                   instruction_,
                                   instruction_->GetDexPc(),
@@ -332,8 +332,6 @@
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
-    Location object_class = instruction_->IsCheckCast() ? locations->GetTemp(0)
-                                                        : locations->Out();
     uint32_t dex_pc = instruction_->GetDexPc();
     DCHECK(instruction_->IsCheckCast()
            || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
@@ -348,22 +346,19 @@
     // We're moving two locations to locations that could overlap, so we need a parallel
     // move resolver.
     InvokeRuntimeCallingConvention calling_convention;
-    codegen->EmitParallelMoves(
-        locations->InAt(1),
-        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
-        Primitive::kPrimNot,
-        object_class,
-        Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
-        Primitive::kPrimNot);
-
+    codegen->EmitParallelMoves(locations->InAt(0),
+                               Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
+                               Primitive::kPrimNot,
+                               locations->InAt(1),
+                               Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
+                               Primitive::kPrimNot);
     if (instruction_->IsInstanceOf()) {
       x86_64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this);
-      CheckEntrypointTypes<
-          kQuickInstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*>();
+      CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
     } else {
       DCHECK(instruction_->IsCheckCast());
-      x86_64_codegen->InvokeRuntime(kQuickCheckCast, instruction_, dex_pc, this);
-      CheckEntrypointTypes<kQuickCheckCast, void, const mirror::Class*, const mirror::Class*>();
+      x86_64_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this);
+      CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
     }
 
     if (!is_fatal_) {
@@ -445,11 +440,25 @@
   DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86_64);
 };
 
-// Slow path marking an object during a read barrier.
+// Slow path marking an object reference `ref` during a read
+// barrier. The field `obj.field` in the object `obj` holding this
+// reference does not get updated by this slow path after marking (see
+// ReadBarrierMarkAndUpdateFieldSlowPathX86_64 below for that).
+//
+// This means that after the execution of this slow path, `ref` will
+// always be up-to-date, but `obj.field` may not; i.e., after the
+// flip, `ref` will be a to-space reference, but `obj.field` will
+// probably still be a from-space reference (unless it gets updated by
+// another thread, or if another thread installed another object
+// reference (different from `ref`) in `obj.field`).
 class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
  public:
-  ReadBarrierMarkSlowPathX86_64(HInstruction* instruction, Location obj, bool unpoison)
-      : SlowPathCode(instruction), obj_(obj), unpoison_(unpoison) {
+  ReadBarrierMarkSlowPathX86_64(HInstruction* instruction,
+                                Location ref,
+                                bool unpoison_ref_before_marking)
+      : SlowPathCode(instruction),
+        ref_(ref),
+        unpoison_ref_before_marking_(unpoison_ref_before_marking) {
     DCHECK(kEmitCompilerReadBarrier);
   }
 
@@ -457,9 +466,10 @@
 
   void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
     LocationSummary* locations = instruction_->GetLocations();
-    Register reg = obj_.AsRegister<Register>();
+    CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>();
+    Register ref_reg = ref_cpu_reg.AsRegister();
     DCHECK(locations->CanCall());
-    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg));
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
     DCHECK(instruction_->IsInstanceFieldGet() ||
            instruction_->IsStaticFieldGet() ||
            instruction_->IsArrayGet() ||
@@ -474,44 +484,218 @@
         << instruction_->DebugName();
 
     __ Bind(GetEntryLabel());
-    if (unpoison_) {
+    if (unpoison_ref_before_marking_) {
       // Object* ref = ref_addr->AsMirrorPtr()
-      __ MaybeUnpoisonHeapReference(obj_.AsRegister<CpuRegister>());
+      __ MaybeUnpoisonHeapReference(ref_cpu_reg);
     }
     // No need to save live registers; it's taken care of by the
     // entrypoint. Also, there is no need to update the stack mask,
     // as this runtime call will not trigger a garbage collection.
     CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
-    DCHECK_NE(reg, RSP);
-    DCHECK(0 <= reg && reg < kNumberOfCpuRegisters) << reg;
+    DCHECK_NE(ref_reg, RSP);
+    DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
     // "Compact" slow path, saving two moves.
     //
     // Instead of using the standard runtime calling convention (input
     // and output in R0):
     //
-    //   RDI <- obj
+    //   RDI <- ref
     //   RAX <- ReadBarrierMark(RDI)
-    //   obj <- RAX
+    //   ref <- RAX
     //
-    // we just use rX (the register holding `obj`) as input and output
+    // we just use rX (the register containing `ref`) as input and output
     // of a dedicated entrypoint:
     //
     //   rX <- ReadBarrierMarkRegX(rX)
     //
     int32_t entry_point_offset =
-        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(reg);
+        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg);
     // This runtime call does not require a stack map.
     x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
     __ jmp(GetExitLabel());
   }
 
  private:
-  const Location obj_;
-  const bool unpoison_;
+  // The location (register) of the marked object reference.
+  const Location ref_;
+  // Should the reference in `ref_` be unpoisoned prior to marking it?
+  const bool unpoison_ref_before_marking_;
 
   DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86_64);
 };
 
+// Slow path marking an object reference `ref` during a read barrier,
+// and if needed, atomically updating the field `obj.field` in the
+// object `obj` holding this reference after marking (contrary to
+// ReadBarrierMarkSlowPathX86_64 above, which never tries to update
+// `obj.field`).
+//
+// This means that after the execution of this slow path, both `ref`
+// and `obj.field` will be up-to-date; i.e., after the flip, both will
+// hold the same to-space reference (unless another thread installed
+// another object reference (different from `ref`) in `obj.field`).
+class ReadBarrierMarkAndUpdateFieldSlowPathX86_64 : public SlowPathCode {
+ public:
+  ReadBarrierMarkAndUpdateFieldSlowPathX86_64(HInstruction* instruction,
+                                              Location ref,
+                                              CpuRegister obj,
+                                              const Address& field_addr,
+                                              bool unpoison_ref_before_marking,
+                                              CpuRegister temp1,
+                                              CpuRegister temp2)
+      : SlowPathCode(instruction),
+        ref_(ref),
+        obj_(obj),
+        field_addr_(field_addr),
+        unpoison_ref_before_marking_(unpoison_ref_before_marking),
+        temp1_(temp1),
+        temp2_(temp2) {
+    DCHECK(kEmitCompilerReadBarrier);
+  }
+
+  const char* GetDescription() const OVERRIDE {
+    return "ReadBarrierMarkAndUpdateFieldSlowPathX86_64";
+  }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    LocationSummary* locations = instruction_->GetLocations();
+    CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>();
+    Register ref_reg = ref_cpu_reg.AsRegister();
+    DCHECK(locations->CanCall());
+    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
+    // This slow path is only used by the UnsafeCASObject intrinsic.
+    DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
+        << "Unexpected instruction in read barrier marking and field updating slow path: "
+        << instruction_->DebugName();
+    DCHECK(instruction_->GetLocations()->Intrinsified());
+    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);
+
+    __ Bind(GetEntryLabel());
+    if (unpoison_ref_before_marking_) {
+      // Object* ref = ref_addr->AsMirrorPtr()
+      __ MaybeUnpoisonHeapReference(ref_cpu_reg);
+    }
+
+    // Save the old (unpoisoned) reference.
+    __ movl(temp1_, ref_cpu_reg);
+
+    // No need to save live registers; it's taken care of by the
+    // entrypoint. Also, there is no need to update the stack mask,
+    // as this runtime call will not trigger a garbage collection.
+    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
+    DCHECK_NE(ref_reg, RSP);
+    DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
+    // "Compact" slow path, saving two moves.
+    //
+    // Instead of using the standard runtime calling convention (input
+    // and output in R0):
+    //
+    //   RDI <- ref
+    //   RAX <- ReadBarrierMark(RDI)
+    //   ref <- RAX
+    //
+    // we just use rX (the register containing `ref`) as input and output
+    // of a dedicated entrypoint:
+    //
+    //   rX <- ReadBarrierMarkRegX(rX)
+    //
+    int32_t entry_point_offset =
+        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg);
+    // This runtime call does not require a stack map.
+    x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+
+    // If the new reference is different from the old reference,
+    // update the field in the holder (`*field_addr`).
+    //
+    // Note that this field could also hold a different object, if
+    // another thread had concurrently changed it. In that case, the
+    // LOCK CMPXCHGL instruction in the compare-and-set (CAS)
+    // operation below would abort the CAS, leaving the field as-is.
+    NearLabel done;
+    __ cmpl(temp1_, ref_cpu_reg);
+    __ j(kEqual, &done);
+
+    // Update the the holder's field atomically.  This may fail if
+    // mutator updates before us, but it's OK.  This is achived
+    // using a strong compare-and-set (CAS) operation with relaxed
+    // memory synchronization ordering, where the expected value is
+    // the old reference and the desired value is the new reference.
+    // This operation is implemented with a 32-bit LOCK CMPXLCHG
+    // instruction, which requires the expected value (the old
+    // reference) to be in EAX.  Save RAX beforehand, and move the
+    // expected value (stored in `temp1_`) into EAX.
+    __ movq(temp2_, CpuRegister(RAX));
+    __ movl(CpuRegister(RAX), temp1_);
+
+    // Convenience aliases.
+    CpuRegister base = obj_;
+    CpuRegister expected = CpuRegister(RAX);
+    CpuRegister value = ref_cpu_reg;
+
+    bool base_equals_value = (base.AsRegister() == value.AsRegister());
+    Register value_reg = ref_reg;
+    if (kPoisonHeapReferences) {
+      if (base_equals_value) {
+        // If `base` and `value` are the same register location, move
+        // `value_reg` to a temporary register.  This way, poisoning
+        // `value_reg` won't invalidate `base`.
+        value_reg = temp1_.AsRegister();
+        __ movl(CpuRegister(value_reg), base);
+      }
+
+      // Check that the register allocator did not assign the location
+      // of `expected` (RAX) to `value` nor to `base`, so that heap
+      // poisoning (when enabled) works as intended below.
+      // - If `value` were equal to `expected`, both references would
+      //   be poisoned twice, meaning they would not be poisoned at
+      //   all, as heap poisoning uses address negation.
+      // - If `base` were equal to `expected`, poisoning `expected`
+      //   would invalidate `base`.
+      DCHECK_NE(value_reg, expected.AsRegister());
+      DCHECK_NE(base.AsRegister(), expected.AsRegister());
+
+      __ PoisonHeapReference(expected);
+      __ PoisonHeapReference(CpuRegister(value_reg));
+    }
+
+    __ LockCmpxchgl(field_addr_, CpuRegister(value_reg));
+
+    // If heap poisoning is enabled, we need to unpoison the values
+    // that were poisoned earlier.
+    if (kPoisonHeapReferences) {
+      if (base_equals_value) {
+        // `value_reg` has been moved to a temporary register, no need
+        // to unpoison it.
+      } else {
+        __ UnpoisonHeapReference(CpuRegister(value_reg));
+      }
+      // No need to unpoison `expected` (RAX), as it is be overwritten below.
+    }
+
+    // Restore RAX.
+    __ movq(CpuRegister(RAX), temp2_);
+
+    __ Bind(&done);
+    __ jmp(GetExitLabel());
+  }
+
+ private:
+  // The location (register) of the marked object reference.
+  const Location ref_;
+  // The register containing the object holding the marked object reference field.
+  const CpuRegister obj_;
+  // The address of the marked reference field.  The base of this address must be `obj_`.
+  const Address field_addr_;
+
+  // Should the reference in `ref_` be unpoisoned prior to marking it?
+  const bool unpoison_ref_before_marking_;
+
+  const CpuRegister temp1_;
+  const CpuRegister temp2_;
+
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathX86_64);
+};
+
 // Slow path generating a read barrier for a heap reference.
 class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode {
  public:
@@ -1140,8 +1324,13 @@
     }
   }
 
-  __ movq(Address(CpuRegister(RSP), kCurrentMethodStackOffset),
-          CpuRegister(kMethodRegisterArgument));
+  // Save the current method if we need it. Note that we do not
+  // do this in HCurrentMethod, as the instruction might have been removed
+  // in the SSA graph.
+  if (RequiresCurrentMethod()) {
+    __ movq(Address(CpuRegister(RSP), kCurrentMethodStackOffset),
+            CpuRegister(kMethodRegisterArgument));
+  }
 }
 
 void CodeGeneratorX86_64::GenerateFrameExit() {
@@ -4116,7 +4305,7 @@
       // /* HeapReference<Object> */ out = *(base + offset)
       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
         // Note that a potential implicit null check is handled in this
-        // CodeGeneratorX86::GenerateFieldLoadWithBakerReadBarrier call.
+        // CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier call.
         codegen_->GenerateFieldLoadWithBakerReadBarrier(
             instruction, out, base, offset, /* needs_null_check */ true);
         if (is_volatile) {
@@ -4535,9 +4724,11 @@
         // Branch cases into compressed and uncompressed for each index's type.
         uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
         NearLabel done, not_compressed;
-        __ cmpl(Address(obj, count_offset), Immediate(0));
+        __ testl(Address(obj, count_offset), Immediate(1));
         codegen_->MaybeRecordImplicitNullCheck(instruction);
-        __ j(kGreaterEqual, &not_compressed);
+        static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
+                      "Expecting 0=compressed, 1=uncompressed");
+        __ j(kNotZero, &not_compressed);
         __ movzxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset));
         __ jmp(&done);
         __ Bind(&not_compressed);
@@ -4563,7 +4754,7 @@
       //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
       if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
         // Note that a potential implicit null check is handled in this
-        // CodeGeneratorX86::GenerateArrayLoadWithBakerReadBarrier call.
+        // CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier call.
         codegen_->GenerateArrayLoadWithBakerReadBarrier(
             instruction, out_loc, obj, data_offset, index, /* needs_null_check */ true);
       } else {
@@ -4869,7 +5060,7 @@
   codegen_->MaybeRecordImplicitNullCheck(instruction);
   // Mask out most significant bit in case the array is String's array of char.
   if (mirror::kUseStringCompression && instruction->IsStringLength()) {
-    __ andl(out, Immediate(INT32_MAX));
+    __ shrl(out, Immediate(1));
   }
 }
 
@@ -4921,10 +5112,12 @@
       Location array_loc = array_length->GetLocations()->InAt(0);
       Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
       if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
+        // TODO: if index_loc.IsConstant(), compare twice the index (to compensate for
+        // the string compression flag) with the in-memory length and avoid the temporary.
         CpuRegister length_reg = CpuRegister(TMP);
         __ movl(length_reg, array_len);
         codegen_->MaybeRecordImplicitNullCheck(array_length);
-        __ andl(length_reg, Immediate(INT32_MAX));
+        __ shrl(length_reg, Immediate(1));
         codegen_->GenerateIntCompare(length_reg, index_loc);
       } else {
         // Checking the bound for general case:
@@ -5300,7 +5493,9 @@
   Location out_loc = locations->Out();
   CpuRegister out = out_loc.AsRegister<CpuRegister>();
 
-  const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
+  const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
+      ? kWithoutReadBarrier
+      : kCompilerReadBarrierOption;
   bool generate_null_check = false;
   switch (cls->GetLoadKind()) {
     case HLoadClass::LoadKind::kReferrersClass: {
@@ -5312,17 +5507,17 @@
           cls,
           out_loc,
           Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()),
-          /*fixup_label*/nullptr,
-          requires_read_barrier);
+          /* fixup_label */ nullptr,
+          read_barrier_option);
       break;
     }
     case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
-      DCHECK(!requires_read_barrier);
+      DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
       __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false));
       codegen_->RecordTypePatch(cls);
       break;
     case HLoadClass::LoadKind::kBootImageAddress: {
-      DCHECK(!requires_read_barrier);
+      DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
       DCHECK_NE(cls->GetAddress(), 0u);
       uint32_t address = dchecked_integral_cast<uint32_t>(cls->GetAddress());
       __ movl(out, Immediate(address));  // Zero-extended.
@@ -5337,16 +5532,16 @@
         GenerateGcRootFieldLoad(cls,
                                 out_loc,
                                 address,
-                                /*fixup_label*/nullptr,
-                                requires_read_barrier);
+                                /* fixup_label */ nullptr,
+                                read_barrier_option);
       } else {
         // TODO: Consider using opcode A1, i.e. movl eax, moff32 (with 64-bit address).
         __ movq(out, Immediate(cls->GetAddress()));
         GenerateGcRootFieldLoad(cls,
                                 out_loc,
                                 Address(out, 0),
-                                /*fixup_label*/nullptr,
-                                requires_read_barrier);
+                                /* fixup_label */ nullptr,
+                                read_barrier_option);
       }
       generate_null_check = !cls->IsInDexCache();
       break;
@@ -5357,7 +5552,7 @@
       Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
                                           /* no_rip */ false);
       // /* GcRoot<mirror::Class> */ out = *address  /* PC-relative */
-      GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, requires_read_barrier);
+      GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
       generate_null_check = !cls->IsInDexCache();
       break;
     }
@@ -5373,8 +5568,8 @@
           cls,
           out_loc,
           Address(out, CodeGenerator::GetCacheOffset(cls->GetTypeIndex())),
-          /*fixup_label*/nullptr,
-          requires_read_barrier);
+          /* fixup_label */ nullptr,
+          read_barrier_option);
       generate_null_check = !cls->IsInDexCache();
       break;
     }
@@ -5430,9 +5625,6 @@
       break;
     case HLoadString::LoadKind::kBootImageAddress:
       break;
-    case HLoadString::LoadKind::kDexCacheAddress:
-      DCHECK(Runtime::Current()->UseJitCompilation());
-      break;
     case HLoadString::LoadKind::kBssEntry:
       DCHECK(!Runtime::Current()->UseJitCompilation());
       break;
@@ -5450,10 +5642,20 @@
       : LocationSummary::kNoCall;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
   if (load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) {
-    locations->SetInAt(0, Location::RequiresRegister());
     locations->SetOut(Location::RegisterLocation(RAX));
   } else {
     locations->SetOut(Location::RequiresRegister());
+    if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) {
+      if (!kUseReadBarrier || kUseBakerReadBarrier) {
+        // Rely on the pResolveString and/or marking to save everything.
+        // Custom calling convention: RAX serves as both input and output.
+        RegisterSet caller_saves = RegisterSet::Empty();
+        caller_saves.Add(Location::RegisterLocation(RAX));
+        locations->SetCustomSlowPathCallerSaves(caller_saves);
+      } else {
+        // For non-Baker read barrier we have a temp-clobbering call.
+      }
+    }
   }
 }
 
@@ -5480,7 +5682,7 @@
                                           /* no_rip */ false);
       Label* fixup_label = codegen_->NewStringBssEntryPatch(load);
       // /* GcRoot<mirror::Class> */ out = *address  /* PC-relative */
-      GenerateGcRootFieldLoad(load, out_loc, address, fixup_label);
+      GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
       SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86_64(load);
       codegen_->AddSlowPath(slow_path);
       __ testl(out, out);
@@ -5493,9 +5695,8 @@
   }
 
   // TODO: Re-add the compiler code to do string dex cache lookup again.
-  InvokeRuntimeCallingConvention calling_convention;
-  __ movl(CpuRegister(calling_convention.GetRegisterAt(0)),
-          Immediate(load->GetStringIndex()));
+  // Custom calling convention: RAX serves as both input and output.
+  __ movl(CpuRegister(RAX), Immediate(load->GetStringIndex()));
   codegen_->InvokeRuntime(kQuickResolveString,
                           load,
                           load->GetDexPc());
@@ -5537,7 +5738,19 @@
   CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
 }
 
-static bool TypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
+static bool CheckCastTypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
+  if (type_check_kind == TypeCheckKind::kInterfaceCheck && !kPoisonHeapReferences) {
+    // We need a temporary for holding the iftable length.
+    return true;
+  }
+  return kEmitCompilerReadBarrier &&
+      !kUseBakerReadBarrier &&
+      (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
+       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
+       type_check_kind == TypeCheckKind::kArrayObjectCheck);
+}
+
+static bool InstanceOfTypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
   return kEmitCompilerReadBarrier &&
       !kUseBakerReadBarrier &&
       (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
@@ -5575,7 +5788,7 @@
   locations->SetOut(Location::RequiresRegister());
   // When read barriers are enabled, we need a temporary register for
   // some cases.
-  if (TypeCheckNeedsATemporary(type_check_kind)) {
+  if (InstanceOfTypeCheckNeedsATemporary(type_check_kind)) {
     locations->AddTemp(Location::RequiresRegister());
   }
 }
@@ -5588,7 +5801,7 @@
   Location cls = locations->InAt(1);
   Location out_loc =  locations->Out();
   CpuRegister out = out_loc.AsRegister<CpuRegister>();
-  Location maybe_temp_loc = TypeCheckNeedsATemporary(type_check_kind) ?
+  Location maybe_temp_loc = InstanceOfTypeCheckNeedsATemporary(type_check_kind) ?
       locations->GetTemp(0) :
       Location::NoLocation();
   uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
@@ -5605,11 +5818,14 @@
     __ j(kEqual, &zero);
   }
 
-  // /* HeapReference<Class> */ out = obj->klass_
-  GenerateReferenceLoadTwoRegisters(instruction, out_loc, obj_loc, class_offset);
-
   switch (type_check_kind) {
     case TypeCheckKind::kExactCheck: {
+      // /* HeapReference<Class> */ out = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        kCompilerReadBarrierOption);
       if (cls.IsRegister()) {
         __ cmpl(out, cls.AsRegister<CpuRegister>());
       } else {
@@ -5630,12 +5846,22 @@
     }
 
     case TypeCheckKind::kAbstractClassCheck: {
+      // /* HeapReference<Class> */ out = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        kCompilerReadBarrierOption);
       // If the class is abstract, we eagerly fetch the super class of the
       // object to avoid doing a comparison we know will fail.
       NearLabel loop, success;
       __ Bind(&loop);
       // /* HeapReference<Class> */ out = out->super_class_
-      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       out_loc,
+                                       super_offset,
+                                       maybe_temp_loc,
+                                       kCompilerReadBarrierOption);
       __ testl(out, out);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ j(kEqual, &done);
@@ -5654,6 +5880,12 @@
     }
 
     case TypeCheckKind::kClassHierarchyCheck: {
+      // /* HeapReference<Class> */ out = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        kCompilerReadBarrierOption);
       // Walk over the class hierarchy to find a match.
       NearLabel loop, success;
       __ Bind(&loop);
@@ -5665,7 +5897,11 @@
       }
       __ j(kEqual, &success);
       // /* HeapReference<Class> */ out = out->super_class_
-      GenerateReferenceLoadOneRegister(instruction, out_loc, super_offset, maybe_temp_loc);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       out_loc,
+                                       super_offset,
+                                       maybe_temp_loc,
+                                       kCompilerReadBarrierOption);
       __ testl(out, out);
       __ j(kNotEqual, &loop);
       // If `out` is null, we use it for the result, and jump to `done`.
@@ -5679,6 +5915,12 @@
     }
 
     case TypeCheckKind::kArrayObjectCheck: {
+      // /* HeapReference<Class> */ out = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        kCompilerReadBarrierOption);
       // Do an exact check.
       NearLabel exact_check;
       if (cls.IsRegister()) {
@@ -5690,7 +5932,11 @@
       __ j(kEqual, &exact_check);
       // Otherwise, we need to check that the object's class is a non-primitive array.
       // /* HeapReference<Class> */ out = out->component_type_
-      GenerateReferenceLoadOneRegister(instruction, out_loc, component_offset, maybe_temp_loc);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       out_loc,
+                                       component_offset,
+                                       maybe_temp_loc,
+                                       kCompilerReadBarrierOption);
       __ testl(out, out);
       // If `out` is null, we use it for the result, and jump to `done`.
       __ j(kEqual, &done);
@@ -5703,6 +5949,13 @@
     }
 
     case TypeCheckKind::kArrayCheck: {
+      // No read barrier since the slow path will retry upon failure.
+      // /* HeapReference<Class> */ out = obj->klass_
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        out_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        kWithoutReadBarrier);
       if (cls.IsRegister()) {
         __ cmpl(out, cls.AsRegister<CpuRegister>());
       } else {
@@ -5767,33 +6020,45 @@
   }
 }
 
-void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) {
-  LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
-  bool throws_into_catch = instruction->CanThrowIntoCatchBlock();
-  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+static bool IsTypeCheckSlowPathFatal(TypeCheckKind type_check_kind, bool throws_into_catch) {
   switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
     case TypeCheckKind::kAbstractClassCheck:
     case TypeCheckKind::kClassHierarchyCheck:
     case TypeCheckKind::kArrayObjectCheck:
-      call_kind = (throws_into_catch || kEmitCompilerReadBarrier) ?
-          LocationSummary::kCallOnSlowPath :
-          LocationSummary::kNoCall;  // In fact, call on a fatal (non-returning) slow path.
-      break;
+      return !throws_into_catch && !kEmitCompilerReadBarrier;
+    case TypeCheckKind::kInterfaceCheck:
+      return !throws_into_catch && !kEmitCompilerReadBarrier && !kPoisonHeapReferences;
     case TypeCheckKind::kArrayCheck:
     case TypeCheckKind::kUnresolvedCheck:
-    case TypeCheckKind::kInterfaceCheck:
-      call_kind = LocationSummary::kCallOnSlowPath;
-      break;
+      return false;
   }
+  LOG(FATAL) << "Unreachable";
+  UNREACHABLE();
+}
+
+void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) {
+  bool throws_into_catch = instruction->CanThrowIntoCatchBlock();
+  TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
+  bool is_fatal_slow_path = IsTypeCheckSlowPathFatal(type_check_kind, throws_into_catch);
+  LocationSummary::CallKind call_kind = is_fatal_slow_path
+                                            ? LocationSummary::kNoCall
+                                            : LocationSummary::kCallOnSlowPath;
   LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
   locations->SetInAt(0, Location::RequiresRegister());
-  locations->SetInAt(1, Location::Any());
+  if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
+    // Require a register for the interface check since there is a loop that compares the class to
+    // a memory address.
+    locations->SetInAt(1, Location::RequiresRegister());
+  } else {
+    locations->SetInAt(1, Location::Any());
+  }
+
   // Note that TypeCheckSlowPathX86_64 uses this "temp" register too.
   locations->AddTemp(Location::RequiresRegister());
   // When read barriers are enabled, we need an additional temporary
   // register for some cases.
-  if (TypeCheckNeedsATemporary(type_check_kind)) {
+  if (CheckCastTypeCheckNeedsATemporary(type_check_kind)) {
     locations->AddTemp(Location::RequiresRegister());
   }
 }
@@ -5806,38 +6071,45 @@
   Location cls = locations->InAt(1);
   Location temp_loc = locations->GetTemp(0);
   CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
-  Location maybe_temp2_loc = TypeCheckNeedsATemporary(type_check_kind) ?
+  Location maybe_temp2_loc = CheckCastTypeCheckNeedsATemporary(type_check_kind) ?
       locations->GetTemp(1) :
       Location::NoLocation();
-  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
-  uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
-  uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
-  uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+  const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+  const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+  const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+  const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+  const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
+  const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
+  const uint32_t object_array_data_offset =
+      mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
 
+  // Always false for read barriers since we may need to go to the entrypoint for non-fatal cases
+  // from false negatives. The false negatives may come from avoiding read barriers below. Avoiding
+  // read barriers is done for performance and code size reasons.
   bool is_type_check_slow_path_fatal =
-      (type_check_kind == TypeCheckKind::kExactCheck ||
-       type_check_kind == TypeCheckKind::kAbstractClassCheck ||
-       type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
-       type_check_kind == TypeCheckKind::kArrayObjectCheck) &&
-      !instruction->CanThrowIntoCatchBlock();
+      IsTypeCheckSlowPathFatal(type_check_kind, instruction->CanThrowIntoCatchBlock());
   SlowPathCode* type_check_slow_path =
       new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction,
                                                            is_type_check_slow_path_fatal);
   codegen_->AddSlowPath(type_check_slow_path);
 
+
+  NearLabel done;
+  // Avoid null check if we know obj is not null.
+  if (instruction->MustDoNullCheck()) {
+    __ testl(obj, obj);
+    __ j(kEqual, &done);
+  }
+
   switch (type_check_kind) {
     case TypeCheckKind::kExactCheck:
     case TypeCheckKind::kArrayCheck: {
-      NearLabel done;
-      // Avoid null check if we know obj is not null.
-      if (instruction->MustDoNullCheck()) {
-        __ testl(obj, obj);
-        __ j(kEqual, &done);
-      }
-
       // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
-
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        kWithoutReadBarrier);
       if (cls.IsRegister()) {
         __ cmpl(temp, cls.AsRegister<CpuRegister>());
       } else {
@@ -5847,43 +6119,32 @@
       // Jump to slow path for throwing the exception or doing a
       // more involved array check.
       __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
-      __ Bind(&done);
       break;
     }
 
     case TypeCheckKind::kAbstractClassCheck: {
-      NearLabel done;
-      // Avoid null check if we know obj is not null.
-      if (instruction->MustDoNullCheck()) {
-        __ testl(obj, obj);
-        __ j(kEqual, &done);
-      }
-
       // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
-
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        kWithoutReadBarrier);
       // If the class is abstract, we eagerly fetch the super class of the
       // object to avoid doing a comparison we know will fail.
-      NearLabel loop, compare_classes;
+      NearLabel loop;
       __ Bind(&loop);
       // /* HeapReference<Class> */ temp = temp->super_class_
-      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       temp_loc,
+                                       super_offset,
+                                       maybe_temp2_loc,
+                                       kWithoutReadBarrier);
 
-      // If the class reference currently in `temp` is not null, jump
-      // to the `compare_classes` label to compare it with the checked
-      // class.
+      // If the class reference currently in `temp` is null, jump to the slow path to throw the
+      // exception.
       __ testl(temp, temp);
-      __ j(kNotEqual, &compare_classes);
-      // Otherwise, jump to the slow path to throw the exception.
-      //
-      // But before, move back the object's class into `temp` before
-      // going into the slow path, as it has been overwritten in the
-      // meantime.
-      // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
-      __ jmp(type_check_slow_path->GetEntryLabel());
-
-      __ Bind(&compare_classes);
+      // Otherwise, compare the classes.
+      __ j(kZero, type_check_slow_path->GetEntryLabel());
       if (cls.IsRegister()) {
         __ cmpl(temp, cls.AsRegister<CpuRegister>());
       } else {
@@ -5891,21 +6152,16 @@
         __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
       }
       __ j(kNotEqual, &loop);
-      __ Bind(&done);
       break;
     }
 
     case TypeCheckKind::kClassHierarchyCheck: {
-      NearLabel done;
-      // Avoid null check if we know obj is not null.
-      if (instruction->MustDoNullCheck()) {
-        __ testl(obj, obj);
-        __ j(kEqual, &done);
-      }
-
       // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
-
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        kWithoutReadBarrier);
       // Walk over the class hierarchy to find a match.
       NearLabel loop;
       __ Bind(&loop);
@@ -5918,39 +6174,28 @@
       __ j(kEqual, &done);
 
       // /* HeapReference<Class> */ temp = temp->super_class_
-      GenerateReferenceLoadOneRegister(instruction, temp_loc, super_offset, maybe_temp2_loc);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       temp_loc,
+                                       super_offset,
+                                       maybe_temp2_loc,
+                                       kWithoutReadBarrier);
 
       // If the class reference currently in `temp` is not null, jump
       // back at the beginning of the loop.
       __ testl(temp, temp);
-      __ j(kNotEqual, &loop);
+      __ j(kNotZero, &loop);
       // Otherwise, jump to the slow path to throw the exception.
-      //
-      // But before, move back the object's class into `temp` before
-      // going into the slow path, as it has been overwritten in the
-      // meantime.
-      // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
       __ jmp(type_check_slow_path->GetEntryLabel());
-      __ Bind(&done);
       break;
     }
 
     case TypeCheckKind::kArrayObjectCheck: {
-      // We cannot use a NearLabel here, as its range might be too
-      // short in some cases when read barriers are enabled.  This has
-      // been observed for instance when the code emitted for this
-      // case uses high x86-64 registers (R8-R15).
-      Label done;
-      // Avoid null check if we know obj is not null.
-      if (instruction->MustDoNullCheck()) {
-        __ testl(obj, obj);
-        __ j(kEqual, &done);
-      }
-
       // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
-
+      GenerateReferenceLoadTwoRegisters(instruction,
+                                        temp_loc,
+                                        obj_loc,
+                                        class_offset,
+                                        kWithoutReadBarrier);
       // Do an exact check.
       NearLabel check_non_primitive_component_type;
       if (cls.IsRegister()) {
@@ -5963,48 +6208,26 @@
 
       // Otherwise, we need to check that the object's class is a non-primitive array.
       // /* HeapReference<Class> */ temp = temp->component_type_
-      GenerateReferenceLoadOneRegister(instruction, temp_loc, component_offset, maybe_temp2_loc);
+      GenerateReferenceLoadOneRegister(instruction,
+                                       temp_loc,
+                                       component_offset,
+                                       maybe_temp2_loc,
+                                       kWithoutReadBarrier);
 
       // If the component type is not null (i.e. the object is indeed
       // an array), jump to label `check_non_primitive_component_type`
       // to further check that this component type is not a primitive
       // type.
       __ testl(temp, temp);
-      __ j(kNotEqual, &check_non_primitive_component_type);
       // Otherwise, jump to the slow path to throw the exception.
-      //
-      // But before, move back the object's class into `temp` before
-      // going into the slow path, as it has been overwritten in the
-      // meantime.
-      // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
-      __ jmp(type_check_slow_path->GetEntryLabel());
-
-      __ Bind(&check_non_primitive_component_type);
+      __ j(kZero, type_check_slow_path->GetEntryLabel());
       __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
-      __ j(kEqual, &done);
-      // Same comment as above regarding `temp` and the slow path.
-      // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
-      __ jmp(type_check_slow_path->GetEntryLabel());
-      __ Bind(&done);
+      __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
       break;
     }
 
-    case TypeCheckKind::kUnresolvedCheck:
-    case TypeCheckKind::kInterfaceCheck:
-      NearLabel done;
-      // Avoid null check if we know obj is not null.
-      if (instruction->MustDoNullCheck()) {
-        __ testl(obj, obj);
-        __ j(kEqual, &done);
-      }
-
-      // /* HeapReference<Class> */ temp = obj->klass_
-      GenerateReferenceLoadTwoRegisters(instruction, temp_loc, obj_loc, class_offset);
-
-      // We always go into the type check slow path for the unresolved
-      // and interface check cases.
+    case TypeCheckKind::kUnresolvedCheck: {
+      // We always go into the type check slow path for the unresolved case.
       //
       // We cannot directly call the CheckCast runtime entry point
       // without resorting to a type checking slow path here (i.e. by
@@ -6013,16 +6236,52 @@
       // instruction (following the runtime calling convention), which
       // might be cluttered by the potential first read barrier
       // emission at the beginning of this method.
-      //
-      // TODO: Introduce a new runtime entry point taking the object
-      // to test (instead of its class) as argument, and let it deal
-      // with the read barrier issues. This will let us refactor this
-      // case of the `switch` code as it was previously (with a direct
-      // call to the runtime not using a type checking slow path).
-      // This should also be beneficial for the other cases above.
       __ jmp(type_check_slow_path->GetEntryLabel());
-      __ Bind(&done);
       break;
+    }
+
+    case TypeCheckKind::kInterfaceCheck:
+      // Fast path for the interface check. We always go slow path for heap poisoning since
+      // unpoisoning cls would require an extra temp.
+      if (!kPoisonHeapReferences) {
+        // Try to avoid read barriers to improve the fast path. We can not get false positives by
+        // doing this.
+        // /* HeapReference<Class> */ temp = obj->klass_
+        GenerateReferenceLoadTwoRegisters(instruction,
+                                          temp_loc,
+                                          obj_loc,
+                                          class_offset,
+                                          kWithoutReadBarrier);
+
+        // /* HeapReference<Class> */ temp = temp->iftable_
+        GenerateReferenceLoadTwoRegisters(instruction,
+                                          temp_loc,
+                                          temp_loc,
+                                          iftable_offset,
+                                          kWithoutReadBarrier);
+        // Iftable is never null.
+        __ movl(maybe_temp2_loc.AsRegister<CpuRegister>(), Address(temp, array_length_offset));
+        // Loop through the iftable and check if any class matches.
+        NearLabel start_loop;
+        __ Bind(&start_loop);
+        // Need to subtract first to handle the empty array case.
+        __ subl(maybe_temp2_loc.AsRegister<CpuRegister>(), Immediate(2));
+        __ j(kNegative, type_check_slow_path->GetEntryLabel());
+        // Go to next interface if the classes do not match.
+        __ cmpl(cls.AsRegister<CpuRegister>(),
+                CodeGeneratorX86_64::ArrayAddress(temp,
+                                                  maybe_temp2_loc,
+                                                  TIMES_4,
+                                                  object_array_data_offset));
+        __ j(kNotEqual, &start_loop);  // Return if same class.
+      } else {
+        __ jmp(type_check_slow_path->GetEntryLabel());
+      }
+      break;
+  }
+
+  if (done.IsLinked()) {
+    __ Bind(&done);
   }
 
   __ Bind(type_check_slow_path->GetExitLabel());
@@ -6161,12 +6420,15 @@
   }
 }
 
-void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister(HInstruction* instruction,
-                                                                      Location out,
-                                                                      uint32_t offset,
-                                                                      Location maybe_temp) {
+void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister(
+    HInstruction* instruction,
+    Location out,
+    uint32_t offset,
+    Location maybe_temp,
+    ReadBarrierOption read_barrier_option) {
   CpuRegister out_reg = out.AsRegister<CpuRegister>();
-  if (kEmitCompilerReadBarrier) {
+  if (read_barrier_option == kWithReadBarrier) {
+    CHECK(kEmitCompilerReadBarrier);
     if (kUseBakerReadBarrier) {
       // Load with fast path based Baker's read barrier.
       // /* HeapReference<Object> */ out = *(out + offset)
@@ -6191,13 +6453,16 @@
   }
 }
 
-void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
-                                                                       Location out,
-                                                                       Location obj,
-                                                                       uint32_t offset) {
+void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters(
+    HInstruction* instruction,
+    Location out,
+    Location obj,
+    uint32_t offset,
+    ReadBarrierOption read_barrier_option) {
   CpuRegister out_reg = out.AsRegister<CpuRegister>();
   CpuRegister obj_reg = obj.AsRegister<CpuRegister>();
-  if (kEmitCompilerReadBarrier) {
+  if (read_barrier_option == kWithReadBarrier) {
+    CHECK(kEmitCompilerReadBarrier);
     if (kUseBakerReadBarrier) {
       // Load with fast path based Baker's read barrier.
       // /* HeapReference<Object> */ out = *(obj + offset)
@@ -6217,13 +6482,14 @@
   }
 }
 
-void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(HInstruction* instruction,
-                                                             Location root,
-                                                             const Address& address,
-                                                             Label* fixup_label,
-                                                             bool requires_read_barrier) {
+void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(
+    HInstruction* instruction,
+    Location root,
+    const Address& address,
+    Label* fixup_label,
+    ReadBarrierOption read_barrier_option) {
   CpuRegister root_reg = root.AsRegister<CpuRegister>();
-  if (requires_read_barrier) {
+  if (read_barrier_option == kWithReadBarrier) {
     DCHECK(kEmitCompilerReadBarrier);
     if (kUseBakerReadBarrier) {
       // Fast path implementation of art::ReadBarrier::BarrierForRoot when
@@ -6249,7 +6515,7 @@
 
       // Slow path marking the GC root `root`.
       SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(
-          instruction, root, /* unpoison */ false);
+          instruction, root, /* unpoison_ref_before_marking */ false);
       codegen_->AddSlowPath(slow_path);
 
       __ gs()->cmpl(Address::Absolute(Thread::IsGcMarkingOffset<kX86_64PointerSize>().Int32Value(),
@@ -6315,7 +6581,10 @@
                                                                     Location ref,
                                                                     CpuRegister obj,
                                                                     const Address& src,
-                                                                    bool needs_null_check) {
+                                                                    bool needs_null_check,
+                                                                    bool always_update_field,
+                                                                    CpuRegister* temp1,
+                                                                    CpuRegister* temp2) {
   DCHECK(kEmitCompilerReadBarrier);
   DCHECK(kUseBakerReadBarrier);
 
@@ -6329,7 +6598,7 @@
   //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
   //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
   //   HeapReference<Object> ref = *src;  // Original reference load.
-  //   bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+  //   bool is_gray = (rb_state == ReadBarrier::GrayState());
   //   if (is_gray) {
   //     ref = ReadBarrier::Mark(ref);  // Performed by runtime entrypoint slow path.
   //   }
@@ -6347,14 +6616,13 @@
   uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
 
   // Given the numeric representation, it's enough to check the low bit of the rb_state.
-  static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
-  static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
-  static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+  static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+  static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
   constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
   constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
   constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
 
-  // if (rb_state == ReadBarrier::gray_ptr_)
+  // if (rb_state == ReadBarrier::GrayState())
   //   ref = ReadBarrier::Mark(ref);
   // At this point, just do the "if" and make sure that flags are preserved until the branch.
   __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value));
@@ -6372,8 +6640,16 @@
 
   // Note: Reference unpoisoning modifies the flags, so we need to delay it after the branch.
   // Slow path marking the object `ref` when it is gray.
-  SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(
-      instruction, ref, /* unpoison */ true);
+  SlowPathCode* slow_path;
+  if (always_update_field) {
+    DCHECK(temp1 != nullptr);
+    DCHECK(temp2 != nullptr);
+    slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkAndUpdateFieldSlowPathX86_64(
+        instruction, ref, obj, src, /* unpoison_ref_before_marking */ true, *temp1, *temp2);
+  } else {
+    slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(
+        instruction, ref, /* unpoison_ref_before_marking */ true);
+  }
   AddSlowPath(slow_path);
 
   // We have done the "if" of the gray bit check above, now branch based on the flags.
diff --git a/compiler/optimizing/code_generator_x86_64.h b/compiler/optimizing/code_generator_x86_64.h
index 57ef83f..e5a4152 100644
--- a/compiler/optimizing/code_generator_x86_64.h
+++ b/compiler/optimizing/code_generator_x86_64.h
@@ -234,7 +234,8 @@
   void GenerateReferenceLoadOneRegister(HInstruction* instruction,
                                         Location out,
                                         uint32_t offset,
-                                        Location maybe_temp);
+                                        Location maybe_temp,
+                                        ReadBarrierOption read_barrier_option);
   // Generate a heap reference load using two different registers
   // `out` and `obj`:
   //
@@ -248,17 +249,18 @@
   void GenerateReferenceLoadTwoRegisters(HInstruction* instruction,
                                          Location out,
                                          Location obj,
-                                         uint32_t offset);
+                                         uint32_t offset,
+                                         ReadBarrierOption read_barrier_option);
   // Generate a GC root reference load:
   //
   //   root <- *address
   //
-  // while honoring read barriers (if any).
+  // while honoring read barriers based on read_barrier_option.
   void GenerateGcRootFieldLoad(HInstruction* instruction,
                                Location root,
                                const Address& address,
-                               Label* fixup_label = nullptr,
-                               bool requires_read_barrier = kEmitCompilerReadBarrier);
+                               Label* fixup_label,
+                               ReadBarrierOption read_barrier_option);
 
   void PushOntoFPStack(Location source, uint32_t temp_offset,
                        uint32_t stack_adjustment, bool is_float);
@@ -434,13 +436,25 @@
                                              uint32_t data_offset,
                                              Location index,
                                              bool needs_null_check);
-  // Factored implementation used by GenerateFieldLoadWithBakerReadBarrier
-  // and GenerateArrayLoadWithBakerReadBarrier.
+  // Factored implementation, used by GenerateFieldLoadWithBakerReadBarrier,
+  // GenerateArrayLoadWithBakerReadBarrier and some intrinsics.
+  //
+  // Load the object reference located at address `src`, held by
+  // object `obj`, into `ref`, and mark it if needed.  The base of
+  // address `src` must be `obj`.
+  //
+  // If `always_update_field` is true, the value of the reference is
+  // atomically updated in the holder (`obj`).  This operation
+  // requires two temporary registers, which must be provided as
+  // non-null pointers (`temp1` and `temp2`).
   void GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
                                                  Location ref,
                                                  CpuRegister obj,
                                                  const Address& src,
-                                                 bool needs_null_check);
+                                                 bool needs_null_check,
+                                                 bool always_update_field = false,
+                                                 CpuRegister* temp1 = nullptr,
+                                                 CpuRegister* temp2 = nullptr);
 
   // Generate a read barrier for a heap reference within `instruction`
   // using a slow path.
diff --git a/compiler/optimizing/codegen_test.cc b/compiler/optimizing/codegen_test.cc
index f19faa3..ac83bd9 100644
--- a/compiler/optimizing/codegen_test.cc
+++ b/compiler/optimizing/codegen_test.cc
@@ -115,8 +115,6 @@
     blocked_core_registers_[arm::R4] = true;
     blocked_core_registers_[arm::R6] = false;
     blocked_core_registers_[arm::R7] = false;
-    // Makes pair R6-R7 available.
-    blocked_register_pairs_[arm::R6_R7] = false;
   }
 };
 
@@ -137,8 +135,6 @@
     blocked_core_registers_[arm::R4] = true;
     blocked_core_registers_[arm::R6] = false;
     blocked_core_registers_[arm::R7] = false;
-    // Makes pair R6-R7 available.
-    blocked_register_pairs_[arm::R6_R7] = false;
   }
 };
 #endif
@@ -158,14 +154,9 @@
     x86::CodeGeneratorX86::SetupBlockedRegisters();
     // ebx is a callee-save register in C, but caller-save for ART.
     blocked_core_registers_[x86::EBX] = true;
-    blocked_register_pairs_[x86::EAX_EBX] = true;
-    blocked_register_pairs_[x86::EDX_EBX] = true;
-    blocked_register_pairs_[x86::ECX_EBX] = true;
-    blocked_register_pairs_[x86::EBX_EDI] = true;
 
     // Make edi available.
     blocked_core_registers_[x86::EDI] = false;
-    blocked_register_pairs_[x86::ECX_EDI] = false;
   }
 };
 #endif
@@ -268,7 +259,7 @@
   GraphChecker graph_checker(graph);
   graph_checker.Run();
   if (!graph_checker.IsValid()) {
-    for (auto error : graph_checker.GetErrors()) {
+    for (const auto& error : graph_checker.GetErrors()) {
       std::cout << error << std::endl;
     }
   }
@@ -278,7 +269,7 @@
 template <typename Expected>
 static void RunCodeNoCheck(CodeGenerator* codegen,
                            HGraph* graph,
-                           std::function<void(HGraph*)> hook_before_codegen,
+                           const std::function<void(HGraph*)>& hook_before_codegen,
                            bool has_result,
                            Expected expected) {
   SsaLivenessAnalysis liveness(graph, codegen);
diff --git a/compiler/optimizing/common_arm.h b/compiler/optimizing/common_arm.h
index 8535417..5129daf 100644
--- a/compiler/optimizing/common_arm.h
+++ b/compiler/optimizing/common_arm.h
@@ -37,9 +37,24 @@
   return dwarf::Reg::ArmFp(static_cast<int>(reg.GetCode()));
 }
 
-inline vixl::aarch32::DRegister FromLowSToD(vixl::aarch32::SRegister reg) {
-  DCHECK_EQ(reg.GetCode() % 2, 0u) << reg;
-  return vixl::aarch32::DRegister(reg.GetCode() / 2);
+inline vixl::aarch32::Register HighRegisterFrom(Location location) {
+  DCHECK(location.IsRegisterPair()) << location;
+  return vixl::aarch32::Register(location.AsRegisterPairHigh<vixl::aarch32::Register>());
+}
+
+inline vixl::aarch32::DRegister HighDRegisterFrom(Location location) {
+  DCHECK(location.IsFpuRegisterPair()) << location;
+  return vixl::aarch32::DRegister(location.AsFpuRegisterPairHigh<vixl::aarch32::DRegister>());
+}
+
+inline vixl::aarch32::Register LowRegisterFrom(Location location) {
+  DCHECK(location.IsRegisterPair()) << location;
+  return vixl::aarch32::Register(location.AsRegisterPairLow<vixl::aarch32::Register>());
+}
+
+inline vixl::aarch32::SRegister LowSRegisterFrom(Location location) {
+  DCHECK(location.IsFpuRegisterPair()) << location;
+  return vixl::aarch32::SRegister(location.AsFpuRegisterPairLow<vixl::aarch32::SRegister>());
 }
 
 inline vixl::aarch32::Register RegisterFrom(Location location) {
@@ -53,8 +68,10 @@
 }
 
 inline vixl::aarch32::DRegister DRegisterFrom(Location location) {
-  DCHECK(location.IsFpuRegister()) << location;
-  return vixl::aarch32::DRegister(location.reg());
+  DCHECK(location.IsFpuRegisterPair()) << location;
+  int reg_code = location.low();
+  DCHECK_EQ(reg_code % 2, 0) << reg_code;
+  return vixl::aarch32::DRegister(reg_code / 2);
 }
 
 inline vixl::aarch32::SRegister SRegisterFrom(Location location) {
@@ -74,6 +91,15 @@
   return DRegisterFrom(instr->GetLocations()->Out());
 }
 
+inline vixl::aarch32::VRegister OutputVRegister(HInstruction* instr) {
+  Primitive::Type type = instr->GetType();
+  if (type == Primitive::kPrimFloat) {
+    return OutputSRegister(instr);
+  } else {
+    return OutputDRegister(instr);
+  }
+}
+
 inline vixl::aarch32::SRegister InputSRegisterAt(HInstruction* instr, int input_index) {
   Primitive::Type type = instr->InputAt(input_index)->GetType();
   DCHECK_EQ(type, Primitive::kPrimFloat) << type;
@@ -86,6 +112,15 @@
   return DRegisterFrom(instr->GetLocations()->InAt(input_index));
 }
 
+inline vixl::aarch32::VRegister InputVRegisterAt(HInstruction* instr, int input_index) {
+  Primitive::Type type = instr->InputAt(input_index)->GetType();
+  if (type == Primitive::kPrimFloat) {
+    return InputSRegisterAt(instr, input_index);
+  } else {
+    return InputDRegisterAt(instr, input_index);
+  }
+}
+
 inline vixl::aarch32::Register OutputRegister(HInstruction* instr) {
   return RegisterFrom(instr->GetLocations()->Out(), instr->GetType());
 }
@@ -95,6 +130,21 @@
                       instr->InputAt(input_index)->GetType());
 }
 
+inline vixl::aarch32::Register InputRegister(HInstruction* instr) {
+  DCHECK_EQ(instr->InputCount(), 1u);
+  return InputRegisterAt(instr, 0);
+}
+
+inline int32_t Int32ConstantFrom(Location location) {
+  HConstant* instr = location.GetConstant();
+  if (instr->IsIntConstant()) {
+    return instr->AsIntConstant()->GetValue();
+  } else {
+    DCHECK(instr->IsNullConstant()) << instr->DebugName();
+    return 0;
+  }
+}
+
 inline int64_t Int64ConstantFrom(Location location) {
   HConstant* instr = location.GetConstant();
   if (instr->IsIntConstant()) {
@@ -120,6 +170,24 @@
                      instr->InputAt(input_index)->GetType());
 }
 
+inline Location LocationFrom(const vixl::aarch32::Register& reg) {
+  return Location::RegisterLocation(reg.GetCode());
+}
+
+inline Location LocationFrom(const vixl::aarch32::SRegister& reg) {
+  return Location::FpuRegisterLocation(reg.GetCode());
+}
+
+inline Location LocationFrom(const vixl::aarch32::Register& low,
+                             const vixl::aarch32::Register& high) {
+  return Location::RegisterPairLocation(low.GetCode(), high.GetCode());
+}
+
+inline Location LocationFrom(const vixl::aarch32::SRegister& low,
+                             const vixl::aarch32::SRegister& high) {
+  return Location::FpuRegisterPairLocation(low.GetCode(), high.GetCode());
+}
+
 }  // namespace helpers
 }  // namespace arm
 }  // namespace art
diff --git a/compiler/optimizing/constant_folding.h b/compiler/optimizing/constant_folding.h
index e10b1d6..05c6df4 100644
--- a/compiler/optimizing/constant_folding.h
+++ b/compiler/optimizing/constant_folding.h
@@ -39,8 +39,7 @@
  */
 class HConstantFolding : public HOptimization {
  public:
-  HConstantFolding(HGraph* graph, const char* name = kConstantFoldingPassName)
-      : HOptimization(graph, name) {}
+  HConstantFolding(HGraph* graph, const char* name) : HOptimization(graph, name) {}
 
   void Run() OVERRIDE;
 
diff --git a/compiler/optimizing/constant_folding_test.cc b/compiler/optimizing/constant_folding_test.cc
index d1a2a26..5fac3ac 100644
--- a/compiler/optimizing/constant_folding_test.cc
+++ b/compiler/optimizing/constant_folding_test.cc
@@ -42,7 +42,7 @@
                 const std::string& expected_before,
                 const std::string& expected_after_cf,
                 const std::string& expected_after_dce,
-                std::function<void(HGraph*)> check_after_cf,
+                const std::function<void(HGraph*)>& check_after_cf,
                 Primitive::Type return_type = Primitive::kPrimInt) {
     graph_ = CreateCFG(&allocator_, data, return_type);
     TestCodeOnReadyGraph(expected_before,
@@ -54,7 +54,7 @@
   void TestCodeOnReadyGraph(const std::string& expected_before,
                             const std::string& expected_after_cf,
                             const std::string& expected_after_dce,
-                            std::function<void(HGraph*)> check_after_cf) {
+                            const std::function<void(HGraph*)>& check_after_cf) {
     ASSERT_NE(graph_, nullptr);
 
     StringPrettyPrinter printer_before(graph_);
@@ -65,7 +65,7 @@
     std::unique_ptr<const X86InstructionSetFeatures> features_x86(
         X86InstructionSetFeatures::FromCppDefines());
     x86::CodeGeneratorX86 codegenX86(graph_, *features_x86.get(), CompilerOptions());
-    HConstantFolding(graph_).Run();
+    HConstantFolding(graph_, "constant_folding").Run();
     GraphChecker graph_checker_cf(graph_);
     graph_checker_cf.Run();
     ASSERT_TRUE(graph_checker_cf.IsValid());
@@ -77,7 +77,7 @@
 
     check_after_cf(graph_);
 
-    HDeadCodeElimination(graph_).Run();
+    HDeadCodeElimination(graph_, nullptr /* stats */, "dead_code_elimination").Run();
     GraphChecker graph_checker_dce(graph_);
     graph_checker_dce.Run();
     ASSERT_TRUE(graph_checker_dce.IsValid());
diff --git a/compiler/optimizing/dead_code_elimination.cc b/compiler/optimizing/dead_code_elimination.cc
index aa3f268..c31c66a 100644
--- a/compiler/optimizing/dead_code_elimination.cc
+++ b/compiler/optimizing/dead_code_elimination.cc
@@ -18,6 +18,7 @@
 
 #include "base/array_ref.h"
 #include "base/bit_vector-inl.h"
+#include "base/stl_util.h"
 #include "ssa_phi_elimination.h"
 
 namespace art {
@@ -160,19 +161,32 @@
 //        |      |      |
 //       B4      B5    B?
 //
-// This simplification cannot be applied for loop headers, as they
-// contain a suspend check.
+// Note that individual edges can be redirected (for example B2->B3
+// can be redirected as B2->B5) without applying this optimization
+// to other incoming edges.
+//
+// This simplification cannot be applied to catch blocks, because
+// exception handler edges do not represent normal control flow.
+// Though in theory this could still apply to normal control flow
+// going directly to a catch block, we cannot support it at the
+// moment because the catch Phi's inputs do not correspond to the
+// catch block's predecessors, so we cannot identify which
+// predecessor corresponds to a given statically evaluated input.
+//
+// We do not apply this optimization to loop headers as this could
+// create irreducible loops. We rely on the suspend check in the
+// loop header to prevent the pattern match.
 //
 // Note that we rely on the dead code elimination to get rid of B3.
 bool HDeadCodeElimination::SimplifyIfs() {
   bool simplified_one_or_more_ifs = false;
   bool rerun_dominance_and_loop_analysis = false;
 
-  for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
-    HBasicBlock* block = it.Current();
+  for (HBasicBlock* block : graph_->GetReversePostOrder()) {
     HInstruction* last = block->GetLastInstruction();
     HInstruction* first = block->GetFirstInstruction();
-    if (last->IsIf() &&
+    if (!block->IsCatchBlock() &&
+        last->IsIf() &&
         block->HasSinglePhi() &&
         block->GetFirstPhi()->HasOnlyOneNonEnvironmentUse()) {
       bool has_only_phi_and_if = (last == first) && (last->InputAt(0) == block->GetFirstPhi());
@@ -271,20 +285,22 @@
 }
 
 void HDeadCodeElimination::ConnectSuccessiveBlocks() {
-  // Order does not matter.
-  for (HReversePostOrderIterator it(*graph_); !it.Done();) {
-    HBasicBlock* block  = it.Current();
-    if (block->IsEntryBlock() || !block->GetLastInstruction()->IsGoto()) {
-      it.Advance();
-      continue;
+  // Order does not matter. Skip the entry block by starting at index 1 in reverse post order.
+  for (size_t i = 1u, size = graph_->GetReversePostOrder().size(); i != size; ++i) {
+    HBasicBlock* block  = graph_->GetReversePostOrder()[i];
+    DCHECK(!block->IsEntryBlock());
+    while (block->GetLastInstruction()->IsGoto()) {
+      HBasicBlock* successor = block->GetSingleSuccessor();
+      if (successor->IsExitBlock() || successor->GetPredecessors().size() != 1u) {
+        break;
+      }
+      DCHECK_LT(i, IndexOfElement(graph_->GetReversePostOrder(), successor));
+      block->MergeWith(successor);
+      --size;
+      DCHECK_EQ(size, graph_->GetReversePostOrder().size());
+      DCHECK_EQ(block, graph_->GetReversePostOrder()[i]);
+      // Reiterate on this block in case it can be merged with its new successor.
     }
-    HBasicBlock* successor = block->GetSingleSuccessor();
-    if (successor->IsExitBlock() || successor->GetPredecessors().size() != 1u) {
-      it.Advance();
-      continue;
-    }
-    block->MergeWith(successor);
-    // Reiterate on this block in case it can be merged with its new successor.
   }
 }
 
@@ -300,8 +316,7 @@
   // Remove all dead blocks. Iterate in post order because removal needs the
   // block's chain of dominators and nested loops need to be updated from the
   // inside out.
-  for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
-    HBasicBlock* block  = it.Current();
+  for (HBasicBlock* block : graph_->GetPostOrder()) {
     int id = block->GetBlockId();
     if (!live_blocks.IsBitSet(id)) {
       MaybeRecordDeadBlock(block);
@@ -332,8 +347,7 @@
 void HDeadCodeElimination::RemoveDeadInstructions() {
   // Process basic blocks in post-order in the dominator tree, so that
   // a dead instruction depending on another dead instruction is removed.
-  for (HPostOrderIterator b(*graph_); !b.Done(); b.Advance()) {
-    HBasicBlock* block = b.Current();
+  for (HBasicBlock* block : graph_->GetPostOrder()) {
     // Traverse this block's instructions in backward order and remove
     // the unused ones.
     HBackwardInstructionIterator i(block->GetInstructions());
@@ -343,14 +357,7 @@
     for (i.Advance(); !i.Done(); i.Advance()) {
       HInstruction* inst = i.Current();
       DCHECK(!inst->IsControlFlow());
-      if (!inst->HasSideEffects()
-          && !inst->CanThrow()
-          && !inst->IsSuspendCheck()
-          && !inst->IsNativeDebugInfo()
-          // If we added an explicit barrier then we should keep it.
-          && !inst->IsMemoryBarrier()
-          && !inst->IsParameterValue()
-          && !inst->HasUses()) {
+      if (inst->IsDeadAndRemovable()) {
         block->RemoveInstruction(inst);
         MaybeRecordStat(MethodCompilationStat::kRemovedDeadInstruction);
       }
diff --git a/compiler/optimizing/dead_code_elimination.h b/compiler/optimizing/dead_code_elimination.h
index 58e700d..84fd890 100644
--- a/compiler/optimizing/dead_code_elimination.h
+++ b/compiler/optimizing/dead_code_elimination.h
@@ -29,9 +29,7 @@
  */
 class HDeadCodeElimination : public HOptimization {
  public:
-  HDeadCodeElimination(HGraph* graph,
-                       OptimizingCompilerStats* stats = nullptr,
-                       const char* name = kDeadCodeEliminationPassName)
+  HDeadCodeElimination(HGraph* graph, OptimizingCompilerStats* stats, const char* name)
       : HOptimization(graph, name, stats) {}
 
   void Run() OVERRIDE;
diff --git a/compiler/optimizing/dead_code_elimination_test.cc b/compiler/optimizing/dead_code_elimination_test.cc
index fe52aac..fdd77e7 100644
--- a/compiler/optimizing/dead_code_elimination_test.cc
+++ b/compiler/optimizing/dead_code_elimination_test.cc
@@ -44,7 +44,7 @@
   std::unique_ptr<const X86InstructionSetFeatures> features_x86(
       X86InstructionSetFeatures::FromCppDefines());
   x86::CodeGeneratorX86 codegenX86(graph, *features_x86.get(), CompilerOptions());
-  HDeadCodeElimination(graph).Run();
+  HDeadCodeElimination(graph, nullptr /* stats */, "dead_code_elimination").Run();
   GraphChecker graph_checker(graph);
   graph_checker.Run();
   ASSERT_TRUE(graph_checker.IsValid());
diff --git a/compiler/optimizing/emit_swap_mips_test.cc b/compiler/optimizing/emit_swap_mips_test.cc
new file mode 100644
index 0000000..9dc53e6
--- /dev/null
+++ b/compiler/optimizing/emit_swap_mips_test.cc
@@ -0,0 +1,354 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "base/arena_allocator.h"
+#include "code_generator_mips.h"
+#include "optimizing_unit_test.h"
+#include "parallel_move_resolver.h"
+#include "utils/assembler_test_base.h"
+#include "utils/mips/assembler_mips.h"
+
+#include "gtest/gtest.h"
+
+namespace art {
+
+class EmitSwapMipsTest : public ::testing::Test {
+ public:
+  void SetUp() OVERRIDE {
+    allocator_.reset(new ArenaAllocator(&pool_));
+    graph_ = CreateGraph(allocator_.get());
+    isa_features_ = MipsInstructionSetFeatures::FromCppDefines();
+    codegen_ = new (graph_->GetArena()) mips::CodeGeneratorMIPS(graph_,
+                                                                *isa_features_.get(),
+                                                                CompilerOptions());
+    moves_ = new (allocator_.get()) HParallelMove(allocator_.get());
+    test_helper_.reset(
+        new AssemblerTestInfrastructure(GetArchitectureString(),
+                                        GetAssemblerCmdName(),
+                                        GetAssemblerParameters(),
+                                        GetObjdumpCmdName(),
+                                        GetObjdumpParameters(),
+                                        GetDisassembleCmdName(),
+                                        GetDisassembleParameters(),
+                                        GetAssemblyHeader()));
+  }
+
+  void TearDown() OVERRIDE {
+    allocator_.reset();
+    test_helper_.reset();
+  }
+
+  // Get the typically used name for this architecture.
+  std::string GetArchitectureString() {
+    return "mips";
+  }
+
+  // Get the name of the assembler.
+  std::string GetAssemblerCmdName() {
+    return "as";
+  }
+
+  // Switches to the assembler command.
+  std::string GetAssemblerParameters() {
+    return " --no-warn -32 -march=mips32r2";
+  }
+
+  // Get the name of the objdump.
+  std::string GetObjdumpCmdName() {
+    return "objdump";
+  }
+
+  // Switches to the objdump command.
+  std::string GetObjdumpParameters() {
+    return " -h";
+  }
+
+  // Get the name of the objdump.
+  std::string GetDisassembleCmdName() {
+    return "objdump";
+  }
+
+  // Switches to the objdump command.
+  std::string GetDisassembleParameters() {
+    return " -D -bbinary -mmips:isa32r2";
+  }
+
+  // No need for assembly header here.
+  const char* GetAssemblyHeader() {
+    return nullptr;
+  }
+
+  void DriverWrapper(HParallelMove* move, std::string assembly_text, std::string test_name) {
+    codegen_->GetMoveResolver()->EmitNativeCode(move);
+    assembler_ = codegen_->GetAssembler();
+    assembler_->FinalizeCode();
+    std::unique_ptr<std::vector<uint8_t>> data(new std::vector<uint8_t>(assembler_->CodeSize()));
+    MemoryRegion code(&(*data)[0], data->size());
+    assembler_->FinalizeInstructions(code);
+    test_helper_->Driver(*data, assembly_text, test_name);
+  }
+
+ protected:
+  ArenaPool pool_;
+  HGraph* graph_;
+  HParallelMove* moves_;
+  mips::CodeGeneratorMIPS* codegen_;
+  mips::MipsAssembler* assembler_;
+  std::unique_ptr<ArenaAllocator> allocator_;
+  std::unique_ptr<AssemblerTestInfrastructure> test_helper_;
+  std::unique_ptr<const MipsInstructionSetFeatures> isa_features_;
+};
+
+TEST_F(EmitSwapMipsTest, TwoRegisters) {
+  moves_->AddMove(
+      Location::RegisterLocation(4),
+      Location::RegisterLocation(5),
+      Primitive::kPrimInt,
+      nullptr);
+  moves_->AddMove(
+      Location::RegisterLocation(5),
+      Location::RegisterLocation(4),
+      Primitive::kPrimInt,
+      nullptr);
+  const char* expected =
+      "or $t8, $a1, $zero\n"
+      "or $a1, $a0, $zero\n"
+      "or $a0, $t8, $zero\n";
+  DriverWrapper(moves_, expected, "TwoRegisters");
+}
+
+TEST_F(EmitSwapMipsTest, TwoRegisterPairs) {
+  moves_->AddMove(
+      Location::RegisterPairLocation(4, 5),
+      Location::RegisterPairLocation(6, 7),
+      Primitive::kPrimLong,
+      nullptr);
+  moves_->AddMove(
+      Location::RegisterPairLocation(6, 7),
+      Location::RegisterPairLocation(4, 5),
+      Primitive::kPrimLong,
+      nullptr);
+  const char* expected =
+      "or $t8, $a2, $zero\n"
+      "or $a2, $a0, $zero\n"
+      "or $a0, $t8, $zero\n"
+      "or $t8, $a3, $zero\n"
+      "or $a3, $a1, $zero\n"
+      "or $a1, $t8, $zero\n";
+  DriverWrapper(moves_, expected, "TwoRegisterPairs");
+}
+
+TEST_F(EmitSwapMipsTest, TwoFpuRegistersFloat) {
+  moves_->AddMove(
+      Location::FpuRegisterLocation(4),
+      Location::FpuRegisterLocation(6),
+      Primitive::kPrimFloat,
+      nullptr);
+  moves_->AddMove(
+      Location::FpuRegisterLocation(6),
+      Location::FpuRegisterLocation(4),
+      Primitive::kPrimFloat,
+      nullptr);
+  const char* expected =
+      "mov.s $f8, $f6\n"
+      "mov.s $f6, $f4\n"
+      "mov.s $f4, $f8\n";
+  DriverWrapper(moves_, expected, "TwoFpuRegistersFloat");
+}
+
+TEST_F(EmitSwapMipsTest, TwoFpuRegistersDouble) {
+  moves_->AddMove(
+      Location::FpuRegisterLocation(4),
+      Location::FpuRegisterLocation(6),
+      Primitive::kPrimDouble,
+      nullptr);
+  moves_->AddMove(
+      Location::FpuRegisterLocation(6),
+      Location::FpuRegisterLocation(4),
+      Primitive::kPrimDouble,
+      nullptr);
+  const char* expected =
+      "mov.d $f8, $f6\n"
+      "mov.d $f6, $f4\n"
+      "mov.d $f4, $f8\n";
+  DriverWrapper(moves_, expected, "TwoFpuRegistersDouble");
+}
+
+TEST_F(EmitSwapMipsTest, RegisterAndFpuRegister) {
+  moves_->AddMove(
+      Location::RegisterLocation(4),
+      Location::FpuRegisterLocation(6),
+      Primitive::kPrimFloat,
+      nullptr);
+  moves_->AddMove(
+      Location::FpuRegisterLocation(6),
+      Location::RegisterLocation(4),
+      Primitive::kPrimFloat,
+      nullptr);
+  const char* expected =
+      "or $t8, $a0, $zero\n"
+      "mfc1 $a0, $f6\n"
+      "mtc1 $t8, $f6\n";
+  DriverWrapper(moves_, expected, "RegisterAndFpuRegister");
+}
+
+TEST_F(EmitSwapMipsTest, RegisterPairAndFpuRegister) {
+  moves_->AddMove(
+      Location::RegisterPairLocation(4, 5),
+      Location::FpuRegisterLocation(4),
+      Primitive::kPrimDouble,
+      nullptr);
+  moves_->AddMove(
+      Location::FpuRegisterLocation(4),
+      Location::RegisterPairLocation(4, 5),
+      Primitive::kPrimDouble,
+      nullptr);
+  const char* expected =
+      "mfc1 $t8, $f4\n"
+      "mfc1 $at, $f5\n"
+      "mtc1 $a0, $f4\n"
+      "mtc1 $a1, $f5\n"
+      "or $a0, $t8, $zero\n"
+      "or $a1, $at, $zero\n";
+  DriverWrapper(moves_, expected, "RegisterPairAndFpuRegister");
+}
+
+TEST_F(EmitSwapMipsTest, TwoStackSlots) {
+  moves_->AddMove(
+      Location::StackSlot(52),
+      Location::StackSlot(48),
+      Primitive::kPrimInt,
+      nullptr);
+  moves_->AddMove(
+      Location::StackSlot(48),
+      Location::StackSlot(52),
+      Primitive::kPrimInt,
+      nullptr);
+  const char* expected =
+      "addiu $sp, $sp, -4\n"
+      "sw $v0, 0($sp)\n"
+      "lw $v0, 56($sp)\n"
+      "lw $t8, 52($sp)\n"
+      "sw $v0, 52($sp)\n"
+      "sw $t8, 56($sp)\n"
+      "lw $v0, 0($sp)\n"
+      "addiu $sp, $sp, 4\n";
+  DriverWrapper(moves_, expected, "TwoStackSlots");
+}
+
+TEST_F(EmitSwapMipsTest, TwoDoubleStackSlots) {
+  moves_->AddMove(
+      Location::DoubleStackSlot(56),
+      Location::DoubleStackSlot(48),
+      Primitive::kPrimLong,
+      nullptr);
+  moves_->AddMove(
+      Location::DoubleStackSlot(48),
+      Location::DoubleStackSlot(56),
+      Primitive::kPrimLong,
+      nullptr);
+  const char* expected =
+      "addiu $sp, $sp, -4\n"
+      "sw $v0, 0($sp)\n"
+      "lw $v0, 60($sp)\n"
+      "lw $t8, 52($sp)\n"
+      "sw $v0, 52($sp)\n"
+      "sw $t8, 60($sp)\n"
+      "lw $v0, 64($sp)\n"
+      "lw $t8, 56($sp)\n"
+      "sw $v0, 56($sp)\n"
+      "sw $t8, 64($sp)\n"
+      "lw $v0, 0($sp)\n"
+      "addiu $sp, $sp, 4\n";
+  DriverWrapper(moves_, expected, "TwoDoubleStackSlots");
+}
+
+TEST_F(EmitSwapMipsTest, RegisterAndStackSlot) {
+  moves_->AddMove(
+      Location::RegisterLocation(4),
+      Location::StackSlot(48),
+      Primitive::kPrimInt,
+      nullptr);
+  moves_->AddMove(
+      Location::StackSlot(48),
+      Location::RegisterLocation(4),
+      Primitive::kPrimInt,
+      nullptr);
+  const char* expected =
+      "or $t8, $a0, $zero\n"
+      "lw $a0, 48($sp)\n"
+      "sw $t8, 48($sp)\n";
+  DriverWrapper(moves_, expected, "RegisterAndStackSlot");
+}
+
+TEST_F(EmitSwapMipsTest, RegisterPairAndDoubleStackSlot) {
+  moves_->AddMove(
+      Location::RegisterPairLocation(4, 5),
+      Location::DoubleStackSlot(32),
+      Primitive::kPrimLong,
+      nullptr);
+  moves_->AddMove(
+      Location::DoubleStackSlot(32),
+      Location::RegisterPairLocation(4, 5),
+      Primitive::kPrimLong,
+      nullptr);
+  const char* expected =
+      "or $t8, $a0, $zero\n"
+      "lw $a0, 32($sp)\n"
+      "sw $t8, 32($sp)\n"
+      "or $t8, $a1, $zero\n"
+      "lw $a1, 36($sp)\n"
+      "sw $t8, 36($sp)\n";
+  DriverWrapper(moves_, expected, "RegisterPairAndDoubleStackSlot");
+}
+
+TEST_F(EmitSwapMipsTest, FpuRegisterAndStackSlot) {
+  moves_->AddMove(
+      Location::FpuRegisterLocation(4),
+      Location::StackSlot(48),
+      Primitive::kPrimFloat,
+      nullptr);
+  moves_->AddMove(
+      Location::StackSlot(48),
+      Location::FpuRegisterLocation(4),
+      Primitive::kPrimFloat,
+      nullptr);
+  const char* expected =
+      "mov.s $f8, $f4\n"
+      "lwc1 $f4, 48($sp)\n"
+      "swc1 $f8, 48($sp)\n";
+  DriverWrapper(moves_, expected, "FpuRegisterAndStackSlot");
+}
+
+TEST_F(EmitSwapMipsTest, FpuRegisterAndDoubleStackSlot) {
+  moves_->AddMove(
+      Location::FpuRegisterLocation(4),
+      Location::DoubleStackSlot(48),
+      Primitive::kPrimDouble,
+      nullptr);
+  moves_->AddMove(
+      Location::DoubleStackSlot(48),
+      Location::FpuRegisterLocation(4),
+      Primitive::kPrimDouble,
+      nullptr);
+  const char* expected =
+      "mov.d $f8, $f4\n"
+      "ldc1 $f4, 48($sp)\n"
+      "sdc1 $f8, 48($sp)\n";
+  DriverWrapper(moves_, expected, "FpuRegisterAndDoubleStackSlot");
+}
+
+}  // namespace art
diff --git a/compiler/optimizing/graph_visualizer.cc b/compiler/optimizing/graph_visualizer.cc
index 912ee29..09dcefa 100644
--- a/compiler/optimizing/graph_visualizer.cc
+++ b/compiler/optimizing/graph_visualizer.cc
@@ -441,8 +441,8 @@
 
   void VisitInvoke(HInvoke* invoke) OVERRIDE {
     StartAttributeStream("dex_file_index") << invoke->GetDexMethodIndex();
-    StartAttributeStream("method_name") << PrettyMethod(
-        invoke->GetDexMethodIndex(), GetGraph()->GetDexFile(), /* with_signature */ false);
+    StartAttributeStream("method_name") << GetGraph()->GetDexFile().PrettyMethod(
+        invoke->GetDexMethodIndex(), /* with_signature */ false);
   }
 
   void VisitInvokeUnresolved(HInvokeUnresolved* invoke) OVERRIDE {
@@ -465,15 +465,15 @@
   }
 
   void VisitInstanceFieldGet(HInstanceFieldGet* iget) OVERRIDE {
-    StartAttributeStream("field_name") << PrettyField(iget->GetFieldInfo().GetFieldIndex(),
-                                                      iget->GetFieldInfo().GetDexFile(),
+    StartAttributeStream("field_name") <<
+        iget->GetFieldInfo().GetDexFile().PrettyField(iget->GetFieldInfo().GetFieldIndex(),
                                                       /* with type */ false);
     StartAttributeStream("field_type") << iget->GetFieldType();
   }
 
   void VisitInstanceFieldSet(HInstanceFieldSet* iset) OVERRIDE {
-    StartAttributeStream("field_name") << PrettyField(iset->GetFieldInfo().GetFieldIndex(),
-                                                      iset->GetFieldInfo().GetDexFile(),
+    StartAttributeStream("field_name") <<
+        iset->GetFieldInfo().GetDexFile().PrettyField(iset->GetFieldInfo().GetFieldIndex(),
                                                       /* with type */ false);
     StartAttributeStream("field_type") << iset->GetFieldType();
   }
@@ -604,7 +604,8 @@
         : instruction->GetReferenceTypeInfo();
       ScopedObjectAccess soa(Thread::Current());
       if (info.IsValid()) {
-        StartAttributeStream("klass") << PrettyDescriptor(info.GetTypeHandle().Get());
+        StartAttributeStream("klass")
+            << mirror::Class::PrettyDescriptor(info.GetTypeHandle().Get());
         StartAttributeStream("can_be_null")
             << std::boolalpha << instruction->CanBeNull() << std::noboolalpha;
         StartAttributeStream("exact") << std::boolalpha << info.IsExact() << std::noboolalpha;
diff --git a/compiler/optimizing/gvn.cc b/compiler/optimizing/gvn.cc
index 1e86b75..f5931a2 100644
--- a/compiler/optimizing/gvn.cc
+++ b/compiler/optimizing/gvn.cc
@@ -411,8 +411,8 @@
 
   // Use the reverse post order to ensure the non back-edge predecessors of a block are
   // visited before the block itself.
-  for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
-    VisitBasicBlock(it.Current());
+  for (HBasicBlock* block : graph_->GetReversePostOrder()) {
+    VisitBasicBlock(block);
   }
 }
 
diff --git a/compiler/optimizing/induction_var_analysis.cc b/compiler/optimizing/induction_var_analysis.cc
index c501ccf..f2602fb 100644
--- a/compiler/optimizing/induction_var_analysis.cc
+++ b/compiler/optimizing/induction_var_analysis.cc
@@ -23,12 +23,12 @@
  * Since graph traversal may enter a SCC at any position, an initial representation may be rotated,
  * along dependences, viz. any of (a, b, c, d), (d, a, b, c)  (c, d, a, b), (b, c, d, a) assuming
  * a chain of dependences (mutual independent items may occur in arbitrary order). For proper
- * classification, the lexicographically first entry-phi is rotated to the front.
+ * classification, the lexicographically first loop-phi is rotated to the front.
  */
 static void RotateEntryPhiFirst(HLoopInformation* loop,
                                 ArenaVector<HInstruction*>* scc,
                                 ArenaVector<HInstruction*>* new_scc) {
-  // Find very first entry-phi.
+  // Find very first loop-phi.
   const HInstructionList& phis = loop->GetHeader()->GetPhis();
   HInstruction* phi = nullptr;
   size_t phi_pos = -1;
@@ -41,7 +41,7 @@
     }
   }
 
-  // If found, bring that entry-phi to front.
+  // If found, bring that loop-phi to front.
   if (phi != nullptr) {
     new_scc->clear();
     for (size_t i = 0; i < size; i++) {
@@ -87,23 +87,24 @@
     : HOptimization(graph, kInductionPassName),
       global_depth_(0),
       stack_(graph->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)),
-      scc_(graph->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)),
       map_(std::less<HInstruction*>(),
            graph->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)),
+      scc_(graph->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)),
       cycle_(std::less<HInstruction*>(),
              graph->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)),
+      type_(Primitive::kPrimVoid),
       induction_(std::less<HLoopInformation*>(),
-                 graph->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)) {
+                 graph->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)),
+      cycles_(std::less<HPhi*>(),
+              graph->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)) {
 }
 
 void HInductionVarAnalysis::Run() {
   // Detects sequence variables (generalized induction variables) during an outer to inner
   // traversal of all loops using Gerlek's algorithm. The order is important to enable
   // range analysis on outer loop while visiting inner loops.
-  for (HReversePostOrderIterator it_graph(*graph_); !it_graph.Done(); it_graph.Advance()) {
-    HBasicBlock* graph_block = it_graph.Current();
+  for (HBasicBlock* graph_block : graph_->GetReversePostOrder()) {
     // Don't analyze irreducible loops.
-    // TODO(ajcbik): could/should we remove this restriction?
     if (graph_block->IsLoopHeader() && !graph_block->GetLoopInformation()->IsIrreducible()) {
       VisitLoop(graph_block->GetLoopInformation());
     }
@@ -121,7 +122,7 @@
     HBasicBlock* loop_block = it_loop.Current();
     DCHECK(loop_block->IsInLoop());
     if (loop_block->GetLoopInformation() != loop) {
-      continue;  // Inner loops already visited.
+      continue;  // Inner loops visited later.
     }
     // Visit phi-operations and instructions.
     for (HInstructionIterator it(loop_block->GetPhis()); !it.Done(); it.Advance()) {
@@ -245,13 +246,13 @@
   const size_t size = scc_.size();
   DCHECK_GE(size, 1u);
 
-  // Rotate proper entry-phi to front.
+  // Rotate proper loop-phi to front.
   if (size > 1) {
     ArenaVector<HInstruction*> other(graph_->GetArena()->Adapter(kArenaAllocInductionVarAnalysis));
     RotateEntryPhiFirst(loop, &scc_, &other);
   }
 
-  // Analyze from entry-phi onwards.
+  // Analyze from loop-phi onwards.
   HInstruction* phi = scc_[0];
   if (!phi->IsLoopHeaderPhi()) {
     return;
@@ -263,6 +264,9 @@
     return;
   }
 
+  // Store interesting cycle.
+  AssignCycle(phi->AsPhi());
+
   // Singleton is wrap-around induction if all internal links have the same meaning.
   if (size == 1) {
     InductionInfo* update = TransferPhi(loop, phi, /* input_index */ 1);
@@ -285,6 +289,12 @@
     } else if (instruction->IsSub()) {
       update = SolveAddSub(
           loop, phi, instruction, instruction->InputAt(0), instruction->InputAt(1), kSub, true);
+    } else if (instruction->IsXor()) {
+      update = SolveXor(loop, phi, instruction, instruction->InputAt(0), instruction->InputAt(1));
+    } else if (instruction->IsEqual()) {
+      update = SolveTest(loop, phi, instruction, 0);
+    } else if (instruction->IsNotEqual()) {
+      update = SolveTest(loop, phi, instruction, 1);
     } else if (instruction->IsTypeConversion()) {
       update = SolveCnv(instruction->AsTypeConversion());
     }
@@ -360,6 +370,7 @@
   // can be combined with an invariant to yield a similar result. Even two linear inputs can
   // be combined. All other combinations fail, however.
   if (a != nullptr && b != nullptr) {
+    type_ = Narrowest(type_, Narrowest(a->type, b->type));
     if (a->induction_class == kInvariant && b->induction_class == kInvariant) {
       return CreateInvariantOp(op, a, b);
     } else if (a->induction_class == kLinear && b->induction_class == kLinear) {
@@ -396,6 +407,7 @@
   // can be multiplied with an invariant to yield a similar but multiplied result.
   // Two non-invariant inputs cannot be multiplied, however.
   if (a != nullptr && b != nullptr) {
+    type_ = Narrowest(type_, Narrowest(a->type, b->type));
     if (a->induction_class == kInvariant && b->induction_class == kInvariant) {
       return CreateInvariantOp(kMul, a, b);
     } else if (a->induction_class == kInvariant) {
@@ -436,6 +448,7 @@
   // Transfer over a unary negation: an invariant, linear, wrap-around, or periodic input
   // yields a similar but negated induction as result.
   if (a != nullptr) {
+    type_ = Narrowest(type_, a->type);
     if (a->induction_class == kInvariant) {
       return CreateInvariantOp(kNeg, nullptr, a);
     }
@@ -553,6 +566,42 @@
   return nullptr;
 }
 
+HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::SolveXor(HLoopInformation* loop,
+                                                                      HInstruction* entry_phi,
+                                                                      HInstruction* instruction,
+                                                                      HInstruction* x,
+                                                                      HInstruction* y) {
+  // Solve within a tight cycle on x = c ^ x or x = x ^ c.
+  if (entry_phi->InputCount() == 2 && instruction == entry_phi->InputAt(1)) {
+    InductionInfo* initial = LookupInfo(loop, entry_phi->InputAt(0));
+    InductionInfo* a = LookupInfo(loop, x);
+    if (a != nullptr && a->induction_class == kInvariant && entry_phi == y) {
+      return CreateInduction(kPeriodic, CreateInvariantOp(kXor, a, initial), initial, type_);
+    }
+    InductionInfo* b = LookupInfo(loop, y);
+    if (b != nullptr && b->induction_class == kInvariant && entry_phi == x) {
+      return CreateInduction(kPeriodic, CreateInvariantOp(kXor, initial, b), initial, type_);
+    }
+  }
+  return nullptr;
+}
+
+HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::SolveTest(HLoopInformation* loop,
+                                                                       HInstruction* entry_phi,
+                                                                       HInstruction* instruction,
+                                                                       int64_t opposite_value) {
+  // Detect hidden XOR construction in tight cycles on x = (x == 0) or x = (x != 1).
+  int64_t value = -1;
+  HInstruction* x = instruction->InputAt(0);
+  HInstruction* y = instruction->InputAt(1);
+  if (IsExact(LookupInfo(loop, x), &value) && value == opposite_value) {
+    return SolveXor(loop, entry_phi, instruction, graph_->GetIntConstant(1), y);
+  } else if (IsExact(LookupInfo(loop, y), &value) && value == opposite_value) {
+    return SolveXor(loop, entry_phi, instruction, x, graph_->GetIntConstant(1));
+  }
+  return nullptr;
+}
+
 HInductionVarAnalysis::InductionInfo* HInductionVarAnalysis::SolveCnv(HTypeConversion* conversion) {
   Primitive::Type from = conversion->GetInputType();
   Primitive::Type to = conversion->GetResultType();
@@ -850,8 +899,8 @@
   int64_t value = -1;
   if (IsExact(a, &value)) {
     if (value == 0) {
-      // Simplify 0 + b = b, 0 * b = 0.
-      if (op == kAdd) {
+      // Simplify 0 + b = b, 0 ^ b = b, 0 * b = 0.
+      if (op == kAdd || op == kXor) {
         return b;
       } else if (op == kMul) {
         return a;
@@ -867,8 +916,8 @@
   }
   if (IsExact(b, &value)) {
     if (value == 0) {
-      // Simplify a + 0 = a, a - 0 = a, a * 0 = 0, -0 = 0.
-      if (op == kAdd || op == kSub) {
+      // Simplify a + 0 = a, a - 0 = a, a ^ 0 = a, a * 0 = 0, -0 = 0.
+      if (op == kAdd || op == kSub || op == kXor) {
         return a;
       } else if (op == kMul || op == kNeg) {
         return b;
@@ -899,6 +948,23 @@
   return new (graph_->GetArena()) InductionInfo(kInvariant, op, a, b, nullptr, b->type);
 }
 
+
+void HInductionVarAnalysis::AssignCycle(HPhi* phi) {
+  ArenaSet<HInstruction*>* set = &cycles_.Put(phi, ArenaSet<HInstruction*>(
+      graph_->GetArena()->Adapter(kArenaAllocInductionVarAnalysis)))->second;
+  for (HInstruction* i : scc_) {
+    set->insert(i);
+  }
+}
+
+ArenaSet<HInstruction*>* HInductionVarAnalysis::LookupCycle(HPhi* phi) {
+  auto it = cycles_.find(phi);
+  if (it != cycles_.end()) {
+    return &it->second;
+  }
+  return nullptr;
+}
+
 bool HInductionVarAnalysis::IsExact(InductionInfo* info, int64_t* value) {
   return InductionVarRange(this).IsConstant(info, InductionVarRange::kExact, value);
 }
@@ -939,6 +1005,7 @@
         case kNeg:   inv += " - ";  break;
         case kMul:   inv += " * ";  break;
         case kDiv:   inv += " / ";  break;
+        case kXor:   inv += " ^ ";  break;
         case kLT:    inv += " < ";  break;
         case kLE:    inv += " <= "; break;
         case kGT:    inv += " > ";  break;
diff --git a/compiler/optimizing/induction_var_analysis.h b/compiler/optimizing/induction_var_analysis.h
index cd4c830..7027179 100644
--- a/compiler/optimizing/induction_var_analysis.h
+++ b/compiler/optimizing/induction_var_analysis.h
@@ -64,6 +64,7 @@
     kNeg,
     kMul,
     kDiv,
+    kXor,
     kFetch,
     // Trip-counts.
     kTripCountInLoop,        // valid in full loop; loop is finite
@@ -171,7 +172,16 @@
                              HInstruction* x,
                              HInstruction* y,
                              InductionOp op,
-                             bool is_first_call);
+                             bool is_first_call);  // possibly swaps x and y to try again
+  InductionInfo* SolveXor(HLoopInformation* loop,
+                          HInstruction* entry_phi,
+                          HInstruction* instruction,
+                          HInstruction* x,
+                          HInstruction* y);
+  InductionInfo* SolveTest(HLoopInformation* loop,
+                           HInstruction* entry_phi,
+                           HInstruction* instruction,
+                           int64_t oppositive_value);
   InductionInfo* SolveCnv(HTypeConversion* conversion);
 
   // Trip count information.
@@ -204,6 +214,8 @@
   InductionInfo* LookupInfo(HLoopInformation* loop, HInstruction* instruction);
   InductionInfo* CreateConstant(int64_t value, Primitive::Type type);
   InductionInfo* CreateSimplifiedInvariant(InductionOp op, InductionInfo* a, InductionInfo* b);
+  void AssignCycle(HPhi* phi);
+  ArenaSet<HInstruction*>* LookupCycle(HPhi* phi);
 
   // Constants.
   bool IsExact(InductionInfo* info, /*out*/ int64_t* value);
@@ -219,8 +231,8 @@
   // Temporary book-keeping during the analysis.
   uint32_t global_depth_;
   ArenaVector<HInstruction*> stack_;
-  ArenaVector<HInstruction*> scc_;
   ArenaSafeMap<HInstruction*, NodeInfo> map_;
+  ArenaVector<HInstruction*> scc_;
   ArenaSafeMap<HInstruction*, InductionInfo*> cycle_;
   Primitive::Type type_;
 
@@ -230,6 +242,11 @@
    */
   ArenaSafeMap<HLoopInformation*, ArenaSafeMap<HInstruction*, InductionInfo*>> induction_;
 
+  /**
+   * Preserves induction cycle information for each loop-phi.
+   */
+  ArenaSafeMap<HPhi*, ArenaSet<HInstruction*>> cycles_;
+
   friend class InductionVarAnalysisTest;
   friend class InductionVarRange;
   friend class InductionVarRangeTest;
diff --git a/compiler/optimizing/induction_var_analysis_test.cc b/compiler/optimizing/induction_var_analysis_test.cc
index 292bc4e..031f1d7 100644
--- a/compiler/optimizing/induction_var_analysis_test.cc
+++ b/compiler/optimizing/induction_var_analysis_test.cc
@@ -107,7 +107,7 @@
   }
 
   // Builds if-statement at depth d.
-  HPhi* BuildIf(int d, HBasicBlock** ifT, HBasicBlock **ifF) {
+  HPhi* BuildIf(int d, HBasicBlock** ifT, HBasicBlock** ifF) {
     HBasicBlock* cond = new (&allocator_) HBasicBlock(graph_);
     HBasicBlock* ifTrue = new (&allocator_) HBasicBlock(graph_);
     HBasicBlock* ifFalse = new (&allocator_) HBasicBlock(graph_);
@@ -259,15 +259,15 @@
   //   k = - i;
   // }
   BuildLoopNest(1);
-  HInstruction *add = InsertInstruction(
+  HInstruction* add = InsertInstruction(
       new (&allocator_) HAdd(Primitive::kPrimInt, constant100_, basic_[0]), 0);
-  HInstruction *sub = InsertInstruction(
+  HInstruction* sub = InsertInstruction(
       new (&allocator_) HSub(Primitive::kPrimInt, constant100_, basic_[0]), 0);
-  HInstruction *mul = InsertInstruction(
+  HInstruction* mul = InsertInstruction(
       new (&allocator_) HMul(Primitive::kPrimInt, constant100_, basic_[0]), 0);
-  HInstruction *shl = InsertInstruction(
+  HInstruction* shl = InsertInstruction(
       new (&allocator_) HShl(Primitive::kPrimInt, basic_[0], constant1_), 0);
-  HInstruction *neg = InsertInstruction(
+  HInstruction* neg = InsertInstruction(
       new (&allocator_) HNeg(Primitive::kPrimInt, basic_[0]), 0);
   PerformInductionVarAnalysis();
 
@@ -291,10 +291,10 @@
   HPhi* k = InsertLoopPhi(0, 0);
   k->AddInput(constant0_);
 
-  HInstruction *add = InsertInstruction(
+  HInstruction* add = InsertInstruction(
       new (&allocator_) HAdd(Primitive::kPrimInt, k, constant100_), 0);
   HInstruction* store1 = InsertArrayStore(add, 0);
-  HInstruction *sub = InsertInstruction(
+  HInstruction* sub = InsertInstruction(
       new (&allocator_) HSub(Primitive::kPrimInt, add, constant1_), 0);
   HInstruction* store2 = InsertArrayStore(sub, 0);
   k->AddInput(sub);
@@ -381,7 +381,7 @@
   k->AddInput(constant0_);
 
   HInstruction* store = InsertArrayStore(k, 0);
-  HInstruction *sub = InsertInstruction(
+  HInstruction* sub = InsertInstruction(
       new (&allocator_) HSub(Primitive::kPrimInt, constant100_, basic_[0]), 0);
   k->AddInput(sub);
   PerformInductionVarAnalysis();
@@ -407,7 +407,7 @@
 
   HInstruction* store = InsertArrayStore(k, 0);
   k->AddInput(t);
-  HInstruction *sub = InsertInstruction(
+  HInstruction* sub = InsertInstruction(
       new (&allocator_) HSub(Primitive::kPrimInt, constant100_, basic_[0], 0), 0);
   t->AddInput(sub);
   PerformInductionVarAnalysis();
@@ -431,15 +431,15 @@
   HPhi* k = InsertLoopPhi(0, 0);
   k->AddInput(constant0_);
 
-  HInstruction *add = InsertInstruction(
+  HInstruction* add = InsertInstruction(
       new (&allocator_) HAdd(Primitive::kPrimInt, k, constant100_), 0);
-  HInstruction *sub = InsertInstruction(
+  HInstruction* sub = InsertInstruction(
       new (&allocator_) HSub(Primitive::kPrimInt, k, constant100_), 0);
-  HInstruction *mul = InsertInstruction(
+  HInstruction* mul = InsertInstruction(
       new (&allocator_) HMul(Primitive::kPrimInt, k, constant100_), 0);
-  HInstruction *shl = InsertInstruction(
+  HInstruction* shl = InsertInstruction(
       new (&allocator_) HShl(Primitive::kPrimInt, k, constant1_), 0);
-  HInstruction *neg = InsertInstruction(
+  HInstruction* neg = InsertInstruction(
       new (&allocator_) HNeg(Primitive::kPrimInt, k), 0);
   k->AddInput(
       InsertInstruction(new (&allocator_) HShl(Primitive::kPrimInt, basic_[0], constant1_), 0));
@@ -497,7 +497,7 @@
   k->AddInput(constant0_);
 
   HInstruction* store = InsertArrayStore(k, 0);
-  HInstruction *sub = InsertInstruction(
+  HInstruction* sub = InsertInstruction(
       new (&allocator_) HSub(Primitive::kPrimInt, constant1_, k), 0);
   k->AddInput(sub);
   PerformInductionVarAnalysis();
@@ -506,6 +506,131 @@
   EXPECT_STREQ("periodic((1), (0)):PrimInt", GetInductionInfo(sub, 0).c_str());
 }
 
+TEST_F(InductionVarAnalysisTest, FindXorPeriodicInduction) {
+  // Setup:
+  // k = 0;
+  // for (int i = 0; i < 100; i++) {
+  //   a[k] = 0;
+  //   k = k ^ 1;
+  // }
+  BuildLoopNest(1);
+  HPhi* k = InsertLoopPhi(0, 0);
+  k->AddInput(constant0_);
+
+  HInstruction* store = InsertArrayStore(k, 0);
+  HInstruction* x = InsertInstruction(
+      new (&allocator_) HXor(Primitive::kPrimInt, k, constant1_), 0);
+  k->AddInput(x);
+  PerformInductionVarAnalysis();
+
+  EXPECT_STREQ("periodic((0), (1)):PrimInt", GetInductionInfo(store->InputAt(1), 0).c_str());
+  EXPECT_STREQ("periodic((1), (0)):PrimInt", GetInductionInfo(x, 0).c_str());
+}
+
+TEST_F(InductionVarAnalysisTest, FindXorConstantLeftPeriodicInduction) {
+  // Setup:
+  // k = 1;
+  // for (int i = 0; i < 100; i++) {
+  //   k = 1 ^ k;
+  // }
+  BuildLoopNest(1);
+  HPhi* k = InsertLoopPhi(0, 0);
+  k->AddInput(constant1_);
+
+  HInstruction* x = InsertInstruction(
+      new (&allocator_) HXor(Primitive::kPrimInt, constant1_, k), 0);
+  k->AddInput(x);
+  PerformInductionVarAnalysis();
+
+  EXPECT_STREQ("periodic(((1) ^ (1)), (1)):PrimInt", GetInductionInfo(x, 0).c_str());
+}
+
+TEST_F(InductionVarAnalysisTest, FindXor100PeriodicInduction) {
+  // Setup:
+  // k = 1;
+  // for (int i = 0; i < 100; i++) {
+  //   k = k ^ 100;
+  // }
+  BuildLoopNest(1);
+  HPhi* k = InsertLoopPhi(0, 0);
+  k->AddInput(constant1_);
+
+  HInstruction* x = InsertInstruction(
+      new (&allocator_) HXor(Primitive::kPrimInt, k, constant100_), 0);
+  k->AddInput(x);
+  PerformInductionVarAnalysis();
+
+  EXPECT_STREQ("periodic(((1) ^ (100)), (1)):PrimInt", GetInductionInfo(x, 0).c_str());
+}
+
+TEST_F(InductionVarAnalysisTest, FindBooleanEqPeriodicInduction) {
+  // Setup:
+  // k = 0;
+  // for (int i = 0; i < 100; i++) {
+  //   k = (k == 0);
+  // }
+  BuildLoopNest(1);
+  HPhi* k = InsertLoopPhi(0, 0);
+  k->AddInput(constant0_);
+
+  HInstruction* x = InsertInstruction(new (&allocator_) HEqual(k, constant0_), 0);
+  k->AddInput(x);
+  PerformInductionVarAnalysis();
+
+  EXPECT_STREQ("periodic((1), (0)):PrimBoolean", GetInductionInfo(x, 0).c_str());
+}
+
+TEST_F(InductionVarAnalysisTest, FindBooleanEqConstantLeftPeriodicInduction) {
+  // Setup:
+  // k = 0;
+  // for (int i = 0; i < 100; i++) {
+  //   k = (0 == k);
+  // }
+  BuildLoopNest(1);
+  HPhi* k = InsertLoopPhi(0, 0);
+  k->AddInput(constant0_);
+
+  HInstruction* x = InsertInstruction(new (&allocator_) HEqual(constant0_, k), 0);
+  k->AddInput(x);
+  PerformInductionVarAnalysis();
+
+  EXPECT_STREQ("periodic((1), (0)):PrimBoolean", GetInductionInfo(x, 0).c_str());
+}
+
+TEST_F(InductionVarAnalysisTest, FindBooleanNePeriodicInduction) {
+  // Setup:
+  // k = 0;
+  // for (int i = 0; i < 100; i++) {
+  //   k = (k != 1);
+  // }
+  BuildLoopNest(1);
+  HPhi* k = InsertLoopPhi(0, 0);
+  k->AddInput(constant0_);
+
+  HInstruction* x = InsertInstruction(new (&allocator_) HNotEqual(k, constant1_), 0);
+  k->AddInput(x);
+  PerformInductionVarAnalysis();
+
+  EXPECT_STREQ("periodic((1), (0)):PrimBoolean", GetInductionInfo(x, 0).c_str());
+}
+
+TEST_F(InductionVarAnalysisTest, FindBooleanNeConstantLeftPeriodicInduction) {
+  // Setup:
+  // k = 0;
+  // for (int i = 0; i < 100; i++) {
+  //   k = (1 != k);
+  // }
+  BuildLoopNest(1);
+  HPhi* k = InsertLoopPhi(0, 0);
+  k->AddInput(constant0_);
+
+  HInstruction* x = InsertInstruction(new (&allocator_) HNotEqual(constant1_, k), 0);
+  k->AddInput(x);
+  PerformInductionVarAnalysis();
+
+  EXPECT_STREQ("periodic((1), (0)):PrimBoolean", GetInductionInfo(x, 0).c_str());
+}
+
 TEST_F(InductionVarAnalysisTest, FindDerivedPeriodicInduction) {
   // Setup:
   // k = 0;
@@ -526,15 +651,15 @@
   k_header->AddInput(k_body);
 
   // Derived expressions.
-  HInstruction *add = InsertInstruction(
+  HInstruction* add = InsertInstruction(
       new (&allocator_) HAdd(Primitive::kPrimInt, k_body, constant100_), 0);
-  HInstruction *sub = InsertInstruction(
+  HInstruction* sub = InsertInstruction(
       new (&allocator_) HSub(Primitive::kPrimInt, k_body, constant100_), 0);
-  HInstruction *mul = InsertInstruction(
+  HInstruction* mul = InsertInstruction(
       new (&allocator_) HMul(Primitive::kPrimInt, k_body, constant100_), 0);
-  HInstruction *shl = InsertInstruction(
+  HInstruction* shl = InsertInstruction(
       new (&allocator_) HShl(Primitive::kPrimInt, k_body, constant1_), 0);
-  HInstruction *neg = InsertInstruction(
+  HInstruction* neg = InsertInstruction(
       new (&allocator_) HNeg(Primitive::kPrimInt, k_body), 0);
   PerformInductionVarAnalysis();
 
@@ -563,7 +688,7 @@
     k[d] = InsertLoopPhi(0, d);
   }
 
-  HInstruction *inc = InsertInstruction(
+  HInstruction* inc = InsertInstruction(
       new (&allocator_) HAdd(Primitive::kPrimInt, constant1_, k[9]), 9);
   HInstruction* store = InsertArrayStore(inc, 9);
 
@@ -597,7 +722,7 @@
   //   a[i] = 0;
   // }
   BuildLoopNest(1);
-  HInstruction *conv = InsertInstruction(
+  HInstruction* conv = InsertInstruction(
       new (&allocator_) HTypeConversion(Primitive::kPrimByte, basic_[0], -1), 0);
   HInstruction* store1 = InsertArrayStore(conv, 0);
   HInstruction* store2 = InsertArrayStore(basic_[0], 0);
@@ -615,6 +740,31 @@
   EXPECT_STREQ("((100) (TC-loop) ((0) < (100)))", GetTripCount(0).c_str());
 }
 
+TEST_F(InductionVarAnalysisTest, ByteInductionDerivedIntLoopControl) {
+  // Setup:
+  // for (int i = 0; i < 100; i++) {
+  //   k = (byte) i;
+  //   a[k] = 0;
+  //   k = k + 1
+  //   a[k] = 0;
+  // }
+  BuildLoopNest(1);
+  HInstruction* conv = InsertInstruction(
+      new (&allocator_) HTypeConversion(Primitive::kPrimByte, basic_[0], -1), 0);
+  HInstruction* store1 = InsertArrayStore(conv, 0);
+  HInstruction* add = InsertInstruction(
+      new (&allocator_) HAdd(Primitive::kPrimInt, conv, constant1_), 0);
+  HInstruction* store2 = InsertArrayStore(add, 0);
+
+  PerformInductionVarAnalysis();
+
+  // Byte induction (k) is "transferred" over conversion into addition (k + 1).
+  // This means only values within byte range can be trusted (even though
+  // addition can jump out of the range of course).
+  EXPECT_STREQ("((1) * i + (0)):PrimByte", GetInductionInfo(store1->InputAt(1), 0).c_str());
+  EXPECT_STREQ("((1) * i + (1)):PrimByte", GetInductionInfo(store2->InputAt(1), 0).c_str());
+}
+
 TEST_F(InductionVarAnalysisTest, ByteLoopControl1) {
   // Setup:
   // for (byte i = -128; i < 127; i++) {  // just fits!
diff --git a/compiler/optimizing/induction_var_range.cc b/compiler/optimizing/induction_var_range.cc
index cd8b7c7..235793d 100644
--- a/compiler/optimizing/induction_var_range.cc
+++ b/compiler/optimizing/induction_var_range.cc
@@ -58,22 +58,90 @@
 }
 
 /**
- * An upper bound a * (length / a) + b, where a >= 1, can be conservatively rewritten as length + b
- * because length >= 0 is true. This makes it more likely the bound is useful to clients.
+ * Detects an instruction that is >= 0. As long as the value is carried by
+ * a single instruction, arithmetic wrap-around cannot occur.
  */
-static InductionVarRange::Value SimplifyMax(InductionVarRange::Value v) {
-  int64_t value;
-  if (v.is_known &&
-      v.a_constant >= 1 &&
-      v.instruction->IsDiv() &&
-      v.instruction->InputAt(0)->IsArrayLength() &&
-      IsIntAndGet(v.instruction->InputAt(1), &value) && v.a_constant == value) {
-    return InductionVarRange::Value(v.instruction->InputAt(0), 1, v.b_constant);
+static bool IsGEZero(HInstruction* instruction) {
+  DCHECK(instruction != nullptr);
+  if (instruction->IsArrayLength()) {
+    return true;
+  } else if (instruction->IsInvokeStaticOrDirect()) {
+    switch (instruction->AsInvoke()->GetIntrinsic()) {
+      case Intrinsics::kMathMinIntInt:
+      case Intrinsics::kMathMinLongLong:
+        // Instruction MIN(>=0, >=0) is >= 0.
+        return IsGEZero(instruction->InputAt(0)) &&
+               IsGEZero(instruction->InputAt(1));
+      case Intrinsics::kMathAbsInt:
+      case Intrinsics::kMathAbsLong:
+        // Instruction ABS(x) is >= 0.
+        return true;
+      default:
+        break;
+    }
+  }
+  int64_t value = -1;
+  return IsIntAndGet(instruction, &value) && value >= 0;
+}
+
+/** Hunts "under the hood" for a suitable instruction at the hint. */
+static bool IsMaxAtHint(
+    HInstruction* instruction, HInstruction* hint, /*out*/HInstruction** suitable) {
+  if (instruction->IsInvokeStaticOrDirect()) {
+    switch (instruction->AsInvoke()->GetIntrinsic()) {
+      case Intrinsics::kMathMinIntInt:
+      case Intrinsics::kMathMinLongLong:
+        // For MIN(x, y), return most suitable x or y as maximum.
+        return IsMaxAtHint(instruction->InputAt(0), hint, suitable) ||
+               IsMaxAtHint(instruction->InputAt(1), hint, suitable);
+      default:
+        break;
+    }
+  } else {
+    *suitable = instruction;
+    while (instruction->IsArrayLength() ||
+           instruction->IsNullCheck() ||
+           instruction->IsNewArray()) {
+      instruction = instruction->InputAt(0);
+    }
+    return instruction == hint;
+  }
+  return false;
+}
+
+/** Post-analysis simplification of a minimum value that makes the bound more useful to clients. */
+static InductionVarRange::Value SimplifyMin(InductionVarRange::Value v) {
+  if (v.is_known && v.a_constant == 1 && v.b_constant <= 0) {
+    // If a == 1,  instruction >= 0 and b <= 0, just return the constant b.
+    // No arithmetic wrap-around can occur.
+    if (IsGEZero(v.instruction)) {
+      return InductionVarRange::Value(v.b_constant);
+    }
   }
   return v;
 }
 
-/** Helper method to test for a constant value. */
+/** Post-analysis simplification of a maximum value that makes the bound more useful to clients. */
+static InductionVarRange::Value SimplifyMax(InductionVarRange::Value v, HInstruction* hint) {
+  if (v.is_known && v.a_constant >= 1) {
+    // An upper bound a * (length / a) + b, where a >= 1, can be conservatively rewritten as
+    // length + b because length >= 0 is true.
+    int64_t value;
+    if (v.instruction->IsDiv() &&
+        v.instruction->InputAt(0)->IsArrayLength() &&
+        IsIntAndGet(v.instruction->InputAt(1), &value) && v.a_constant == value) {
+      return InductionVarRange::Value(v.instruction->InputAt(0), 1, v.b_constant);
+    }
+    // If a == 1, the most suitable one suffices as maximum value.
+    HInstruction* suitable = nullptr;
+    if (v.a_constant == 1 && IsMaxAtHint(v.instruction, hint, &suitable)) {
+      return InductionVarRange::Value(suitable, 1, v.b_constant);
+    }
+  }
+  return v;
+}
+
+/** Tests for a constant value. */
 static bool IsConstantValue(InductionVarRange::Value v) {
   return v.is_known && v.a_constant == 0;
 }
@@ -97,7 +165,7 @@
   }
 }
 
-/** Helper method to insert an instruction. */
+/** Inserts an instruction. */
 static HInstruction* Insert(HBasicBlock* block, HInstruction* instruction) {
   DCHECK(block != nullptr);
   DCHECK(block->GetLastInstruction() != nullptr) << block->GetBlockId();
@@ -106,7 +174,7 @@
   return instruction;
 }
 
-/** Helper method to obtain loop's control instruction. */
+/** Obtains loop's control instruction. */
 static HInstruction* GetLoopControl(HLoopInformation* loop) {
   DCHECK(loop != nullptr);
   return loop->GetHeader()->GetLastInstruction();
@@ -150,9 +218,14 @@
   chase_hint_ = chase_hint;
   bool in_body = context->GetBlock() != loop->GetHeader();
   int64_t stride_value = 0;
-  *min_val = GetVal(info, trip, in_body, /* is_min */ true);
-  *max_val = SimplifyMax(GetVal(info, trip, in_body, /* is_min */ false));
+  *min_val = SimplifyMin(GetVal(info, trip, in_body, /* is_min */ true));
+  *max_val = SimplifyMax(GetVal(info, trip, in_body, /* is_min */ false), chase_hint);
   *needs_finite_test = NeedsTripCount(info, &stride_value) && IsUnsafeTripCount(trip);
+  chase_hint_ = nullptr;
+  // Retry chasing constants for wrap-around (merge sensitive).
+  if (!min_val->is_known && info->induction_class == HInductionVarAnalysis::kWrapAround) {
+    *min_val = SimplifyMin(GetVal(info, trip, in_body, /* is_min */ true));
+  }
   return true;
 }
 
@@ -162,20 +235,20 @@
                                          /*out*/bool* needs_taken_test) {
   bool is_last_value = false;
   int64_t stride_value = 0;
-  return GenerateCode(context,
-                      instruction,
-                      is_last_value,
-                      nullptr,
-                      nullptr,
-                      nullptr,
-                      nullptr,
-                      nullptr,  // nothing generated yet
-                      &stride_value,
-                      needs_finite_test,
-                      needs_taken_test)
+  return GenerateRangeOrLastValue(context,
+                                  instruction,
+                                  is_last_value,
+                                  nullptr,
+                                  nullptr,
+                                  nullptr,
+                                  nullptr,
+                                  nullptr,  // nothing generated yet
+                                  &stride_value,
+                                  needs_finite_test,
+                                  needs_taken_test)
       && (stride_value == -1 ||
           stride_value == 0 ||
-          stride_value == 1);  // avoid wrap-around anomalies.
+          stride_value == 1);  // avoid arithmetic wrap-around anomalies.
 }
 
 void InductionVarRange::GenerateRange(HInstruction* context,
@@ -187,17 +260,17 @@
   bool is_last_value = false;
   int64_t stride_value = 0;
   bool b1, b2;  // unused
-  if (!GenerateCode(context,
-                    instruction,
-                    is_last_value,
-                    graph,
-                    block,
-                    lower,
-                    upper,
-                    nullptr,
-                    &stride_value,
-                    &b1,
-                    &b2)) {
+  if (!GenerateRangeOrLastValue(context,
+                                instruction,
+                                is_last_value,
+                                graph,
+                                block,
+                                lower,
+                                upper,
+                                nullptr,
+                                &stride_value,
+                                &b1,
+                                &b2)) {
     LOG(FATAL) << "Failed precondition: CanGenerateRange()";
   }
 }
@@ -209,17 +282,17 @@
   bool is_last_value = false;
   int64_t stride_value = 0;
   bool b1, b2;  // unused
-  if (!GenerateCode(context,
-                    context,
-                    is_last_value,
-                    graph,
-                    block,
-                    nullptr,
-                    nullptr,
-                    &taken_test,
-                    &stride_value,
-                    &b1,
-                    &b2)) {
+  if (!GenerateRangeOrLastValue(context,
+                                context,
+                                is_last_value,
+                                graph,
+                                block,
+                                nullptr,
+                                nullptr,
+                                &taken_test,
+                                &stride_value,
+                                &b1,
+                                &b2)) {
     LOG(FATAL) << "Failed precondition: CanGenerateRange()";
   }
   return taken_test;
@@ -230,17 +303,17 @@
   int64_t stride_value = 0;
   bool needs_finite_test = false;
   bool needs_taken_test = false;
-  return GenerateCode(instruction,
-                      instruction,
-                      is_last_value,
-                      nullptr,
-                      nullptr,
-                      nullptr,
-                      nullptr,
-                      nullptr,  // nothing generated yet
-                      &stride_value,
-                      &needs_finite_test,
-                      &needs_taken_test)
+  return GenerateRangeOrLastValue(instruction,
+                                  instruction,
+                                  is_last_value,
+                                  nullptr,
+                                  nullptr,
+                                  nullptr,
+                                  nullptr,
+                                  nullptr,  // nothing generated yet
+                                  &stride_value,
+                                  &needs_finite_test,
+                                  &needs_taken_test)
       && !needs_finite_test && !needs_taken_test;
 }
 
@@ -251,17 +324,17 @@
   bool is_last_value = true;
   int64_t stride_value = 0;
   bool b1, b2;  // unused
-  if (!GenerateCode(instruction,
-                    instruction,
-                    is_last_value,
-                    graph,
-                    block,
-                    &last_value,
-                    &last_value,
-                    nullptr,
-                    &stride_value,
-                    &b1,
-                    &b2)) {
+  if (!GenerateRangeOrLastValue(instruction,
+                                instruction,
+                                is_last_value,
+                                graph,
+                                block,
+                                &last_value,
+                                &last_value,
+                                nullptr,
+                                &stride_value,
+                                &b1,
+                                &b2)) {
     LOG(FATAL) << "Failed precondition: CanGenerateLastValue()";
   }
   return last_value;
@@ -280,6 +353,12 @@
   }
 }
 
+bool InductionVarRange::IsFinite(HLoopInformation* loop) const {
+  HInductionVarAnalysis::InductionInfo *trip =
+      induction_analysis_->LookupInfo(loop, GetLoopControl(loop));
+  return trip != nullptr && !IsUnsafeTripCount(trip);
+}
+
 //
 // Private class methods.
 //
@@ -296,7 +375,8 @@
         return true;
       }
     }
-    // Try range analysis on the invariant, but only on proper range to avoid wrap-around anomalies.
+    // Try range analysis on the invariant, only accept a proper range
+    // to avoid arithmetic wrap-around anomalies.
     Value min_val = GetVal(info, nullptr, /* in_body */ true, /* is_min */ true);
     Value max_val = GetVal(info, nullptr, /* in_body */ true, /* is_min */ false);
     if (IsConstantValue(min_val) &&
@@ -444,25 +524,26 @@
                                                      HInductionVarAnalysis::InductionInfo* trip,
                                                      bool in_body,
                                                      bool is_min) const {
-  // Stop chasing the instruction at constant or hint.
-  int64_t value;
-  if (IsIntAndGet(instruction, &value) && CanLongValueFitIntoInt(value)) {
-    return Value(static_cast<int32_t>(value));
-  } else if (instruction == chase_hint_) {
-    return Value(instruction, 1, 0);
-  }
-  // Special cases when encountering a single instruction that denotes trip count in the
-  // loop-body: min is 1 and, when chasing constants, max of safe trip-count is max int
-  if (in_body && trip != nullptr && instruction == trip->op_a->fetch) {
+  // Special case when chasing constants: single instruction that denotes trip count in the
+  // loop-body is minimal 1 and maximal, with safe trip-count, max int,
+  if (chase_hint_ == nullptr && in_body && trip != nullptr && instruction == trip->op_a->fetch) {
     if (is_min) {
       return Value(1);
-    } else if (chase_hint_ == nullptr && !IsUnsafeTripCount(trip)) {
+    } else if (!IsUnsafeTripCount(trip)) {
       return Value(std::numeric_limits<int32_t>::max());
     }
   }
-  // Chase the instruction a bit deeper into the HIR tree, so that it becomes more likely
-  // range analysis will compare the same instructions as terminal nodes.
-  if (instruction->IsAdd()) {
+  // Unless at a constant or hint, chase the instruction a bit deeper into the HIR tree, so that
+  // it becomes more likely range analysis will compare the same instructions as terminal nodes.
+  int64_t value;
+  if (IsIntAndGet(instruction, &value) && CanLongValueFitIntoInt(value)) {
+    // Proper constant reveals best information.
+    return Value(static_cast<int32_t>(value));
+  } else if (instruction == chase_hint_) {
+    // At hint, fetch is represented by itself.
+    return Value(instruction, 1, 0);
+  } else if (instruction->IsAdd()) {
+    // Incorporate suitable constants in the chased value.
     if (IsIntAndGet(instruction->InputAt(0), &value) && CanLongValueFitIntoInt(value)) {
       return AddValue(Value(static_cast<int32_t>(value)),
                       GetFetch(instruction->InputAt(1), trip, in_body, is_min));
@@ -471,14 +552,14 @@
                       Value(static_cast<int32_t>(value)));
     }
   } else if (instruction->IsArrayLength()) {
-    // Return extreme values when chasing constants. Otherwise, chase deeper.
+    // Exploit length properties when chasing constants or chase into a new array declaration.
     if (chase_hint_ == nullptr) {
       return is_min ? Value(0) : Value(std::numeric_limits<int32_t>::max());
     } else if (instruction->InputAt(0)->IsNewArray()) {
       return GetFetch(instruction->InputAt(0)->InputAt(0), trip, in_body, is_min);
     }
   } else if (instruction->IsTypeConversion()) {
-    // Since analysis is 32-bit (or narrower) we allow a widening along the path.
+    // Since analysis is 32-bit (or narrower), chase beyond widening along the path.
     if (instruction->AsTypeConversion()->GetInputType() == Primitive::kPrimInt &&
         instruction->AsTypeConversion()->GetResultType() == Primitive::kPrimLong) {
       return GetFetch(instruction->InputAt(0), trip, in_body, is_min);
@@ -500,6 +581,7 @@
       !IsUnsafeTripCount(next_trip)) {
     return GetVal(next_info, next_trip, next_in_body, is_min);
   }
+  // Fetch is represented by itself.
   return Value(instruction, 1, 0);
 }
 
@@ -525,6 +607,8 @@
             return GetMul(info->op_a, info->op_b, trip, in_body, is_min);
           case HInductionVarAnalysis::kDiv:
             return GetDiv(info->op_a, info->op_b, trip, in_body, is_min);
+          case HInductionVarAnalysis::kXor:
+            return GetXor(info->op_a, info->op_b);
           case HInductionVarAnalysis::kFetch:
             return GetFetch(info->fetch, trip, in_body, is_min);
           case HInductionVarAnalysis::kTripCountInLoop:
@@ -626,6 +710,21 @@
   return Value();
 }
 
+InductionVarRange::Value InductionVarRange::GetXor(
+    HInductionVarAnalysis::InductionInfo* info1,
+    HInductionVarAnalysis::InductionInfo* info2) const {
+  int64_t v1 = 0;
+  int64_t v2 = 0;
+  // Only accept exact values.
+  if (IsConstant(info1, kExact, &v1) && IsConstant(info2, kExact, &v2)) {
+    int64_t value = v1 ^ v2;
+    if (CanLongValueFitIntoInt(value)) {
+      return Value(static_cast<int32_t>(value));
+    }
+  }
+  return Value();
+}
+
 InductionVarRange::Value InductionVarRange::MulRangeAndConstant(
     int64_t value,
     HInductionVarAnalysis::InductionInfo* info,
@@ -715,17 +814,17 @@
   return Value();
 }
 
-bool InductionVarRange::GenerateCode(HInstruction* context,
-                                     HInstruction* instruction,
-                                     bool is_last_value,
-                                     HGraph* graph,
-                                     HBasicBlock* block,
-                                     /*out*/HInstruction** lower,
-                                     /*out*/HInstruction** upper,
-                                     /*out*/HInstruction** taken_test,
-                                     /*out*/int64_t* stride_value,
-                                     /*out*/bool* needs_finite_test,
-                                     /*out*/bool* needs_taken_test) const {
+bool InductionVarRange::GenerateRangeOrLastValue(HInstruction* context,
+                                                 HInstruction* instruction,
+                                                 bool is_last_value,
+                                                 HGraph* graph,
+                                                 HBasicBlock* block,
+                                                 /*out*/HInstruction** lower,
+                                                 /*out*/HInstruction** upper,
+                                                 /*out*/HInstruction** taken_test,
+                                                 /*out*/int64_t* stride_value,
+                                                 /*out*/bool* needs_finite_test,
+                                                 /*out*/bool* needs_taken_test) const {
   HLoopInformation* loop = nullptr;
   HInductionVarAnalysis::InductionInfo* info = nullptr;
   HInductionVarAnalysis::InductionInfo* trip = nullptr;
@@ -743,12 +842,17 @@
   *needs_taken_test = IsBodyTripCount(trip);
   // Handle last value request.
   if (is_last_value) {
-    if (info->induction_class != HInductionVarAnalysis::kLinear) {
-      return false;
-    } else if (*stride_value > 0) {
-      lower = nullptr;
+    if (info->induction_class == HInductionVarAnalysis::kLinear) {
+      if (*stride_value > 0) {
+        lower = nullptr;
+      } else {
+        upper = nullptr;
+      }
+    } else if (info->induction_class == HInductionVarAnalysis::kPeriodic) {
+      DCHECK(!in_body);
+      return GenerateLastValuePeriodic(info, trip, graph, block, lower, needs_taken_test);
     } else {
-      upper = nullptr;
+      return false;
     }
   }
   // Code generation for taken test: generate the code when requested or otherwise analyze
@@ -770,6 +874,56 @@
       GenerateCode(info, trip, graph, block, upper, in_body, /* is_min */ false);
 }
 
+bool InductionVarRange::GenerateLastValuePeriodic(HInductionVarAnalysis::InductionInfo* info,
+                                                  HInductionVarAnalysis::InductionInfo* trip,
+                                                  HGraph* graph,
+                                                  HBasicBlock* block,
+                                                  /*out*/HInstruction** result,
+                                                  /*out*/bool* needs_taken_test) const {
+  DCHECK(info->induction_class == HInductionVarAnalysis::kPeriodic);
+  // Count period.
+  int32_t period = 1;
+  for (HInductionVarAnalysis::InductionInfo* p = info;
+       p->induction_class == HInductionVarAnalysis::kPeriodic;
+       p = p->op_b, ++period) {}
+  // Handle periodic(x, y) case for restricted types.
+  if (period != 2 ||
+      trip->op_a->type != Primitive::kPrimInt ||
+      (info->type != Primitive::kPrimInt && info->type != Primitive::kPrimBoolean)) {
+    return false;  // TODO: easy to generalize
+  }
+  HInstruction* x_instr = nullptr;
+  HInstruction* y_instr = nullptr;
+  HInstruction* trip_expr = nullptr;
+  if (GenerateCode(info->op_a, nullptr, graph, block, graph ? &x_instr   : nullptr, false, false) &&
+      GenerateCode(info->op_b, nullptr, graph, block, graph ? &y_instr   : nullptr, false, false) &&
+      GenerateCode(trip->op_a, nullptr, graph, block, graph ? &trip_expr : nullptr, false, false)) {
+    // During actual code generation (graph != nullptr),
+    // generate is_even ? x : y select instruction.
+    if (graph != nullptr) {
+      HInstruction* is_even = Insert(block, new (graph->GetArena()) HEqual(
+          Insert(block, new (graph->GetArena()) HAnd(
+              Primitive::kPrimInt, trip_expr, graph->GetIntConstant(1))),
+          graph->GetIntConstant(0), kNoDexPc));
+      *result = Insert(block, new (graph->GetArena()) HSelect(is_even, x_instr, y_instr, kNoDexPc));
+    }
+    // Guard select with taken test if needed.
+    if (*needs_taken_test) {
+      HInstruction* taken_test = nullptr;
+      if (!GenerateCode(
+          trip->op_b, nullptr, graph, block, graph ? &taken_test : nullptr, false, false)) {
+        return false;
+      } else if (graph != nullptr) {
+         *result = Insert(block,
+                          new (graph->GetArena()) HSelect(taken_test, *result, x_instr, kNoDexPc));
+      }
+      *needs_taken_test = false;  // taken care of
+    }
+    return true;
+  }
+  return false;
+}
+
 bool InductionVarRange::GenerateCode(HInductionVarAnalysis::InductionInfo* info,
                                      HInductionVarAnalysis::InductionInfo* trip,
                                      HGraph* graph,  // when set, code is generated
@@ -784,7 +938,7 @@
     }
     // Verify type safety.
     Primitive::Type type = Primitive::kPrimInt;
-    if (info->type != type) {
+    if (info->type != Primitive::kPrimInt && info->type != Primitive::kPrimBoolean) {
       return false;
     }
     // Handle current operation.
@@ -792,9 +946,11 @@
     HInstruction* opb = nullptr;
     switch (info->induction_class) {
       case HInductionVarAnalysis::kInvariant:
-        // Invariants.
+        // Invariants (note that even though is_min does not impact code generation for
+        // invariants, some effort is made to keep this parameter consistent).
         switch (info->operation) {
           case HInductionVarAnalysis::kAdd:
+          case HInductionVarAnalysis::kXor:  // no proper is_min for second arg
           case HInductionVarAnalysis::kLT:
           case HInductionVarAnalysis::kLE:
           case HInductionVarAnalysis::kGT:
@@ -806,6 +962,8 @@
                 switch (info->operation) {
                   case HInductionVarAnalysis::kAdd:
                     operation = new (graph->GetArena()) HAdd(type, opa, opb); break;
+                  case HInductionVarAnalysis::kXor:
+                    operation = new (graph->GetArena()) HXor(type, opa, opb); break;
                   case HInductionVarAnalysis::kLT:
                     operation = new (graph->GetArena()) HLessThan(opa, opb); break;
                   case HInductionVarAnalysis::kLE:
diff --git a/compiler/optimizing/induction_var_range.h b/compiler/optimizing/induction_var_range.h
index 63850b3..034cf32 100644
--- a/compiler/optimizing/induction_var_range.h
+++ b/compiler/optimizing/induction_var_range.h
@@ -131,6 +131,29 @@
    */
   void Replace(HInstruction* instruction, HInstruction* fetch, HInstruction* replacement);
 
+  /**
+   * Incrementally updates induction information for just the given loop.
+   */
+  void ReVisit(HLoopInformation* loop) {
+    induction_analysis_->induction_.erase(loop);
+    for (HInstructionIterator it(loop->GetHeader()->GetPhis()); !it.Done(); it.Advance()) {
+      induction_analysis_->cycles_.erase(it.Current()->AsPhi());
+    }
+    induction_analysis_->VisitLoop(loop);
+  }
+
+  /**
+   * Lookup an interesting cycle associated with an entry phi.
+   */
+  ArenaSet<HInstruction*>* LookupCycle(HPhi* phi) const {
+    return induction_analysis_->LookupCycle(phi);
+  }
+
+  /**
+   * Checks if header logic of a loop terminates.
+   */
+  bool IsFinite(HLoopInformation* loop) const;
+
  private:
   /*
    * Enum used in IsConstant() request.
@@ -185,6 +208,8 @@
                HInductionVarAnalysis::InductionInfo* trip,
                bool in_body,
                bool is_min) const;
+  Value GetXor(HInductionVarAnalysis::InductionInfo* info1,
+               HInductionVarAnalysis::InductionInfo* info2) const;
 
   Value MulRangeAndConstant(int64_t value,
                             HInductionVarAnalysis::InductionInfo* info,
@@ -208,17 +233,24 @@
    * success. With values nullptr, the method can be used to determine if code generation
    * would be successful without generating actual code yet.
    */
-  bool GenerateCode(HInstruction* context,
-                    HInstruction* instruction,
-                    bool is_last_val,
-                    HGraph* graph,
-                    HBasicBlock* block,
-                    /*out*/ HInstruction** lower,
-                    /*out*/ HInstruction** upper,
-                    /*out*/ HInstruction** taken_test,
-                    /*out*/ int64_t* stride_value,
-                    /*out*/ bool* needs_finite_test,
-                    /*out*/ bool* needs_taken_test) const;
+  bool GenerateRangeOrLastValue(HInstruction* context,
+                                HInstruction* instruction,
+                                bool is_last_val,
+                                HGraph* graph,
+                                HBasicBlock* block,
+                                /*out*/ HInstruction** lower,
+                                /*out*/ HInstruction** upper,
+                                /*out*/ HInstruction** taken_test,
+                                /*out*/ int64_t* stride_value,
+                                /*out*/ bool* needs_finite_test,
+                                /*out*/ bool* needs_taken_test) const;
+
+  bool GenerateLastValuePeriodic(HInductionVarAnalysis::InductionInfo* info,
+                                 HInductionVarAnalysis::InductionInfo* trip,
+                                 HGraph* graph,
+                                 HBasicBlock* block,
+                                 /*out*/HInstruction** result,
+                                 /*out*/ bool* needs_taken_test) const;
 
   bool GenerateCode(HInductionVarAnalysis::InductionInfo* info,
                     HInductionVarAnalysis::InductionInfo* trip,
diff --git a/compiler/optimizing/inliner.cc b/compiler/optimizing/inliner.cc
index 6080551..7fe54b9 100644
--- a/compiler/optimizing/inliner.cc
+++ b/compiler/optimizing/inliner.cc
@@ -90,14 +90,14 @@
         if (!TryInline(call)) {
           if (kIsDebugBuild && IsCompilingWithCoreImage()) {
             std::string callee_name =
-                PrettyMethod(call->GetDexMethodIndex(), *outer_compilation_unit_.GetDexFile());
+                outer_compilation_unit_.GetDexFile()->PrettyMethod(call->GetDexMethodIndex());
             bool should_inline = callee_name.find("$inline$") != std::string::npos;
             CHECK(!should_inline) << "Could not inline " << callee_name;
           }
         } else {
           if (kIsDebugBuild && IsCompilingWithCoreImage()) {
             std::string callee_name =
-                PrettyMethod(call->GetDexMethodIndex(), *outer_compilation_unit_.GetDexFile());
+                outer_compilation_unit_.GetDexFile()->PrettyMethod(call->GetDexMethodIndex());
             bool must_not_inline = callee_name.find("$noinline$") != std::string::npos;
             CHECK(!must_not_inline) << "Should not have inlined " << callee_name;
           }
@@ -203,10 +203,10 @@
     REQUIRES_SHARED(Locks::mutator_lock_) {
   uint32_t index = DexFile::kDexNoIndex;
   if (cls->GetDexCache() == nullptr) {
-    DCHECK(cls->IsArrayClass()) << PrettyClass(cls);
+    DCHECK(cls->IsArrayClass()) << cls->PrettyClass();
     index = cls->FindTypeIndexInOtherDexFile(dex_file);
   } else if (cls->GetDexTypeIndex() == DexFile::kDexNoIndex16) {
-    DCHECK(cls->IsProxyClass()) << PrettyClass(cls);
+    DCHECK(cls->IsProxyClass()) << cls->PrettyClass();
     // TODO: deal with proxy classes.
   } else if (IsSameDexFile(cls->GetDexFile(), dex_file)) {
     DCHECK_EQ(cls->GetDexCache(), dex_cache.Get());
@@ -266,7 +266,7 @@
   ScopedObjectAccess soa(Thread::Current());
   uint32_t method_index = invoke_instruction->GetDexMethodIndex();
   const DexFile& caller_dex_file = *caller_compilation_unit_.GetDexFile();
-  VLOG(compiler) << "Try inlining " << PrettyMethod(method_index, caller_dex_file);
+  VLOG(compiler) << "Try inlining " << caller_dex_file.PrettyMethod(method_index);
 
   // We can query the dex cache directly. The verifier has populated it already.
   ArtMethod* resolved_method = invoke_instruction->GetResolvedMethod();
@@ -304,7 +304,7 @@
       const InlineCache& ic = *profiling_info->GetInlineCache(invoke_instruction->GetDexPc());
       if (ic.IsUninitialized()) {
         VLOG(compiler) << "Interface or virtual call to "
-                       << PrettyMethod(method_index, caller_dex_file)
+                       << caller_dex_file.PrettyMethod(method_index)
                        << " is not hit and not inlined";
         return false;
       } else if (ic.IsMonomorphic()) {
@@ -322,7 +322,7 @@
       } else {
         DCHECK(ic.IsMegamorphic());
         VLOG(compiler) << "Interface or virtual call to "
-                       << PrettyMethod(method_index, caller_dex_file)
+                       << caller_dex_file.PrettyMethod(method_index)
                        << " is megamorphic and not inlined";
         MaybeRecordStat(kMegamorphicCall);
         return false;
@@ -331,7 +331,7 @@
   }
 
   VLOG(compiler) << "Interface or virtual call to "
-                 << PrettyMethod(method_index, caller_dex_file)
+                 << caller_dex_file.PrettyMethod(method_index)
                  << " could not be statically determined";
   return false;
 }
@@ -366,7 +366,7 @@
   uint32_t class_index = FindClassIndexIn(
       ic.GetMonomorphicType(), caller_dex_file, caller_compilation_unit_.GetDexCache());
   if (class_index == DexFile::kDexNoIndex) {
-    VLOG(compiler) << "Call to " << PrettyMethod(resolved_method)
+    VLOG(compiler) << "Call to " << ArtMethod::PrettyMethod(resolved_method)
                    << " from inline cache is not inlined because its class is not"
                    << " accessible to the caller";
     return false;
@@ -526,7 +526,7 @@
   }
 
   if (!one_target_inlined) {
-    VLOG(compiler) << "Call to " << PrettyMethod(resolved_method)
+    VLOG(compiler) << "Call to " << ArtMethod::PrettyMethod(resolved_method)
                    << " from inline cache is not inlined because none"
                    << " of its targets could be inlined";
     return false;
@@ -660,7 +660,7 @@
       actual_method = new_method;
     } else if (actual_method != new_method) {
       // Different methods, bailout.
-      VLOG(compiler) << "Call to " << PrettyMethod(resolved_method)
+      VLOG(compiler) << "Call to " << ArtMethod::PrettyMethod(resolved_method)
                      << " from inline cache is not inlined because it resolves"
                      << " to different methods";
       return false;
@@ -794,7 +794,7 @@
                                  ArtMethod* method,
                                  HInstruction** return_replacement) {
   if (method->IsProxyMethod()) {
-    VLOG(compiler) << "Method " << PrettyMethod(method)
+    VLOG(compiler) << "Method " << method->PrettyMethod()
                    << " is not inlined because of unimplemented inline support for proxy methods.";
     return false;
   }
@@ -804,11 +804,12 @@
   if (!compiler_driver_->MayInline(method->GetDexFile(),
                                    outer_compilation_unit_.GetDexFile())) {
     if (TryPatternSubstitution(invoke_instruction, method, return_replacement)) {
-      VLOG(compiler) << "Successfully replaced pattern of invoke " << PrettyMethod(method);
+      VLOG(compiler) << "Successfully replaced pattern of invoke "
+                     << method->PrettyMethod();
       MaybeRecordStat(kReplacedInvokeWithSimplePattern);
       return true;
     }
-    VLOG(compiler) << "Won't inline " << PrettyMethod(method) << " in "
+    VLOG(compiler) << "Won't inline " << method->PrettyMethod() << " in "
                    << outer_compilation_unit_.GetDexFile()->GetLocation() << " ("
                    << caller_compilation_unit_.GetDexFile()->GetLocation() << ") from "
                    << method->GetDexFile()->GetLocation();
@@ -820,14 +821,14 @@
   const DexFile::CodeItem* code_item = method->GetCodeItem();
 
   if (code_item == nullptr) {
-    VLOG(compiler) << "Method " << PrettyMethod(method)
+    VLOG(compiler) << "Method " << method->PrettyMethod()
                    << " is not inlined because it is native";
     return false;
   }
 
   size_t inline_max_code_units = compiler_driver_->GetCompilerOptions().GetInlineMaxCodeUnits();
   if (code_item->insns_size_in_code_units_ > inline_max_code_units) {
-    VLOG(compiler) << "Method " << PrettyMethod(method)
+    VLOG(compiler) << "Method " << method->PrettyMethod()
                    << " is too big to inline: "
                    << code_item->insns_size_in_code_units_
                    << " > "
@@ -836,13 +837,13 @@
   }
 
   if (code_item->tries_size_ != 0) {
-    VLOG(compiler) << "Method " << PrettyMethod(method)
+    VLOG(compiler) << "Method " << method->PrettyMethod()
                    << " is not inlined because of try block";
     return false;
   }
 
   if (!method->IsCompilable()) {
-    VLOG(compiler) << "Method " << PrettyMethod(method)
+    VLOG(compiler) << "Method " << method->PrettyMethod()
                    << " has soft failures un-handled by the compiler, so it cannot be inlined";
   }
 
@@ -851,7 +852,7 @@
     if (Runtime::Current()->UseJitCompilation() ||
         !compiler_driver_->IsMethodVerifiedWithoutFailures(
             method->GetDexMethodIndex(), class_def_idx, *method->GetDexFile())) {
-      VLOG(compiler) << "Method " << PrettyMethod(method)
+      VLOG(compiler) << "Method " << method->PrettyMethod()
                      << " couldn't be verified, so it cannot be inlined";
       return false;
     }
@@ -861,7 +862,7 @@
       invoke_instruction->AsInvokeStaticOrDirect()->IsStaticWithImplicitClinitCheck()) {
     // Case of a static method that cannot be inlined because it implicitly
     // requires an initialization check of its declaring class.
-    VLOG(compiler) << "Method " << PrettyMethod(method)
+    VLOG(compiler) << "Method " << method->PrettyMethod()
                    << " is not inlined because it is static and requires a clinit"
                    << " check that cannot be emitted due to Dex cache limitations";
     return false;
@@ -871,7 +872,7 @@
     return false;
   }
 
-  VLOG(compiler) << "Successfully inlined " << PrettyMethod(method);
+  VLOG(compiler) << "Successfully inlined " << method->PrettyMethod();
   MaybeRecordStat(kInlinedInvoke);
   return true;
 }
@@ -1143,14 +1144,14 @@
                         handles_);
 
   if (builder.BuildGraph() != kAnalysisSuccess) {
-    VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
+    VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
                    << " could not be built, so cannot be inlined";
     return false;
   }
 
   if (!RegisterAllocator::CanAllocateRegistersFor(*callee_graph,
                                                   compiler_driver_->GetInstructionSet())) {
-    VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
+    VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
                    << " cannot be inlined because of the register allocator";
     return false;
   }
@@ -1200,7 +1201,7 @@
   // a throw predecessor.
   HBasicBlock* exit_block = callee_graph->GetExitBlock();
   if (exit_block == nullptr) {
-    VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
+    VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
                    << " could not be inlined because it has an infinite loop";
     return false;
   }
@@ -1213,55 +1214,62 @@
     }
   }
   if (has_throw_predecessor) {
-    VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
+    VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
                    << " could not be inlined because one branch always throws";
     return false;
   }
 
-  HReversePostOrderIterator it(*callee_graph);
-  it.Advance();  // Past the entry block, it does not contain instructions that prevent inlining.
   size_t number_of_instructions = 0;
 
   bool can_inline_environment =
       total_number_of_dex_registers_ < kMaximumNumberOfCumulatedDexRegisters;
 
-  for (; !it.Done(); it.Advance()) {
-    HBasicBlock* block = it.Current();
-
-    if (block->IsLoopHeader() && block->GetLoopInformation()->IsIrreducible()) {
-      // Don't inline methods with irreducible loops, they could prevent some
-      // optimizations to run.
-      VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
-                     << " could not be inlined because it contains an irreducible loop";
-      return false;
+  // Skip the entry block, it does not contain instructions that prevent inlining.
+  for (HBasicBlock* block : callee_graph->GetReversePostOrderSkipEntryBlock()) {
+    if (block->IsLoopHeader()) {
+      if (block->GetLoopInformation()->IsIrreducible()) {
+        // Don't inline methods with irreducible loops, they could prevent some
+        // optimizations to run.
+        VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
+                       << " could not be inlined because it contains an irreducible loop";
+        return false;
+      }
+      if (!block->GetLoopInformation()->HasExitEdge()) {
+        // Don't inline methods with loops without exit, since they cause the
+        // loop information to be computed incorrectly when updating after
+        // inlining.
+        VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
+                       << " could not be inlined because it contains a loop with no exit";
+        return false;
+      }
     }
 
     for (HInstructionIterator instr_it(block->GetInstructions());
          !instr_it.Done();
          instr_it.Advance()) {
       if (number_of_instructions++ == number_of_instructions_budget) {
-        VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
+        VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
                        << " is not inlined because its caller has reached"
                        << " its instruction budget limit.";
         return false;
       }
       HInstruction* current = instr_it.Current();
       if (!can_inline_environment && current->NeedsEnvironment()) {
-        VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
+        VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
                        << " is not inlined because its caller has reached"
                        << " its environment budget limit.";
         return false;
       }
 
       if (!same_dex_file && current->NeedsEnvironment()) {
-        VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
+        VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
                        << " could not be inlined because " << current->DebugName()
                        << " needs an environment and is in a different dex file";
         return false;
       }
 
       if (!same_dex_file && current->NeedsDexCacheOfDeclaringClass()) {
-        VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
+        VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
                        << " could not be inlined because " << current->DebugName()
                        << " it is in a different dex file and requires access to the dex cache";
         return false;
@@ -1269,7 +1277,7 @@
 
       if (current->IsNewInstance() &&
           (current->AsNewInstance()->GetEntrypoint() == kQuickAllocObjectWithAccessCheck)) {
-        VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
+        VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
                        << " could not be inlined because it is using an entrypoint"
                        << " with access checks";
         // Allocation entrypoint does not handle inlined frames.
@@ -1278,7 +1286,7 @@
 
       if (current->IsNewArray() &&
           (current->AsNewArray()->GetEntrypoint() == kQuickAllocArrayWithAccessCheck)) {
-        VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
+        VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
                        << " could not be inlined because it is using an entrypoint"
                        << " with access checks";
         // Allocation entrypoint does not handle inlined frames.
@@ -1290,7 +1298,7 @@
           current->IsUnresolvedStaticFieldSet() ||
           current->IsUnresolvedInstanceFieldSet()) {
         // Entrypoint for unresolved fields does not handle inlined frames.
-        VLOG(compiler) << "Method " << PrettyMethod(method_index, callee_dex_file)
+        VLOG(compiler) << "Method " << callee_dex_file.PrettyMethod(method_index)
                        << " could not be inlined because it is using an unresolved"
                        << " entrypoint";
         return false;
@@ -1317,8 +1325,8 @@
                                   const DexCompilationUnit& dex_compilation_unit) {
   // Note: if the outermost_graph_ is being compiled OSR, we should not run any
   // optimization that could lead to a HDeoptimize. The following optimizations do not.
-  HDeadCodeElimination dce(callee_graph, stats_);
-  HConstantFolding fold(callee_graph);
+  HDeadCodeElimination dce(callee_graph, stats_, "dead_code_elimination$inliner");
+  HConstantFolding fold(callee_graph, "constant_folding$inliner");
   HSharpening sharpening(callee_graph, codegen_, dex_compilation_unit, compiler_driver_);
   InstructionSimplifier simplify(callee_graph, stats_);
   IntrinsicsRecognizer intrinsics(callee_graph, stats_);
diff --git a/compiler/optimizing/inliner.h b/compiler/optimizing/inliner.h
index 486626b..a1dcd58 100644
--- a/compiler/optimizing/inliner.h
+++ b/compiler/optimizing/inliner.h
@@ -38,7 +38,7 @@
            const DexCompilationUnit& outer_compilation_unit,
            const DexCompilationUnit& caller_compilation_unit,
            CompilerDriver* compiler_driver,
-           StackHandleScopeCollection* handles,
+           VariableSizedHandleScope* handles,
            OptimizingCompilerStats* stats,
            size_t total_number_of_dex_registers,
            size_t depth)
@@ -197,7 +197,7 @@
   const size_t total_number_of_dex_registers_;
   const size_t depth_;
   size_t number_of_inlined_instructions_;
-  StackHandleScopeCollection* const handles_;
+  VariableSizedHandleScope* const handles_;
 
   DISALLOW_COPY_AND_ASSIGN(HInliner);
 };
diff --git a/compiler/optimizing/instruction_builder.cc b/compiler/optimizing/instruction_builder.cc
index f7d67db..c8c4ca7 100644
--- a/compiler/optimizing/instruction_builder.cc
+++ b/compiler/optimizing/instruction_builder.cc
@@ -81,8 +81,7 @@
       // locals (guaranteed by HGraphBuilder) and that all try blocks have been
       // visited already (from HTryBoundary scoping and reverse post order).
       bool catch_block_visited = false;
-      for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
-        HBasicBlock* current = it.Current();
+      for (HBasicBlock* current : graph_->GetReversePostOrder()) {
         if (current == current_block_) {
           catch_block_visited = true;
         } else if (current->IsTryBlock()) {
@@ -276,8 +275,8 @@
     FindNativeDebugInfoLocations(native_debug_info_locations);
   }
 
-  for (HReversePostOrderIterator block_it(*graph_); !block_it.Done(); block_it.Advance()) {
-    current_block_ = block_it.Current();
+  for (HBasicBlock* block : graph_->GetReversePostOrder()) {
+    current_block_ = block;
     uint32_t block_dex_pc = current_block_->GetDexPc();
 
     InitializeBlockLocals();
@@ -1065,7 +1064,7 @@
       // reject any class where this is violated. However, the verifier only does these checks
       // on non trivially dead instructions, so we just bailout the compilation.
       VLOG(compiler) << "Did not compile "
-                     << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_)
+                     << dex_file_->PrettyMethod(dex_compilation_unit_->GetDexMethodIndex())
                      << " because of non-sequential dex register pair in wide argument";
       MaybeRecordStat(MethodCompilationStat::kNotCompiledMalformedOpcode);
       return false;
@@ -1079,7 +1078,7 @@
 
   if (*argument_index != invoke->GetNumberOfArguments()) {
     VLOG(compiler) << "Did not compile "
-                   << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_)
+                   << dex_file_->PrettyMethod(dex_compilation_unit_->GetDexMethodIndex())
                    << " because of wrong number of arguments in invoke instruction";
     MaybeRecordStat(MethodCompilationStat::kNotCompiledMalformedOpcode);
     return false;
@@ -2716,7 +2715,7 @@
 
     default:
       VLOG(compiler) << "Did not compile "
-                     << PrettyMethod(dex_compilation_unit_->GetDexMethodIndex(), *dex_file_)
+                     << dex_file_->PrettyMethod(dex_compilation_unit_->GetDexMethodIndex())
                      << " because of unhandled instruction "
                      << instruction.Name();
       MaybeRecordStat(MethodCompilationStat::kNotCompiledUnhandledInstruction);
diff --git a/compiler/optimizing/instruction_simplifier.cc b/compiler/optimizing/instruction_simplifier.cc
index 3bb1c1d..85b461d 100644
--- a/compiler/optimizing/instruction_simplifier.cc
+++ b/compiler/optimizing/instruction_simplifier.cc
@@ -106,14 +106,17 @@
   void SimplifyFP2Int(HInvoke* invoke);
   void SimplifyStringCharAt(HInvoke* invoke);
   void SimplifyStringIsEmptyOrLength(HInvoke* invoke);
+  void SimplifyNPEOnArgN(HInvoke* invoke, size_t);
   void SimplifyMemBarrier(HInvoke* invoke, MemBarrierKind barrier_kind);
 
   OptimizingCompilerStats* stats_;
   bool simplification_occurred_ = false;
   int simplifications_at_current_position_ = 0;
-  // We ensure we do not loop infinitely. The value is a finger in the air guess
-  // that should allow enough simplification.
-  static constexpr int kMaxSamePositionSimplifications = 10;
+  // We ensure we do not loop infinitely. The value should not be too high, since that
+  // would allow looping around the same basic block too many times. The value should
+  // not be too low either, however, since we want to allow revisiting a basic block
+  // with many statements and simplifications at least once.
+  static constexpr int kMaxSamePositionSimplifications = 50;
 };
 
 void InstructionSimplifier::Run() {
@@ -124,20 +127,16 @@
 void InstructionSimplifierVisitor::Run() {
   // Iterate in reverse post order to open up more simplifications to users
   // of instructions that got simplified.
-  for (HReversePostOrderIterator it(*GetGraph()); !it.Done();) {
+  for (HBasicBlock* block : GetGraph()->GetReversePostOrder()) {
     // The simplification of an instruction to another instruction may yield
     // possibilities for other simplifications. So although we perform a reverse
     // post order visit, we sometimes need to revisit an instruction index.
-    simplification_occurred_ = false;
-    VisitBasicBlock(it.Current());
-    if (simplification_occurred_ &&
-        (simplifications_at_current_position_ < kMaxSamePositionSimplifications)) {
-      // New simplifications may be applicable to the instruction at the
-      // current index, so don't advance the iterator.
-      continue;
-    }
+    do {
+      simplification_occurred_ = false;
+      VisitBasicBlock(block);
+    } while (simplification_occurred_ &&
+             (simplifications_at_current_position_ < kMaxSamePositionSimplifications));
     simplifications_at_current_position_ = 0;
-    it.Advance();
   }
 }
 
@@ -609,11 +608,23 @@
   return nullptr;
 }
 
+static bool CmpHasBoolType(HInstruction* input, HInstruction* cmp) {
+  if (input->GetType() == Primitive::kPrimBoolean) {
+    return true;  // input has direct boolean type
+  } else if (cmp->GetUses().HasExactlyOneElement()) {
+    // Comparison also has boolean type if both its input and the instruction
+    // itself feed into the same phi node.
+    HInstruction* user = cmp->GetUses().front().GetUser();
+    return user->IsPhi() && user->HasInput(input) && user->HasInput(cmp);
+  }
+  return false;
+}
+
 void InstructionSimplifierVisitor::VisitEqual(HEqual* equal) {
   HInstruction* input_const = equal->GetConstantRight();
   if (input_const != nullptr) {
     HInstruction* input_value = equal->GetLeastConstantLeft();
-    if (input_value->GetType() == Primitive::kPrimBoolean && input_const->IsIntConstant()) {
+    if (CmpHasBoolType(input_value, equal) && input_const->IsIntConstant()) {
       HBasicBlock* block = equal->GetBlock();
       // We are comparing the boolean to a constant which is of type int and can
       // be any constant.
@@ -623,6 +634,7 @@
         block->RemoveInstruction(equal);
         RecordSimplification();
       } else if (input_const->AsIntConstant()->IsFalse()) {
+        // Replace (bool_value == false) with !bool_value
         equal->ReplaceWith(GetGraph()->InsertOppositeCondition(input_value, equal));
         block->RemoveInstruction(equal);
         RecordSimplification();
@@ -644,11 +656,12 @@
   HInstruction* input_const = not_equal->GetConstantRight();
   if (input_const != nullptr) {
     HInstruction* input_value = not_equal->GetLeastConstantLeft();
-    if (input_value->GetType() == Primitive::kPrimBoolean && input_const->IsIntConstant()) {
+    if (CmpHasBoolType(input_value, not_equal) && input_const->IsIntConstant()) {
       HBasicBlock* block = not_equal->GetBlock();
       // We are comparing the boolean to a constant which is of type int and can
       // be any constant.
       if (input_const->AsIntConstant()->IsTrue()) {
+        // Replace (bool_value != true) with !bool_value
         not_equal->ReplaceWith(GetGraph()->InsertOppositeCondition(input_value, not_equal));
         block->RemoveInstruction(not_equal);
         RecordSimplification();
@@ -1846,6 +1859,16 @@
   invoke->GetBlock()->ReplaceAndRemoveInstructionWith(invoke, replacement);
 }
 
+// This method should only be used on intrinsics whose sole way of throwing an
+// exception is raising a NPE when the nth argument is null. If that argument
+// is provably non-null, we can clear the flag.
+void InstructionSimplifierVisitor::SimplifyNPEOnArgN(HInvoke* invoke, size_t n) {
+  HInstruction* arg = invoke->InputAt(n);
+  if (!arg->CanBeNull()) {
+    invoke->SetCanThrow(false);
+  }
+}
+
 void InstructionSimplifierVisitor::SimplifyMemBarrier(HInvoke* invoke, MemBarrierKind barrier_kind) {
   uint32_t dex_pc = invoke->GetDexPc();
   HMemoryBarrier* mem_barrier = new (GetGraph()->GetArena()) HMemoryBarrier(barrier_kind, dex_pc);
@@ -1899,6 +1922,10 @@
     case Intrinsics::kStringLength:
       SimplifyStringIsEmptyOrLength(instruction);
       break;
+    case Intrinsics::kStringStringIndexOf:
+    case Intrinsics::kStringStringIndexOfAfter:
+      SimplifyNPEOnArgN(instruction, 1);  // 0th has own NullCheck
+      break;
     case Intrinsics::kUnsafeLoadFence:
       SimplifyMemBarrier(instruction, MemBarrierKind::kLoadAny);
       break;
diff --git a/compiler/optimizing/instruction_simplifier_arm.h b/compiler/optimizing/instruction_simplifier_arm.h
index 782110c..9b54511 100644
--- a/compiler/optimizing/instruction_simplifier_arm.h
+++ b/compiler/optimizing/instruction_simplifier_arm.h
@@ -48,7 +48,7 @@
 class InstructionSimplifierArm : public HOptimization {
  public:
   InstructionSimplifierArm(HGraph* graph, OptimizingCompilerStats* stats)
-    : HOptimization(graph, kInstructionSimplifierArmPassName, stats) {}
+      : HOptimization(graph, kInstructionSimplifierArmPassName, stats) {}
 
   static constexpr const char* kInstructionSimplifierArmPassName = "instruction_simplifier_arm";
 
diff --git a/compiler/optimizing/instruction_simplifier_arm64.cc b/compiler/optimizing/instruction_simplifier_arm64.cc
index d0dd650..6d107d5 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.cc
+++ b/compiler/optimizing/instruction_simplifier_arm64.cc
@@ -140,13 +140,6 @@
 
 void InstructionSimplifierArm64Visitor::VisitArrayGet(HArrayGet* instruction) {
   size_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
-  // Don't move the array pointer if it is charAt because we need to take the count first.
-  // TODO: Implement reading (length + compression) for String compression feature from
-  // negative offset (count_offset - data_offset) using LDP and clobbering an extra temporary.
-  // Note that "LDR (Immediate)" does not have a "signed offset" encoding.
-  if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
-    return;
-  }
   if (TryExtractArrayAccessAddress(instruction,
                                    instruction->GetArray(),
                                    instruction->GetIndex(),
diff --git a/compiler/optimizing/instruction_simplifier_arm64.h b/compiler/optimizing/instruction_simplifier_arm64.h
index f71684e..d4cb1f1 100644
--- a/compiler/optimizing/instruction_simplifier_arm64.h
+++ b/compiler/optimizing/instruction_simplifier_arm64.h
@@ -82,9 +82,10 @@
 class InstructionSimplifierArm64 : public HOptimization {
  public:
   InstructionSimplifierArm64(HGraph* graph, OptimizingCompilerStats* stats)
-    : HOptimization(graph, kInstructionSimplifierArm64PassName, stats) {}
-  static constexpr const char* kInstructionSimplifierArm64PassName
-      = "instruction_simplifier_arm64";
+      : HOptimization(graph, kInstructionSimplifierArm64PassName, stats) {}
+
+  static constexpr const char* kInstructionSimplifierArm64PassName = "instruction_simplifier_arm64";
+
   void Run() OVERRIDE {
     InstructionSimplifierArm64Visitor visitor(graph_, stats_);
     visitor.VisitReversePostOrder();
diff --git a/compiler/optimizing/instruction_simplifier_shared.cc b/compiler/optimizing/instruction_simplifier_shared.cc
index 04e063c..c2b1374 100644
--- a/compiler/optimizing/instruction_simplifier_shared.cc
+++ b/compiler/optimizing/instruction_simplifier_shared.cc
@@ -231,15 +231,6 @@
                                   HInstruction* array,
                                   HInstruction* index,
                                   size_t data_offset) {
-  if (kEmitCompilerReadBarrier) {
-    // The read barrier instrumentation does not support the
-    // HIntermediateAddress instruction yet.
-    //
-    // TODO: Handle this case properly in the ARM64 and ARM code generator and
-    // re-enable this optimization; otherwise, remove this TODO.
-    // b/26601270
-    return false;
-  }
   if (index->IsConstant() ||
       (index->IsBoundsCheck() && index->AsBoundsCheck()->GetIndex()->IsConstant())) {
     // When the index is a constant all the addressing can be fitted in the
@@ -251,14 +242,20 @@
     // The access may require a runtime call or the original array pointer.
     return false;
   }
+  if (kEmitCompilerReadBarrier &&
+      access->IsArrayGet() &&
+      access->GetType() == Primitive::kPrimNot) {
+    // For object arrays, the read barrier instrumentation requires
+    // the original array pointer.
+    return false;
+  }
 
   // Proceed to extract the base address computation.
   HGraph* graph = access->GetBlock()->GetGraph();
   ArenaAllocator* arena = graph->GetArena();
 
   HIntConstant* offset = graph->GetIntConstant(data_offset);
-  HIntermediateAddress* address =
-      new (arena) HIntermediateAddress(array, offset, kNoDexPc);
+  HIntermediateAddress* address = new (arena) HIntermediateAddress(array, offset, kNoDexPc);
   // TODO: Is it ok to not have this on the intermediate address?
   // address->SetReferenceTypeInfo(array->GetReferenceTypeInfo());
   access->GetBlock()->InsertInstructionBefore(address, access);
diff --git a/compiler/optimizing/intrinsics.cc b/compiler/optimizing/intrinsics.cc
index 412ccfc..fc6ff7b 100644
--- a/compiler/optimizing/intrinsics.cc
+++ b/compiler/optimizing/intrinsics.cc
@@ -133,8 +133,7 @@
 
 void IntrinsicsRecognizer::Run() {
   ScopedObjectAccess soa(Thread::Current());
-  for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
-    HBasicBlock* block = it.Current();
+  for (HBasicBlock* block : graph_->GetReversePostOrder()) {
     for (HInstructionIterator inst_it(block->GetInstructions()); !inst_it.Done();
          inst_it.Advance()) {
       HInstruction* inst = inst_it.Current();
@@ -146,7 +145,7 @@
           if (!CheckInvokeType(intrinsic, invoke)) {
             LOG(WARNING) << "Found an intrinsic with unexpected invoke type: "
                 << intrinsic << " for "
-                << PrettyMethod(invoke->GetDexMethodIndex(), invoke->GetDexFile())
+                << invoke->GetDexFile().PrettyMethod(invoke->GetDexMethodIndex())
                 << invoke->DebugName();
           } else {
             invoke->SetIntrinsic(intrinsic,
diff --git a/compiler/optimizing/intrinsics_arm.cc b/compiler/optimizing/intrinsics_arm.cc
index 96a6ecb..8234b24 100644
--- a/compiler/optimizing/intrinsics_arm.cc
+++ b/compiler/optimizing/intrinsics_arm.cc
@@ -652,9 +652,9 @@
       (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
        invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
   LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           can_call ?
-                                                               LocationSummary::kCallOnSlowPath :
-                                                               LocationSummary::kNoCall,
+                                                           (can_call
+                                                                ? LocationSummary::kCallOnSlowPath
+                                                                : LocationSummary::kNoCall),
                                                            kIntrinsified);
   if (can_call && kUseBakerReadBarrier) {
     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
@@ -663,7 +663,7 @@
   locations->SetInAt(1, Location::RequiresRegister());
   locations->SetInAt(2, Location::RequiresRegister());
   locations->SetOut(Location::RequiresRegister(),
-                    can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap);
+                    (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
   if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
     // We need a temporary register for the read barrier marking slow
     // path in InstructionCodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier.
@@ -891,8 +891,13 @@
 static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* arena,
                                                 HInvoke* invoke,
                                                 Primitive::Type type) {
+  bool can_call = kEmitCompilerReadBarrier &&
+      kUseBakerReadBarrier &&
+      (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
   LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           LocationSummary::kNoCall,
+                                                           (can_call
+                                                                ? LocationSummary::kCallOnSlowPath
+                                                                : LocationSummary::kNoCall),
                                                            kIntrinsified);
   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
   locations->SetInAt(1, Location::RequiresRegister());
@@ -901,36 +906,65 @@
   locations->SetInAt(4, Location::RequiresRegister());
 
   // If heap poisoning is enabled, we don't want the unpoisoning
-  // operations to potentially clobber the output.
-  Location::OutputOverlap overlaps = (kPoisonHeapReferences && type == Primitive::kPrimNot)
+  // operations to potentially clobber the output. Likewise when
+  // emitting a (Baker) read barrier, which may call.
+  Location::OutputOverlap overlaps =
+      ((kPoisonHeapReferences && type == Primitive::kPrimNot) || can_call)
       ? Location::kOutputOverlap
       : Location::kNoOutputOverlap;
   locations->SetOut(Location::RequiresRegister(), overlaps);
 
+  // Temporary registers used in CAS. In the object case
+  // (UnsafeCASObject intrinsic), these are also used for
+  // card-marking, and possibly for (Baker) read barrier.
   locations->AddTemp(Location::RequiresRegister());  // Pointer.
   locations->AddTemp(Location::RequiresRegister());  // Temp 1.
 }
 
-static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGeneratorARM* codegen) {
+static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorARM* codegen) {
   DCHECK_NE(type, Primitive::kPrimLong);
 
   ArmAssembler* assembler = codegen->GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
 
-  Register out = locations->Out().AsRegister<Register>();              // Boolean result.
+  Location out_loc = locations->Out();
+  Register out = out_loc.AsRegister<Register>();                  // Boolean result.
 
-  Register base = locations->InAt(1).AsRegister<Register>();           // Object pointer.
-  Register offset = locations->InAt(2).AsRegisterPairLow<Register>();  // Offset (discard high 4B).
-  Register expected_lo = locations->InAt(3).AsRegister<Register>();    // Expected.
-  Register value_lo = locations->InAt(4).AsRegister<Register>();       // Value.
+  Register base = locations->InAt(1).AsRegister<Register>();      // Object pointer.
+  Location offset_loc = locations->InAt(2);
+  Register offset = offset_loc.AsRegisterPairLow<Register>();     // Offset (discard high 4B).
+  Register expected = locations->InAt(3).AsRegister<Register>();  // Expected.
+  Register value = locations->InAt(4).AsRegister<Register>();     // Value.
 
-  Register tmp_ptr = locations->GetTemp(0).AsRegister<Register>();     // Pointer to actual memory.
-  Register tmp_lo = locations->GetTemp(1).AsRegister<Register>();      // Value in memory.
+  Location tmp_ptr_loc = locations->GetTemp(0);
+  Register tmp_ptr = tmp_ptr_loc.AsRegister<Register>();          // Pointer to actual memory.
+  Register tmp = locations->GetTemp(1).AsRegister<Register>();    // Value in memory.
 
   if (type == Primitive::kPrimNot) {
+    // The only read barrier implementation supporting the
+    // UnsafeCASObject intrinsic is the Baker-style read barriers.
+    DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
+
     // Mark card for object assuming new value is stored. Worst case we will mark an unchanged
     // object and scan the receiver at the next GC for nothing.
     bool value_can_be_null = true;  // TODO: Worth finding out this information?
-    codegen->MarkGCCard(tmp_ptr, tmp_lo, base, value_lo, value_can_be_null);
+    codegen->MarkGCCard(tmp_ptr, tmp, base, value, value_can_be_null);
+
+    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+      // Need to make sure the reference stored in the field is a to-space
+      // one before attempting the CAS or the CAS could fail incorrectly.
+      codegen->GenerateReferenceLoadWithBakerReadBarrier(
+          invoke,
+          out_loc,  // Unused, used only as a "temporary" within the read barrier.
+          base,
+          /* offset */ 0u,
+          /* index */ offset_loc,
+          ScaleFactor::TIMES_1,
+          tmp_ptr_loc,
+          /* needs_null_check */ false,
+          /* always_update_field */ true,
+          &tmp);
+    }
   }
 
   // Prevent reordering with prior memory operations.
@@ -942,12 +976,12 @@
   __ add(tmp_ptr, base, ShifterOperand(offset));
 
   if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
-    codegen->GetAssembler()->PoisonHeapReference(expected_lo);
-    if (value_lo == expected_lo) {
-      // Do not poison `value_lo`, as it is the same register as
-      // `expected_lo`, which has just been poisoned.
+    __ PoisonHeapReference(expected);
+    if (value == expected) {
+      // Do not poison `value`, as it is the same register as
+      // `expected`, which has just been poisoned.
     } else {
-      codegen->GetAssembler()->PoisonHeapReference(value_lo);
+      __ PoisonHeapReference(value);
     }
   }
 
@@ -959,37 +993,29 @@
   Label loop_head;
   __ Bind(&loop_head);
 
-  // TODO: When `type == Primitive::kPrimNot`, add a read barrier for
-  // the reference stored in the object before attempting the CAS,
-  // similar to the one in the art::Unsafe_compareAndSwapObject JNI
-  // implementation.
-  //
-  // Note that this code is not (yet) used when read barriers are
-  // enabled (see IntrinsicLocationsBuilderARM::VisitUnsafeCASObject).
-  DCHECK(!(type == Primitive::kPrimNot && kEmitCompilerReadBarrier));
-  __ ldrex(tmp_lo, tmp_ptr);
+  __ ldrex(tmp, tmp_ptr);
 
-  __ subs(tmp_lo, tmp_lo, ShifterOperand(expected_lo));
+  __ subs(tmp, tmp, ShifterOperand(expected));
 
   __ it(EQ, ItState::kItT);
-  __ strex(tmp_lo, value_lo, tmp_ptr, EQ);
-  __ cmp(tmp_lo, ShifterOperand(1), EQ);
+  __ strex(tmp, value, tmp_ptr, EQ);
+  __ cmp(tmp, ShifterOperand(1), EQ);
 
   __ b(&loop_head, EQ);
 
   __ dmb(ISH);
 
-  __ rsbs(out, tmp_lo, ShifterOperand(1));
+  __ rsbs(out, tmp, ShifterOperand(1));
   __ it(CC);
   __ mov(out, ShifterOperand(0), CC);
 
   if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
-    codegen->GetAssembler()->UnpoisonHeapReference(expected_lo);
-    if (value_lo == expected_lo) {
-      // Do not unpoison `value_lo`, as it is the same register as
-      // `expected_lo`, which has just been unpoisoned.
+    __ UnpoisonHeapReference(expected);
+    if (value == expected) {
+      // Do not unpoison `value`, as it is the same register as
+      // `expected`, which has just been unpoisoned.
     } else {
-      codegen->GetAssembler()->UnpoisonHeapReference(value_lo);
+      __ UnpoisonHeapReference(value);
     }
   }
 }
@@ -998,33 +1024,23 @@
   CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke, Primitive::kPrimInt);
 }
 void IntrinsicLocationsBuilderARM::VisitUnsafeCASObject(HInvoke* invoke) {
-  // The UnsafeCASObject intrinsic is missing a read barrier, and
-  // therefore sometimes does not work as expected (b/25883050).
-  // Turn it off temporarily as a quick fix, until the read barrier is
-  // implemented (see TODO in GenCAS).
-  //
-  // TODO(rpl): Implement read barrier support in GenCAS and re-enable
-  // this intrinsic.
-  if (kEmitCompilerReadBarrier) {
+  // The only read barrier implementation supporting the
+  // UnsafeCASObject intrinsic is the Baker-style read barriers.
+  if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
     return;
   }
 
   CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke, Primitive::kPrimNot);
 }
 void IntrinsicCodeGeneratorARM::VisitUnsafeCASInt(HInvoke* invoke) {
-  GenCas(invoke->GetLocations(), Primitive::kPrimInt, codegen_);
+  GenCas(invoke, Primitive::kPrimInt, codegen_);
 }
 void IntrinsicCodeGeneratorARM::VisitUnsafeCASObject(HInvoke* invoke) {
-  // The UnsafeCASObject intrinsic is missing a read barrier, and
-  // therefore sometimes does not work as expected (b/25883050).
-  // Turn it off temporarily as a quick fix, until the read barrier is
-  // implemented (see TODO in GenCAS).
-  //
-  // TODO(rpl): Implement read barrier support in GenCAS and re-enable
-  // this intrinsic.
-  DCHECK(!kEmitCompilerReadBarrier);
+  // The only read barrier implementation supporting the
+  // UnsafeCASObject intrinsic is the Baker-style read barriers.
+  DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
 
-  GenCas(invoke->GetLocations(), Primitive::kPrimNot, codegen_);
+  GenCas(invoke, Primitive::kPrimNot, codegen_);
 }
 
 void IntrinsicLocationsBuilderARM::VisitStringCompareTo(HInvoke* invoke) {
@@ -1042,7 +1058,6 @@
   // Need temporary registers for String compression's feature.
   if (mirror::kUseStringCompression) {
     locations->AddTemp(Location::RequiresRegister());
-    locations->AddTemp(Location::RequiresRegister());
   }
   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
 }
@@ -1058,10 +1073,9 @@
   Register temp0 = locations->GetTemp(0).AsRegister<Register>();
   Register temp1 = locations->GetTemp(1).AsRegister<Register>();
   Register temp2 = locations->GetTemp(2).AsRegister<Register>();
-  Register temp3, temp4;
+  Register temp3;
   if (mirror::kUseStringCompression) {
     temp3 = locations->GetTemp(3).AsRegister<Register>();
-    temp4 = locations->GetTemp(4).AsRegister<Register>();
   }
 
   Label loop;
@@ -1088,41 +1102,42 @@
   // Reference equality check, return 0 if same reference.
   __ subs(out, str, ShifterOperand(arg));
   __ b(&end, EQ);
+
   if (mirror::kUseStringCompression) {
-    // Load lengths of this and argument strings.
+    // Load `count` fields of this and argument strings.
     __ ldr(temp3, Address(str, count_offset));
-    __ ldr(temp4, Address(arg, count_offset));
-    // Clean out compression flag from lengths.
-    __ bic(temp0, temp3, ShifterOperand(0x80000000));
-    __ bic(IP, temp4, ShifterOperand(0x80000000));
+    __ ldr(temp2, Address(arg, count_offset));
+    // Extract lengths from the `count` fields.
+    __ Lsr(temp0, temp3, 1u);
+    __ Lsr(temp1, temp2, 1u);
   } else {
     // Load lengths of this and argument strings.
     __ ldr(temp0, Address(str, count_offset));
-    __ ldr(IP, Address(arg, count_offset));
+    __ ldr(temp1, Address(arg, count_offset));
   }
   // out = length diff.
-  __ subs(out, temp0, ShifterOperand(IP));
+  __ subs(out, temp0, ShifterOperand(temp1));
   // temp0 = min(len(str), len(arg)).
   __ it(GT);
-  __ mov(temp0, ShifterOperand(IP), GT);
+  __ mov(temp0, ShifterOperand(temp1), GT);
   // Shorter string is empty?
   __ CompareAndBranchIfZero(temp0, &end);
 
   if (mirror::kUseStringCompression) {
     // Check if both strings using same compression style to use this comparison loop.
-    __ eors(temp3, temp3, ShifterOperand(temp4));
-    __ b(&different_compression, MI);
-  }
-  // Store offset of string value in preparation for comparison loop.
-  __ mov(temp1, ShifterOperand(value_offset));
-  if (mirror::kUseStringCompression) {
+    __ eor(temp2, temp2, ShifterOperand(temp3));
+    __ Lsrs(temp2, temp2, 1u);
+    __ b(&different_compression, CS);
     // For string compression, calculate the number of bytes to compare (not chars).
     // This could in theory exceed INT32_MAX, so treat temp0 as unsigned.
-    __ cmp(temp4, ShifterOperand(0));
-    __ it(GE);
-    __ add(temp0, temp0, ShifterOperand(temp0), GE);
+    __ Lsls(temp3, temp3, 31u);  // Extract purely the compression flag.
+    __ it(NE);
+    __ add(temp0, temp0, ShifterOperand(temp0), NE);
   }
 
+  // Store offset of string value in preparation for comparison loop.
+  __ mov(temp1, ShifterOperand(value_offset));
+
   // Assertions that must hold in order to compare multiple characters at a time.
   CHECK_ALIGNED(value_offset, 8);
   static_assert(IsAligned<8>(kObjectAlignment),
@@ -1182,69 +1197,80 @@
   // The comparison is unsigned for string compression, otherwise signed.
   __ cmp(temp0, ShifterOperand(temp1, LSR, mirror::kUseStringCompression ? 3 : 4));
   __ b(&end, mirror::kUseStringCompression ? LS : LE);
+
   // Extract the characters and calculate the difference.
-  Label uncompressed_string, continue_process;
   if (mirror::kUseStringCompression) {
-    __ cmp(temp4, ShifterOperand(0));
-    __ b(&uncompressed_string, GE);
-    __ bic(temp1, temp1, ShifterOperand(0x7));
-    __ b(&continue_process);
+    // For compressed strings we need to clear 0x7 from temp1, for uncompressed we need to clear
+    // 0xf. We also need to prepare the character extraction mask `uncompressed ? 0xffffu : 0xffu`.
+    // The compression flag is now in the highest bit of temp3, so let's play some tricks.
+    __ orr(temp3, temp3, ShifterOperand(0xffu << 23));  // uncompressed ? 0xff800000u : 0x7ff80000u
+    __ bic(temp1, temp1, ShifterOperand(temp3, LSR, 31 - 3));  // &= ~(uncompressed ? 0xfu : 0x7u)
+    __ Asr(temp3, temp3, 7u);                           // uncompressed ? 0xffff0000u : 0xff0000u.
+    __ Lsr(temp2, temp2, temp1);                        // Extract second character.
+    __ Lsr(temp3, temp3, 16u);                          // uncompressed ? 0xffffu : 0xffu
+    __ Lsr(out, IP, temp1);                             // Extract first character.
+    __ and_(temp2, temp2, ShifterOperand(temp3));
+    __ and_(out, out, ShifterOperand(temp3));
+  } else {
+    __ bic(temp1, temp1, ShifterOperand(0xf));
+    __ Lsr(temp2, temp2, temp1);
+    __ Lsr(out, IP, temp1);
+    __ movt(temp2, 0);
+    __ movt(out, 0);
   }
-  __ Bind(&uncompressed_string);
-  __ bic(temp1, temp1, ShifterOperand(0xf));
-  __ Bind(&continue_process);
 
-  __ Lsr(temp2, temp2, temp1);
-  __ Lsr(IP, IP, temp1);
-  Label calculate_difference, uncompressed_string_extract_chars;
-  if (mirror::kUseStringCompression) {
-    __ cmp(temp4, ShifterOperand(0));
-    __ b(&uncompressed_string_extract_chars, GE);
-    __ ubfx(temp2, temp2, 0, 8);
-    __ ubfx(IP, IP, 0, 8);
-    __ b(&calculate_difference);
-  }
-  __ Bind(&uncompressed_string_extract_chars);
-  __ movt(temp2, 0);
-  __ movt(IP, 0);
-  __ Bind(&calculate_difference);
-  __ sub(out, IP, ShifterOperand(temp2));
-  __ b(&end);
+  __ sub(out, out, ShifterOperand(temp2));
 
   if (mirror::kUseStringCompression) {
+    __ b(&end);
+    __ Bind(&different_compression);
+
+    // Comparison for different compression style.
     const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
     DCHECK_EQ(c_char_size, 1u);
-    Label loop_arg_compressed, loop_this_compressed, find_diff;
-    // Comparison for different compression style.
-    // This part is when THIS is compressed and ARG is not.
-    __ Bind(&different_compression);
-    __ add(temp2, str, ShifterOperand(value_offset));
-    __ add(temp3, arg, ShifterOperand(value_offset));
-    __ cmp(temp4, ShifterOperand(0));
-    __ b(&loop_arg_compressed, LT);
 
-    __ Bind(&loop_this_compressed);
-    __ ldrb(IP, Address(temp2, c_char_size, Address::PostIndex));
-    __ ldrh(temp4, Address(temp3, char_size, Address::PostIndex));
-    __ cmp(IP, ShifterOperand(temp4));
-    __ b(&find_diff, NE);
-    __ subs(temp0, temp0, ShifterOperand(1));
-    __ b(&loop_this_compressed, GT);
-    __ b(&end);
+    // We want to free up the temp3, currently holding `str.count`, for comparison.
+    // So, we move it to the bottom bit of the iteration count `temp0` which we tnen
+    // need to treat as unsigned. Start by freeing the bit with an ADD and continue
+    // further down by a LSRS+SBC which will flip the meaning of the flag but allow
+    // `subs temp0, #2; bhi different_compression_loop` to serve as the loop condition.
+    __ add(temp0, temp0, ShifterOperand(temp0));  // Unlike LSL, this ADD is always 16-bit.
+    // `temp1` will hold the compressed data pointer, `temp2` the uncompressed data pointer.
+    __ mov(temp1, ShifterOperand(str));
+    __ mov(temp2, ShifterOperand(arg));
+    __ Lsrs(temp3, temp3, 1u);                // Continue the move of the compression flag.
+    __ it(CS, kItThen);                       // Interleave with selection of temp1 and temp2.
+    __ mov(temp1, ShifterOperand(arg), CS);   // Preserves flags.
+    __ mov(temp2, ShifterOperand(str), CS);   // Preserves flags.
+    __ sbc(temp0, temp0, ShifterOperand(0));  // Complete the move of the compression flag.
 
-    // This part is when THIS is not compressed and ARG is.
-    __ Bind(&loop_arg_compressed);
-    __ ldrh(IP, Address(temp2, char_size, Address::PostIndex));
-    __ ldrb(temp4, Address(temp3, c_char_size, Address::PostIndex));
-    __ cmp(IP, ShifterOperand(temp4));
-    __ b(&find_diff, NE);
-    __ subs(temp0, temp0, ShifterOperand(1));
-    __ b(&loop_arg_compressed, GT);
+    // Adjust temp1 and temp2 from string pointers to data pointers.
+    __ add(temp1, temp1, ShifterOperand(value_offset));
+    __ add(temp2, temp2, ShifterOperand(value_offset));
+
+    Label different_compression_loop;
+    Label different_compression_diff;
+
+    // Main loop for different compression.
+    __ Bind(&different_compression_loop);
+    __ ldrb(IP, Address(temp1, c_char_size, Address::PostIndex));
+    __ ldrh(temp3, Address(temp2, char_size, Address::PostIndex));
+    __ cmp(IP, ShifterOperand(temp3));
+    __ b(&different_compression_diff, NE);
+    __ subs(temp0, temp0, ShifterOperand(2));
+    __ b(&different_compression_loop, HI);
     __ b(&end);
 
     // Calculate the difference.
-    __ Bind(&find_diff);
-    __ sub(out, IP, ShifterOperand(temp4));
+    __ Bind(&different_compression_diff);
+    __ sub(out, IP, ShifterOperand(temp3));
+    // Flip the difference if the `arg` is compressed.
+    // `temp0` contains inverted `str` compression flag, i.e the same as `arg` compression flag.
+    __ Lsrs(temp0, temp0, 1u);
+    static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
+                  "Expecting 0=compressed, 1=uncompressed");
+    __ it(CC);
+    __ rsb(out, out, ShifterOperand(0), CC);
   }
 
   __ Bind(&end);
@@ -1282,7 +1308,7 @@
   Register temp1 = locations->GetTemp(1).AsRegister<Register>();
   Register temp2 = locations->GetTemp(2).AsRegister<Register>();
 
-  Label loop, preloop;
+  Label loop;
   Label end;
   Label return_true;
   Label return_false;
@@ -1301,6 +1327,10 @@
     __ CompareAndBranchIfZero(arg, &return_false);
   }
 
+  // Reference equality check, return true if same reference.
+  __ cmp(str, ShifterOperand(arg));
+  __ b(&return_true, EQ);
+
   if (!optimizations.GetArgumentIsString()) {
     // Instanceof check for the argument by comparing class fields.
     // All string objects must have the same type since String cannot be subclassed.
@@ -1312,48 +1342,44 @@
     __ b(&return_false, NE);
   }
 
-  // Load lengths of this and argument strings.
+  // Load `count` fields of this and argument strings.
   __ ldr(temp, Address(str, count_offset));
   __ ldr(temp1, Address(arg, count_offset));
-  // Check if lengths are equal, return false if they're not.
+  // Check if `count` fields are equal, return false if they're not.
   // Also compares the compression style, if differs return false.
   __ cmp(temp, ShifterOperand(temp1));
   __ b(&return_false, NE);
-  // Return true if both strings are empty.
-  if (mirror::kUseStringCompression) {
-    // Length needs to be masked out first because 0 is treated as compressed.
-    __ bic(temp, temp, ShifterOperand(0x80000000));
-  }
+  // Return true if both strings are empty. Even with string compression `count == 0` means empty.
+  static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
+                "Expecting 0=compressed, 1=uncompressed");
   __ cbz(temp, &return_true);
-  // Reference equality check, return true if same reference.
-  __ cmp(str, ShifterOperand(arg));
-  __ b(&return_true, EQ);
 
-  // Assertions that must hold in order to compare strings 2 characters at a time.
+  // Assertions that must hold in order to compare strings 4 bytes at a time.
   DCHECK_ALIGNED(value_offset, 4);
   static_assert(IsAligned<4>(kObjectAlignment), "String data must be aligned for fast compare.");
 
   if (mirror::kUseStringCompression) {
-    // If not compressed, directly to fast compare. Else do preprocess on length.
-    __ cmp(temp1, ShifterOperand(0));
-    __ b(&preloop, GT);
-    // Mask out compression flag and adjust length for compressed string (8-bit)
-    // as if it is a 16-bit data, new_length = (length + 1) / 2.
-    __ add(temp, temp, ShifterOperand(1));
-    __ Lsr(temp, temp, 1);
-    __ Bind(&preloop);
+    // For string compression, calculate the number of bytes to compare (not chars).
+    // This could in theory exceed INT32_MAX, so treat temp as unsigned.
+    __ Lsrs(temp, temp, 1u);                        // Extract length and check compression flag.
+    __ it(CS);                                      // If uncompressed,
+    __ add(temp, temp, ShifterOperand(temp), CS);   //   double the byte count.
   }
-  // Loop to compare strings 2 characters at a time starting at the front of the string.
-  // Ok to do this because strings with an odd length are zero-padded.
+
+  // Store offset of string value in preparation for comparison loop.
   __ LoadImmediate(temp1, value_offset);
+
+  // Loop to compare strings 4 bytes at a time starting at the front of the string.
+  // Ok to do this because strings are zero-padded to kObjectAlignment.
   __ Bind(&loop);
   __ ldr(out, Address(str, temp1));
   __ ldr(temp2, Address(arg, temp1));
+  __ add(temp1, temp1, ShifterOperand(sizeof(uint32_t)));
   __ cmp(out, ShifterOperand(temp2));
   __ b(&return_false, NE);
-  __ add(temp1, temp1, ShifterOperand(sizeof(uint32_t)));
-  __ subs(temp, temp, ShifterOperand(sizeof(uint32_t) /  sizeof(uint16_t)));
-  __ b(&loop, GT);
+  // With string compression, we have compared 4 bytes, otherwise 2 chars.
+  __ subs(temp, temp, ShifterOperand(mirror::kUseStringCompression ? 4 : 2));
+  __ b(&loop, HI);
 
   // Return true and exit the function.
   // If loop does not result in returning false, we return true.
@@ -1929,7 +1955,7 @@
     //   if (src_ptr != end_ptr) {
     //     uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
     //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
-    //     bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+    //     bool is_gray = (rb_state == ReadBarrier::GrayState());
     //     if (is_gray) {
     //       // Slow-path copy.
     //       do {
@@ -1970,9 +1996,8 @@
     // Given the numeric representation, it's enough to check the low bit of the
     // rb_state. We do that by shifting the bit out of the lock word with LSRS
     // which can be a 16-bit instruction unlike the TST immediate.
-    static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
-    static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
-    static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+    static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+    static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
     __ Lsrs(temp2, temp2, LockWord::kReadBarrierStateShift + 1);
     // Carry flag is the last bit shifted out by LSRS.
     __ b(read_barrier_slow_path->GetEntryLabel(), CS);
@@ -2462,8 +2487,8 @@
     const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
     // String's length.
     __ ldr(IP, Address(srcObj, count_offset));
-    __ cmp(IP, ShifterOperand(0));
-    __ b(&compressed_string_preloop, LT);
+    __ tst(IP, ShifterOperand(1));
+    __ b(&compressed_string_preloop, EQ);
   }
   __ add(src_ptr, src_ptr, ShifterOperand(srcBegin, LSL, 1));
 
@@ -2498,9 +2523,10 @@
   __ subs(num_chr, num_chr, ShifterOperand(1));
   __ strh(IP, Address(dst_ptr, char_size, Address::PostIndex));
   __ b(&remainder, GT);
-  __ b(&done);
 
   if (mirror::kUseStringCompression) {
+    __ b(&done);
+
     const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
     DCHECK_EQ(c_char_size, 1u);
     // Copy loop for compressed src, copying 1 character (8-bit) to (16-bit) at a time.
@@ -2585,6 +2611,9 @@
 UNIMPLEMENTED_INTRINSIC(ARM, IntegerLowestOneBit)
 UNIMPLEMENTED_INTRINSIC(ARM, LongLowestOneBit)
 
+UNIMPLEMENTED_INTRINSIC(ARM, StringStringIndexOf);
+UNIMPLEMENTED_INTRINSIC(ARM, StringStringIndexOfAfter);
+
 // 1.8.
 UNIMPLEMENTED_INTRINSIC(ARM, UnsafeGetAndAddInt)
 UNIMPLEMENTED_INTRINSIC(ARM, UnsafeGetAndAddLong)
diff --git a/compiler/optimizing/intrinsics_arm64.cc b/compiler/optimizing/intrinsics_arm64.cc
index e2c1802..451abc5 100644
--- a/compiler/optimizing/intrinsics_arm64.cc
+++ b/compiler/optimizing/intrinsics_arm64.cc
@@ -863,9 +863,9 @@
     codegen->GenerateReferenceLoadWithBakerReadBarrier(invoke,
                                                        trg_loc,
                                                        base,
-                                                       /* offset */ 0U,
+                                                       /* offset */ 0u,
                                                        /* index */ offset_loc,
-                                                       /* scale_factor */ 0U,
+                                                       /* scale_factor */ 0u,
                                                        temp,
                                                        /* needs_null_check */ false,
                                                        is_volatile);
@@ -880,7 +880,7 @@
 
     if (type == Primitive::kPrimNot) {
       DCHECK(trg.IsW());
-      codegen->MaybeGenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc);
+      codegen->MaybeGenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0u, offset_loc);
     }
   }
 }
@@ -890,9 +890,9 @@
       (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
        invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
   LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           can_call ?
-                                                               LocationSummary::kCallOnSlowPath :
-                                                               LocationSummary::kNoCall,
+                                                           (can_call
+                                                                ? LocationSummary::kCallOnSlowPath
+                                                                : LocationSummary::kNoCall),
                                                            kIntrinsified);
   if (can_call && kUseBakerReadBarrier) {
     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
@@ -901,7 +901,7 @@
   locations->SetInAt(1, Location::RequiresRegister());
   locations->SetInAt(2, Location::RequiresRegister());
   locations->SetOut(Location::RequiresRegister(),
-                    can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap);
+                    (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
 }
 
 void IntrinsicLocationsBuilderARM64::VisitUnsafeGet(HInvoke* invoke) {
@@ -1086,8 +1086,13 @@
 static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena,
                                        HInvoke* invoke,
                                        Primitive::Type type) {
+  bool can_call = kEmitCompilerReadBarrier &&
+      kUseBakerReadBarrier &&
+      (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
   LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           LocationSummary::kNoCall,
+                                                           (can_call
+                                                                ? LocationSummary::kCallOnSlowPath
+                                                                : LocationSummary::kNoCall),
                                                            kIntrinsified);
   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
   locations->SetInAt(1, Location::RequiresRegister());
@@ -1096,20 +1101,29 @@
   locations->SetInAt(4, Location::RequiresRegister());
 
   // If heap poisoning is enabled, we don't want the unpoisoning
-  // operations to potentially clobber the output.
-  Location::OutputOverlap overlaps = (kPoisonHeapReferences && type == Primitive::kPrimNot)
+  // operations to potentially clobber the output. Likewise when
+  // emitting a (Baker) read barrier, which may call.
+  Location::OutputOverlap overlaps =
+      ((kPoisonHeapReferences && type == Primitive::kPrimNot) || can_call)
       ? Location::kOutputOverlap
       : Location::kNoOutputOverlap;
   locations->SetOut(Location::RequiresRegister(), overlaps);
+  if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+    // Temporary register for (Baker) read barrier.
+    locations->AddTemp(Location::RequiresRegister());
+  }
 }
 
-static void GenCas(LocationSummary* locations, Primitive::Type type, CodeGeneratorARM64* codegen) {
+static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorARM64* codegen) {
   MacroAssembler* masm = codegen->GetVIXLAssembler();
+  LocationSummary* locations = invoke->GetLocations();
 
-  Register out = WRegisterFrom(locations->Out());                  // Boolean result.
+  Location out_loc = locations->Out();
+  Register out = WRegisterFrom(out_loc);                           // Boolean result.
 
   Register base = WRegisterFrom(locations->InAt(1));               // Object pointer.
-  Register offset = XRegisterFrom(locations->InAt(2));             // Long offset.
+  Location offset_loc = locations->InAt(2);
+  Register offset = XRegisterFrom(offset_loc);                     // Long offset.
   Register expected = RegisterFrom(locations->InAt(3), type);      // Expected.
   Register value = RegisterFrom(locations->InAt(4), type);         // Value.
 
@@ -1118,6 +1132,27 @@
     // Mark card for object assuming new value is stored.
     bool value_can_be_null = true;  // TODO: Worth finding out this information?
     codegen->MarkGCCard(base, value, value_can_be_null);
+
+    // The only read barrier implementation supporting the
+    // UnsafeCASObject intrinsic is the Baker-style read barriers.
+    DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
+
+    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+      Register temp = WRegisterFrom(locations->GetTemp(0));
+      // Need to make sure the reference stored in the field is a to-space
+      // one before attempting the CAS or the CAS could fail incorrectly.
+      codegen->GenerateReferenceLoadWithBakerReadBarrier(
+          invoke,
+          out_loc,  // Unused, used only as a "temporary" within the read barrier.
+          base,
+          /* offset */ 0u,
+          /* index */ offset_loc,
+          /* scale_factor */ 0u,
+          temp,
+          /* needs_null_check */ false,
+          /* use_load_acquire */ false,
+          /* always_update_field */ true);
+    }
   }
 
   UseScratchRegisterScope temps(masm);
@@ -1145,14 +1180,6 @@
 
   vixl::aarch64::Label loop_head, exit_loop;
   __ Bind(&loop_head);
-  // TODO: When `type == Primitive::kPrimNot`, add a read barrier for
-  // the reference stored in the object before attempting the CAS,
-  // similar to the one in the art::Unsafe_compareAndSwapObject JNI
-  // implementation.
-  //
-  // Note that this code is not (yet) used when read barriers are
-  // enabled (see IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject).
-  DCHECK(!(type == Primitive::kPrimNot && kEmitCompilerReadBarrier));
   __ Ldaxr(tmp_value, MemOperand(tmp_ptr));
   __ Cmp(tmp_value, expected);
   __ B(&exit_loop, ne);
@@ -1179,14 +1206,9 @@
   CreateIntIntIntIntIntToInt(arena_, invoke, Primitive::kPrimLong);
 }
 void IntrinsicLocationsBuilderARM64::VisitUnsafeCASObject(HInvoke* invoke) {
-  // The UnsafeCASObject intrinsic is missing a read barrier, and
-  // therefore sometimes does not work as expected (b/25883050).
-  // Turn it off temporarily as a quick fix, until the read barrier is
-  // implemented (see TODO in GenCAS).
-  //
-  // TODO(rpl): Implement read barrier support in GenCAS and re-enable
-  // this intrinsic.
-  if (kEmitCompilerReadBarrier) {
+  // The only read barrier implementation supporting the
+  // UnsafeCASObject intrinsic is the Baker-style read barriers.
+  if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
     return;
   }
 
@@ -1194,22 +1216,17 @@
 }
 
 void IntrinsicCodeGeneratorARM64::VisitUnsafeCASInt(HInvoke* invoke) {
-  GenCas(invoke->GetLocations(), Primitive::kPrimInt, codegen_);
+  GenCas(invoke, Primitive::kPrimInt, codegen_);
 }
 void IntrinsicCodeGeneratorARM64::VisitUnsafeCASLong(HInvoke* invoke) {
-  GenCas(invoke->GetLocations(), Primitive::kPrimLong, codegen_);
+  GenCas(invoke, Primitive::kPrimLong, codegen_);
 }
 void IntrinsicCodeGeneratorARM64::VisitUnsafeCASObject(HInvoke* invoke) {
-  // The UnsafeCASObject intrinsic is missing a read barrier, and
-  // therefore sometimes does not work as expected (b/25883050).
-  // Turn it off temporarily as a quick fix, until the read barrier is
-  // implemented (see TODO in GenCAS).
-  //
-  // TODO(rpl): Implement read barrier support in GenCAS and re-enable
-  // this intrinsic.
-  DCHECK(!kEmitCompilerReadBarrier);
+  // The only read barrier implementation supporting the
+  // UnsafeCASObject intrinsic is the Baker-style read barriers.
+  DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
 
-  GenCas(invoke->GetLocations(), Primitive::kPrimNot, codegen_);
+  GenCas(invoke, Primitive::kPrimNot, codegen_);
 }
 
 void IntrinsicLocationsBuilderARM64::VisitStringCompareTo(HInvoke* invoke) {
@@ -1226,7 +1243,6 @@
   // Need temporary registers for String compression's feature.
   if (mirror::kUseStringCompression) {
     locations->AddTemp(Location::RequiresRegister());
-    locations->AddTemp(Location::RequiresRegister());
   }
   locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
 }
@@ -1244,10 +1260,9 @@
   Register temp0 = WRegisterFrom(locations->GetTemp(0));
   Register temp1 = WRegisterFrom(locations->GetTemp(1));
   Register temp2 = WRegisterFrom(locations->GetTemp(2));
-  Register temp3, temp5;
+  Register temp3;
   if (mirror::kUseStringCompression) {
     temp3 = WRegisterFrom(locations->GetTemp(3));
-    temp5 = WRegisterFrom(locations->GetTemp(4));
   }
 
   vixl::aarch64::Label loop;
@@ -1274,68 +1289,65 @@
   // Reference equality check, return 0 if same reference.
   __ Subs(out, str, arg);
   __ B(&end, eq);
+
   if (mirror::kUseStringCompression) {
-    // Load lengths of this and argument strings.
+    // Load `count` fields of this and argument strings.
     __ Ldr(temp3, HeapOperand(str, count_offset));
-    __ Ldr(temp5, HeapOperand(arg, count_offset));
+    __ Ldr(temp2, HeapOperand(arg, count_offset));
     // Clean out compression flag from lengths.
-    __ Bic(temp0, temp3, Operand(static_cast<int32_t>(0x80000000)));
-    __ Bic(temp1, temp5, Operand(static_cast<int32_t>(0x80000000)));
+    __ Lsr(temp0, temp3, 1u);
+    __ Lsr(temp1, temp2, 1u);
   } else {
     // Load lengths of this and argument strings.
     __ Ldr(temp0, HeapOperand(str, count_offset));
     __ Ldr(temp1, HeapOperand(arg, count_offset));
   }
-  // Return zero if both strings are empty.
-  __ Orr(out, temp0, temp1);
-  __ Cbz(out, &end);
   // out = length diff.
   __ Subs(out, temp0, temp1);
-  // temp2 = min(len(str), len(arg)).
-  __ Csel(temp2, temp1, temp0, ge);
+  // temp0 = min(len(str), len(arg)).
+  __ Csel(temp0, temp1, temp0, ge);
   // Shorter string is empty?
-  __ Cbz(temp2, &end);
+  __ Cbz(temp0, &end);
 
   if (mirror::kUseStringCompression) {
     // Check if both strings using same compression style to use this comparison loop.
-    __ Eor(temp3.W(), temp3, Operand(temp5));
-    __ Tbnz(temp3.W(), kWRegSize - 1, &different_compression);
+    __ Eor(temp2, temp2, Operand(temp3));
+    // Interleave with compression flag extraction which is needed for both paths
+    // and also set flags which is needed only for the different compressions path.
+    __ Ands(temp3.W(), temp3.W(), Operand(1));
+    __ Tbnz(temp2, 0, &different_compression);  // Does not use flags.
   }
   // Store offset of string value in preparation for comparison loop.
   __ Mov(temp1, value_offset);
   if (mirror::kUseStringCompression) {
     // For string compression, calculate the number of bytes to compare (not chars).
-    // This could be in theory exceed INT32_MAX, so treat temp2 as unsigned.
-    vixl::aarch64::Label let_it_signed;
-    __ Cmp(temp5, Operand(0));
-    __ B(lt, &let_it_signed);
-    __ Add(temp2, temp2, Operand(temp2));
-    __ Bind(&let_it_signed);
+    // This could in theory exceed INT32_MAX, so treat temp0 as unsigned.
+    __ Lsl(temp0, temp0, temp3);
   }
 
   UseScratchRegisterScope scratch_scope(masm);
   Register temp4 = scratch_scope.AcquireX();
 
-  // Assertions that must hold in order to compare strings 4 characters at a time.
+  // Assertions that must hold in order to compare strings 8 bytes at a time.
   DCHECK_ALIGNED(value_offset, 8);
   static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded");
 
   const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
   DCHECK_EQ(char_size, 2u);
 
-  // Promote temp0 to an X reg, ready for LDR.
-  temp0 = temp0.X();
+  // Promote temp2 to an X reg, ready for LDR.
+  temp2 = temp2.X();
 
   // Loop to compare 4x16-bit characters at a time (ok because of string data alignment).
   __ Bind(&loop);
   __ Ldr(temp4, MemOperand(str.X(), temp1.X()));
-  __ Ldr(temp0, MemOperand(arg.X(), temp1.X()));
-  __ Cmp(temp4, temp0);
+  __ Ldr(temp2, MemOperand(arg.X(), temp1.X()));
+  __ Cmp(temp4, temp2);
   __ B(ne, &find_char_diff);
   __ Add(temp1, temp1, char_size * 4);
   // With string compression, we have compared 8 bytes, otherwise 4 chars.
-  __ Subs(temp2, temp2, (mirror::kUseStringCompression) ? 8 : 4);
-  __ B(hi, &loop);
+  __ Subs(temp0, temp0, (mirror::kUseStringCompression) ? 8 : 4);
+  __ B(&loop, hi);
   __ B(&end);
 
   // Promote temp1 to an X reg, ready for EOR.
@@ -1344,78 +1356,85 @@
   // Find the single character difference.
   __ Bind(&find_char_diff);
   // Get the bit position of the first character that differs.
-  __ Eor(temp1, temp0, temp4);
+  __ Eor(temp1, temp2, temp4);
   __ Rbit(temp1, temp1);
   __ Clz(temp1, temp1);
+
   // If the number of chars remaining <= the index where the difference occurs (0-3), then
   // the difference occurs outside the remaining string data, so just return length diff (out).
   // Unlike ARM, we're doing the comparison in one go here, without the subtraction at the
   // find_char_diff_2nd_cmp path, so it doesn't matter whether the comparison is signed or
   // unsigned when string compression is disabled.
   // When it's enabled, the comparison must be unsigned.
-  __ Cmp(temp2, Operand(temp1.W(), LSR, (mirror::kUseStringCompression) ? 3 : 4));
+  __ Cmp(temp0, Operand(temp1.W(), LSR, (mirror::kUseStringCompression) ? 3 : 4));
   __ B(ls, &end);
+
   // Extract the characters and calculate the difference.
-  vixl::aarch64::Label uncompressed_string, continue_process;
   if (mirror:: kUseStringCompression) {
-    __ Tbz(temp5, kWRegSize - 1, &uncompressed_string);
     __ Bic(temp1, temp1, 0x7);
-    __ B(&continue_process);
+    __ Bic(temp1, temp1, Operand(temp3.X(), LSL, 3u));
+  } else {
+    __ Bic(temp1, temp1, 0xf);
   }
-  __ Bind(&uncompressed_string);
-  __ Bic(temp1, temp1, 0xf);
-  __ Bind(&continue_process);
-
-  __ Lsr(temp0, temp0, temp1);
+  __ Lsr(temp2, temp2, temp1);
   __ Lsr(temp4, temp4, temp1);
-  vixl::aarch64::Label uncompressed_string_extract_chars;
   if (mirror::kUseStringCompression) {
-    __ Tbz(temp5, kWRegSize - 1, &uncompressed_string_extract_chars);
-    __ And(temp4, temp4, 0xff);
-    __ Sub(out, temp4.W(), Operand(temp0.W(), UXTB));
-    __ B(&end);
+    // Prioritize the case of compressed strings and calculate such result first.
+    __ Uxtb(temp1, temp4);
+    __ Sub(out, temp1.W(), Operand(temp2.W(), UXTB));
+    __ Tbz(temp3, 0u, &end);  // If actually compressed, we're done.
   }
-  __ Bind(&uncompressed_string_extract_chars);
-  __ And(temp4, temp4, 0xffff);
-  __ Sub(out, temp4.W(), Operand(temp0.W(), UXTH));
-  __ B(&end);
+  __ Uxth(temp4, temp4);
+  __ Sub(out, temp4.W(), Operand(temp2.W(), UXTH));
 
   if (mirror::kUseStringCompression) {
-    vixl::aarch64::Label loop_this_compressed, loop_arg_compressed, find_diff;
+    __ B(&end);
+    __ Bind(&different_compression);
+
+    // Comparison for different compression style.
     const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
     DCHECK_EQ(c_char_size, 1u);
-    temp0 = temp0.W();
     temp1 = temp1.W();
-    // Comparison for different compression style.
-    // This part is when THIS is compressed and ARG is not.
-    __ Bind(&different_compression);
-    __ Add(temp0, str, Operand(value_offset));
-    __ Add(temp1, arg, Operand(value_offset));
-    __ Cmp(temp5, Operand(0));
-    __ B(lt, &loop_arg_compressed);
+    temp2 = temp2.W();
+    temp4 = temp4.W();
 
-    __ Bind(&loop_this_compressed);
-    __ Ldrb(temp3, MemOperand(temp0.X(), c_char_size, PostIndex));
-    __ Ldrh(temp5, MemOperand(temp1.X(), char_size, PostIndex));
-    __ Cmp(temp3, Operand(temp5));
-    __ B(ne, &find_diff);
-    __ Subs(temp2, temp2, 1);
-    __ B(gt, &loop_this_compressed);
-    __ B(&end);
+    // `temp1` will hold the compressed data pointer, `temp2` the uncompressed data pointer.
+    // Note that flags have been set by the `str` compression flag extraction to `temp3`
+    // before branching to the `different_compression` label.
+    __ Csel(temp1, str, arg, eq);   // Pointer to the compressed string.
+    __ Csel(temp2, str, arg, ne);   // Pointer to the uncompressed string.
 
-    // This part is when THIS is not compressed and ARG is.
-    __ Bind(&loop_arg_compressed);
-    __ Ldrh(temp3, MemOperand(temp0.X(), char_size, PostIndex));
-    __ Ldrb(temp5, MemOperand(temp1.X(), c_char_size, PostIndex));
-    __ Cmp(temp3, Operand(temp5));
-    __ B(ne, &find_diff);
-    __ Subs(temp2, temp2, 1);
-    __ B(gt, &loop_arg_compressed);
+    // We want to free up the temp3, currently holding `str` compression flag, for comparison.
+    // So, we move it to the bottom bit of the iteration count `temp0` which we then need to treat
+    // as unsigned. Start by freeing the bit with a LSL and continue further down by a SUB which
+    // will allow `subs temp0, #2; bhi different_compression_loop` to serve as the loop condition.
+    __ Lsl(temp0, temp0, 1u);
+
+    // Adjust temp1 and temp2 from string pointers to data pointers.
+    __ Add(temp1, temp1, Operand(value_offset));
+    __ Add(temp2, temp2, Operand(value_offset));
+
+    // Complete the move of the compression flag.
+    __ Sub(temp0, temp0, Operand(temp3));
+
+    vixl::aarch64::Label different_compression_loop;
+    vixl::aarch64::Label different_compression_diff;
+
+    __ Bind(&different_compression_loop);
+    __ Ldrb(temp4, MemOperand(temp1.X(), c_char_size, PostIndex));
+    __ Ldrh(temp3, MemOperand(temp2.X(), char_size, PostIndex));
+    __ Subs(temp4, temp4, Operand(temp3));
+    __ B(&different_compression_diff, ne);
+    __ Subs(temp0, temp0, 2);
+    __ B(&different_compression_loop, hi);
     __ B(&end);
 
     // Calculate the difference.
-    __ Bind(&find_diff);
-    __ Sub(out, temp3.W(), Operand(temp5.W(), UXTH));
+    __ Bind(&different_compression_diff);
+    __ Tst(temp0, Operand(1));
+    static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
+                  "Expecting 0=compressed, 1=uncompressed");
+    __ Cneg(out, temp4, ne);
   }
 
   __ Bind(&end);
@@ -1451,7 +1470,7 @@
   Register temp1 = WRegisterFrom(locations->GetTemp(0));
   Register temp2 = WRegisterFrom(locations->GetTemp(1));
 
-  vixl::aarch64::Label loop, preloop;
+  vixl::aarch64::Label loop;
   vixl::aarch64::Label end;
   vixl::aarch64::Label return_true;
   vixl::aarch64::Label return_false;
@@ -1485,49 +1504,46 @@
     __ B(&return_false, ne);
   }
 
-  // Load lengths of this and argument strings.
+  // Load `count` fields of this and argument strings.
   __ Ldr(temp, MemOperand(str.X(), count_offset));
   __ Ldr(temp1, MemOperand(arg.X(), count_offset));
-  // Check if lengths are equal, return false if they're not.
+  // Check if `count` fields are equal, return false if they're not.
   // Also compares the compression style, if differs return false.
   __ Cmp(temp, temp1);
   __ B(&return_false, ne);
-  // Return true if both strings are empty.
-  if (mirror::kUseStringCompression) {
-    // Length needs to be masked out first because 0 is treated as compressed.
-    __ Bic(temp, temp, Operand(static_cast<int32_t>(0x80000000)));
-  }
+  // Return true if both strings are empty. Even with string compression `count == 0` means empty.
+  static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
+                "Expecting 0=compressed, 1=uncompressed");
   __ Cbz(temp, &return_true);
 
-  // Assertions that must hold in order to compare strings 4 characters at a time.
+  // Assertions that must hold in order to compare strings 8 bytes at a time.
   DCHECK_ALIGNED(value_offset, 8);
   static_assert(IsAligned<8>(kObjectAlignment), "String of odd length is not zero padded");
 
   if (mirror::kUseStringCompression) {
-    // If not compressed, directly to fast compare. Else do preprocess on length.
-    __ Cmp(temp1, Operand(0));
-    __ B(&preloop, gt);
-    // Mask out compression flag and adjust length for compressed string (8-bit)
-    // as if it is a 16-bit data, new_length = (length + 1) / 2
-    __ Add(temp, temp, 1);
-    __ Lsr(temp, temp, 1);
+    // For string compression, calculate the number of bytes to compare (not chars).
+    // This could in theory exceed INT32_MAX, so treat temp as unsigned.
+    __ Lsr(temp, temp, 1u);             // Extract length.
+    __ And(temp1, temp1, Operand(1));   // Extract compression flag.
+    __ Lsl(temp, temp, temp1);          // Calculate number of bytes to compare.
   }
 
+  // Store offset of string value in preparation for comparison loop
+  __ Mov(temp1, value_offset);
+
   temp1 = temp1.X();
   temp2 = temp2.X();
-  // Loop to compare strings 4 characters at a time starting at the beginning of the string.
-  // Ok to do this because strings are zero-padded to be 8-byte aligned.
-  // Store offset of string value in preparation for comparison loop
-  __ Bind(&preloop);
-  __ Mov(temp1, value_offset);
+  // Loop to compare strings 8 bytes at a time starting at the front of the string.
+  // Ok to do this because strings are zero-padded to kObjectAlignment.
   __ Bind(&loop);
   __ Ldr(out, MemOperand(str.X(), temp1));
   __ Ldr(temp2, MemOperand(arg.X(), temp1));
   __ Add(temp1, temp1, Operand(sizeof(uint64_t)));
   __ Cmp(out, temp2);
   __ B(&return_false, ne);
-  __ Sub(temp, temp, Operand(4), SetFlags);
-  __ B(&loop, gt);
+  // With string compression, we have compared 8 bytes, otherwise 4 chars.
+  __ Sub(temp, temp, Operand(mirror::kUseStringCompression ? 8 : 4), SetFlags);
+  __ B(&loop, hi);
 
   // Return true and exit the function.
   // If loop does not result in returning false, we return true.
@@ -1883,10 +1899,6 @@
   locations->AddTemp(Location::RequiresRegister());
   locations->AddTemp(Location::RequiresRegister());
   locations->AddTemp(Location::RequiresRegister());
-  // Need temporary register for String compression feature.
-  if (mirror::kUseStringCompression) {
-    locations->AddTemp(Location::RequiresRegister());
-  }
 }
 
 void IntrinsicCodeGeneratorARM64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
@@ -1914,10 +1926,6 @@
   Register src_ptr = XRegisterFrom(locations->GetTemp(0));
   Register num_chr = XRegisterFrom(locations->GetTemp(1));
   Register tmp1 = XRegisterFrom(locations->GetTemp(2));
-  Register tmp3;
-  if (mirror::kUseStringCompression) {
-    tmp3 = WRegisterFrom(locations->GetTemp(3));
-  }
 
   UseScratchRegisterScope temps(masm);
   Register dst_ptr = temps.AcquireX();
@@ -1940,8 +1948,8 @@
     // Location of count in string.
     const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
     // String's length.
-    __ Ldr(tmp3, MemOperand(srcObj, count_offset));
-    __ Tbnz(tmp3, kWRegSize - 1, &compressed_string_preloop);
+    __ Ldr(tmp2, MemOperand(srcObj, count_offset));
+    __ Tbz(tmp2, 0, &compressed_string_preloop);
   }
   __ Add(src_ptr, src_ptr, Operand(srcBegin, LSL, 1));
 
@@ -2642,7 +2650,7 @@
       //   if (src_ptr != end_ptr) {
       //     uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
       //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
-      //     bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+      //     bool is_gray = (rb_state == ReadBarrier::GrayState());
       //     if (is_gray) {
       //       // Slow-path copy.
       //       do {
@@ -2687,9 +2695,8 @@
       codegen_->AddSlowPath(read_barrier_slow_path);
 
       // Given the numeric representation, it's enough to check the low bit of the rb_state.
-      static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
-      static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
-      static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+      static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+      static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
       __ Tbnz(tmp, LockWord::kReadBarrierStateShift, read_barrier_slow_path->GetEntryLabel());
 
       // Fast-path copy.
@@ -2772,6 +2779,9 @@
 UNIMPLEMENTED_INTRINSIC(ARM64, IntegerLowestOneBit)
 UNIMPLEMENTED_INTRINSIC(ARM64, LongLowestOneBit)
 
+UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOf);
+UNIMPLEMENTED_INTRINSIC(ARM64, StringStringIndexOfAfter);
+
 // 1.8.
 UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndAddInt)
 UNIMPLEMENTED_INTRINSIC(ARM64, UnsafeGetAndAddLong)
diff --git a/compiler/optimizing/intrinsics_arm_vixl.cc b/compiler/optimizing/intrinsics_arm_vixl.cc
new file mode 100644
index 0000000..e4bef34
--- /dev/null
+++ b/compiler/optimizing/intrinsics_arm_vixl.cc
@@ -0,0 +1,2717 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "intrinsics_arm_vixl.h"
+
+#include "arch/arm/instruction_set_features_arm.h"
+#include "code_generator_arm_vixl.h"
+#include "common_arm.h"
+#include "lock_word.h"
+#include "mirror/array-inl.h"
+
+#include "aarch32/constants-aarch32.h"
+
+namespace art {
+namespace arm {
+
+#define __ assembler->GetVIXLAssembler()->
+
+using helpers::DRegisterFrom;
+using helpers::HighRegisterFrom;
+using helpers::InputDRegisterAt;
+using helpers::InputRegisterAt;
+using helpers::InputSRegisterAt;
+using helpers::InputVRegisterAt;
+using helpers::Int32ConstantFrom;
+using helpers::LocationFrom;
+using helpers::LowRegisterFrom;
+using helpers::LowSRegisterFrom;
+using helpers::OutputDRegister;
+using helpers::OutputRegister;
+using helpers::OutputVRegister;
+using helpers::RegisterFrom;
+using helpers::SRegisterFrom;
+
+using namespace vixl::aarch32;  // NOLINT(build/namespaces)
+
+ArmVIXLAssembler* IntrinsicCodeGeneratorARMVIXL::GetAssembler() {
+  return codegen_->GetAssembler();
+}
+
+ArenaAllocator* IntrinsicCodeGeneratorARMVIXL::GetAllocator() {
+  return codegen_->GetGraph()->GetArena();
+}
+
+// Default slow-path for fallback (calling the managed code to handle the intrinsic) in an
+// intrinsified call. This will copy the arguments into the positions for a regular call.
+//
+// Note: The actual parameters are required to be in the locations given by the invoke's location
+//       summary. If an intrinsic modifies those locations before a slowpath call, they must be
+//       restored!
+//
+// Note: If an invoke wasn't sharpened, we will put down an invoke-virtual here. That's potentially
+//       sub-optimal (compared to a direct pointer call), but this is a slow-path.
+
+class IntrinsicSlowPathARMVIXL : public SlowPathCodeARMVIXL {
+ public:
+  explicit IntrinsicSlowPathARMVIXL(HInvoke* invoke)
+      : SlowPathCodeARMVIXL(invoke), invoke_(invoke) {}
+
+  Location MoveArguments(CodeGenerator* codegen) {
+    InvokeDexCallingConventionVisitorARM calling_convention_visitor;
+    IntrinsicVisitor::MoveArguments(invoke_, codegen, &calling_convention_visitor);
+    return calling_convention_visitor.GetMethodLocation();
+  }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    ArmVIXLAssembler* assembler = down_cast<ArmVIXLAssembler*>(codegen->GetAssembler());
+    __ Bind(GetEntryLabel());
+
+    SaveLiveRegisters(codegen, invoke_->GetLocations());
+
+    Location method_loc = MoveArguments(codegen);
+
+    if (invoke_->IsInvokeStaticOrDirect()) {
+      codegen->GenerateStaticOrDirectCall(invoke_->AsInvokeStaticOrDirect(), method_loc);
+    } else {
+      codegen->GenerateVirtualCall(invoke_->AsInvokeVirtual(), method_loc);
+    }
+    codegen->RecordPcInfo(invoke_, invoke_->GetDexPc(), this);
+
+    // Copy the result back to the expected output.
+    Location out = invoke_->GetLocations()->Out();
+    if (out.IsValid()) {
+      DCHECK(out.IsRegister());  // TODO: Replace this when we support output in memory.
+      DCHECK(!invoke_->GetLocations()->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
+      codegen->MoveFromReturnRegister(out, invoke_->GetType());
+    }
+
+    RestoreLiveRegisters(codegen, invoke_->GetLocations());
+    __ B(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE { return "IntrinsicSlowPath"; }
+
+ private:
+  // The instruction where this slow path is happening.
+  HInvoke* const invoke_;
+
+  DISALLOW_COPY_AND_ASSIGN(IntrinsicSlowPathARMVIXL);
+};
+
+// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
+class ReadBarrierSystemArrayCopySlowPathARMVIXL : public SlowPathCodeARMVIXL {
+ public:
+  explicit ReadBarrierSystemArrayCopySlowPathARMVIXL(HInstruction* instruction)
+      : SlowPathCodeARMVIXL(instruction) {
+    DCHECK(kEmitCompilerReadBarrier);
+    DCHECK(kUseBakerReadBarrier);
+  }
+
+  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
+    CodeGeneratorARMVIXL* arm_codegen = down_cast<CodeGeneratorARMVIXL*>(codegen);
+    ArmVIXLAssembler* assembler = arm_codegen->GetAssembler();
+    LocationSummary* locations = instruction_->GetLocations();
+    DCHECK(locations->CanCall());
+    DCHECK(instruction_->IsInvokeStaticOrDirect())
+        << "Unexpected instruction in read barrier arraycopy slow path: "
+        << instruction_->DebugName();
+    DCHECK(instruction_->GetLocations()->Intrinsified());
+    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
+
+    int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
+    uint32_t element_size_shift = Primitive::ComponentSizeShift(Primitive::kPrimNot);
+    uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
+
+    vixl32::Register dest = InputRegisterAt(instruction_, 2);
+    Location dest_pos = locations->InAt(3);
+    vixl32::Register src_curr_addr = RegisterFrom(locations->GetTemp(0));
+    vixl32::Register dst_curr_addr = RegisterFrom(locations->GetTemp(1));
+    vixl32::Register src_stop_addr = RegisterFrom(locations->GetTemp(2));
+    vixl32::Register tmp = RegisterFrom(locations->GetTemp(3));
+
+    __ Bind(GetEntryLabel());
+    // Compute the base destination address in `dst_curr_addr`.
+    if (dest_pos.IsConstant()) {
+      int32_t constant = Int32ConstantFrom(dest_pos);
+      __ Add(dst_curr_addr, dest, element_size * constant + offset);
+    } else {
+      __ Add(dst_curr_addr,
+             dest,
+             Operand(RegisterFrom(dest_pos), vixl32::LSL, element_size_shift));
+      __ Add(dst_curr_addr, dst_curr_addr, offset);
+    }
+
+    vixl32::Label loop;
+    __ Bind(&loop);
+    __ Ldr(tmp, MemOperand(src_curr_addr, element_size, PostIndex));
+    assembler->MaybeUnpoisonHeapReference(tmp);
+    // TODO: Inline the mark bit check before calling the runtime?
+    // tmp = ReadBarrier::Mark(tmp);
+    // No need to save live registers; it's taken care of by the
+    // entrypoint. Also, there is no need to update the stack mask,
+    // as this runtime call will not trigger a garbage collection.
+    // (See ReadBarrierMarkSlowPathARM::EmitNativeCode for more
+    // explanations.)
+    DCHECK(!tmp.IsSP());
+    DCHECK(!tmp.IsLR());
+    DCHECK(!tmp.IsPC());
+    // IP is used internally by the ReadBarrierMarkRegX entry point
+    // as a temporary (and not preserved).  It thus cannot be used by
+    // any live register in this slow path.
+    DCHECK(!src_curr_addr.Is(ip));
+    DCHECK(!dst_curr_addr.Is(ip));
+    DCHECK(!src_stop_addr.Is(ip));
+    DCHECK(!tmp.Is(ip));
+    DCHECK(tmp.IsRegister()) << tmp;
+    int32_t entry_point_offset =
+        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kArmPointerSize>(tmp.GetCode());
+    // This runtime call does not require a stack map.
+    arm_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
+    assembler->MaybePoisonHeapReference(tmp);
+    __ Str(tmp, MemOperand(dst_curr_addr, element_size, PostIndex));
+    __ Cmp(src_curr_addr, src_stop_addr);
+    __ B(ne, &loop);
+    __ B(GetExitLabel());
+  }
+
+  const char* GetDescription() const OVERRIDE {
+    return "ReadBarrierSystemArrayCopySlowPathARMVIXL";
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathARMVIXL);
+};
+
+IntrinsicLocationsBuilderARMVIXL::IntrinsicLocationsBuilderARMVIXL(CodeGeneratorARMVIXL* codegen)
+    : arena_(codegen->GetGraph()->GetArena()),
+      assembler_(codegen->GetAssembler()),
+      features_(codegen->GetInstructionSetFeatures()) {}
+
+bool IntrinsicLocationsBuilderARMVIXL::TryDispatch(HInvoke* invoke) {
+  Dispatch(invoke);
+  LocationSummary* res = invoke->GetLocations();
+  if (res == nullptr) {
+    return false;
+  }
+  return res->Intrinsified();
+}
+
+static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  locations->SetOut(Location::RequiresRegister());
+}
+
+static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresFpuRegister());
+}
+
+static void MoveFPToInt(LocationSummary* locations, bool is64bit, ArmVIXLAssembler* assembler) {
+  Location input = locations->InAt(0);
+  Location output = locations->Out();
+  if (is64bit) {
+    __ Vmov(LowRegisterFrom(output), HighRegisterFrom(output), DRegisterFrom(input));
+  } else {
+    __ Vmov(RegisterFrom(output), SRegisterFrom(input));
+  }
+}
+
+static void MoveIntToFP(LocationSummary* locations, bool is64bit, ArmVIXLAssembler* assembler) {
+  Location input = locations->InAt(0);
+  Location output = locations->Out();
+  if (is64bit) {
+    __ Vmov(DRegisterFrom(output), LowRegisterFrom(input), HighRegisterFrom(input));
+  } else {
+    __ Vmov(SRegisterFrom(output), RegisterFrom(input));
+  }
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
+  CreateFPToIntLocations(arena_, invoke);
+}
+void IntrinsicLocationsBuilderARMVIXL::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
+  CreateIntToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
+  MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
+}
+void IntrinsicCodeGeneratorARMVIXL::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
+  MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
+  CreateFPToIntLocations(arena_, invoke);
+}
+void IntrinsicLocationsBuilderARMVIXL::VisitFloatIntBitsToFloat(HInvoke* invoke) {
+  CreateIntToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
+  MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
+}
+void IntrinsicCodeGeneratorARMVIXL::VisitFloatIntBitsToFloat(HInvoke* invoke) {
+  MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
+}
+
+static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
+}
+
+static void GenNumberOfLeadingZeros(LocationSummary* locations,
+                                    Primitive::Type type,
+                                    ArmVIXLAssembler* assembler) {
+  Location in = locations->InAt(0);
+  vixl32::Register out = RegisterFrom(locations->Out());
+
+  DCHECK((type == Primitive::kPrimInt) || (type == Primitive::kPrimLong));
+
+  if (type == Primitive::kPrimLong) {
+    vixl32::Register in_reg_lo = LowRegisterFrom(in);
+    vixl32::Register in_reg_hi = HighRegisterFrom(in);
+    vixl32::Label end;
+    __ Clz(out, in_reg_hi);
+    __ Cbnz(in_reg_hi, &end);
+    __ Clz(out, in_reg_lo);
+    __ Add(out, out, 32);
+    __ Bind(&end);
+  } else {
+    __ Clz(out, RegisterFrom(in));
+  }
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
+  GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
+  GenNumberOfLeadingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
+}
+
+static void GenNumberOfTrailingZeros(LocationSummary* locations,
+                                     Primitive::Type type,
+                                     ArmVIXLAssembler* assembler) {
+  DCHECK((type == Primitive::kPrimInt) || (type == Primitive::kPrimLong));
+
+  vixl32::Register out = RegisterFrom(locations->Out());
+
+  if (type == Primitive::kPrimLong) {
+    vixl32::Register in_reg_lo = LowRegisterFrom(locations->InAt(0));
+    vixl32::Register in_reg_hi = HighRegisterFrom(locations->InAt(0));
+    vixl32::Label end;
+    __ Rbit(out, in_reg_lo);
+    __ Clz(out, out);
+    __ Cbnz(in_reg_lo, &end);
+    __ Rbit(out, in_reg_hi);
+    __ Clz(out, out);
+    __ Add(out, out, 32);
+    __ Bind(&end);
+  } else {
+    vixl32::Register in = RegisterFrom(locations->InAt(0));
+    __ Rbit(out, in);
+    __ Clz(out, out);
+  }
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
+  GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
+  GenNumberOfTrailingZeros(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
+}
+
+static void MathAbsFP(HInvoke* invoke, ArmVIXLAssembler* assembler) {
+  __ Vabs(OutputVRegister(invoke), InputVRegisterAt(invoke, 0));
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsDouble(HInvoke* invoke) {
+  CreateFPToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsDouble(HInvoke* invoke) {
+  MathAbsFP(invoke, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsFloat(HInvoke* invoke) {
+  CreateFPToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsFloat(HInvoke* invoke) {
+  MathAbsFP(invoke, GetAssembler());
+}
+
+static void CreateIntToIntPlusTemp(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+
+  locations->AddTemp(Location::RequiresRegister());
+}
+
+static void GenAbsInteger(LocationSummary* locations,
+                          bool is64bit,
+                          ArmVIXLAssembler* assembler) {
+  Location in = locations->InAt(0);
+  Location output = locations->Out();
+
+  vixl32::Register mask = RegisterFrom(locations->GetTemp(0));
+
+  if (is64bit) {
+    vixl32::Register in_reg_lo = LowRegisterFrom(in);
+    vixl32::Register in_reg_hi = HighRegisterFrom(in);
+    vixl32::Register out_reg_lo = LowRegisterFrom(output);
+    vixl32::Register out_reg_hi = HighRegisterFrom(output);
+
+    DCHECK(!out_reg_lo.Is(in_reg_hi)) << "Diagonal overlap unexpected.";
+
+    __ Asr(mask, in_reg_hi, 31);
+    __ Adds(out_reg_lo, in_reg_lo, mask);
+    __ Adc(out_reg_hi, in_reg_hi, mask);
+    __ Eor(out_reg_lo, mask, out_reg_lo);
+    __ Eor(out_reg_hi, mask, out_reg_hi);
+  } else {
+    vixl32::Register in_reg = RegisterFrom(in);
+    vixl32::Register out_reg = RegisterFrom(output);
+
+    __ Asr(mask, in_reg, 31);
+    __ Add(out_reg, in_reg, mask);
+    __ Eor(out_reg, mask, out_reg);
+  }
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsInt(HInvoke* invoke) {
+  CreateIntToIntPlusTemp(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsInt(HInvoke* invoke) {
+  GenAbsInteger(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
+}
+
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathAbsLong(HInvoke* invoke) {
+  CreateIntToIntPlusTemp(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathAbsLong(HInvoke* invoke) {
+  GenAbsInteger(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
+}
+
+static void GenMinMax(HInvoke* invoke, bool is_min, ArmVIXLAssembler* assembler) {
+  vixl32::Register op1 = InputRegisterAt(invoke, 0);
+  vixl32::Register op2 = InputRegisterAt(invoke, 1);
+  vixl32::Register out = OutputRegister(invoke);
+
+  __ Cmp(op1, op2);
+
+  {
+    AssemblerAccurateScope aas(assembler->GetVIXLAssembler(),
+                               3 * kMaxInstructionSizeInBytes,
+                               CodeBufferCheckScope::kMaximumSize);
+
+    __ ite(is_min ? lt : gt);
+    __ mov(is_min ? lt : gt, out, op1);
+    __ mov(is_min ? ge : le, out, op2);
+  }
+}
+
+static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathMinIntInt(HInvoke* invoke) {
+  CreateIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathMinIntInt(HInvoke* invoke) {
+  GenMinMax(invoke, /* is_min */ true, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathMaxIntInt(HInvoke* invoke) {
+  CreateIntIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathMaxIntInt(HInvoke* invoke) {
+  GenMinMax(invoke, /* is_min */ false, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathSqrt(HInvoke* invoke) {
+  CreateFPToFPLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathSqrt(HInvoke* invoke) {
+  ArmVIXLAssembler* assembler = GetAssembler();
+  __ Vsqrt(OutputDRegister(invoke), InputDRegisterAt(invoke, 0));
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekByte(HInvoke* invoke) {
+  ArmVIXLAssembler* assembler = GetAssembler();
+  // Ignore upper 4B of long address.
+  __ Ldrsb(OutputRegister(invoke), LowRegisterFrom(invoke->GetLocations()->InAt(0)));
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekIntNative(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekIntNative(HInvoke* invoke) {
+  ArmVIXLAssembler* assembler = GetAssembler();
+  // Ignore upper 4B of long address.
+  __ Ldr(OutputRegister(invoke), LowRegisterFrom(invoke->GetLocations()->InAt(0)));
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekLongNative(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekLongNative(HInvoke* invoke) {
+  ArmVIXLAssembler* assembler = GetAssembler();
+  // Ignore upper 4B of long address.
+  vixl32::Register addr = LowRegisterFrom(invoke->GetLocations()->InAt(0));
+  // Worst case: Control register bit SCTLR.A = 0. Then unaligned accesses throw a processor
+  // exception. So we can't use ldrd as addr may be unaligned.
+  vixl32::Register lo = LowRegisterFrom(invoke->GetLocations()->Out());
+  vixl32::Register hi = HighRegisterFrom(invoke->GetLocations()->Out());
+  if (addr.Is(lo)) {
+    __ Ldr(hi, MemOperand(addr, 4));
+    __ Ldr(lo, addr);
+  } else {
+    __ Ldr(lo, addr);
+    __ Ldr(hi, MemOperand(addr, 4));
+  }
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPeekShortNative(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPeekShortNative(HInvoke* invoke) {
+  ArmVIXLAssembler* assembler = GetAssembler();
+  // Ignore upper 4B of long address.
+  __ Ldrsh(OutputRegister(invoke), LowRegisterFrom(invoke->GetLocations()->InAt(0)));
+}
+
+static void CreateIntIntToVoidLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeByte(HInvoke* invoke) {
+  CreateIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeByte(HInvoke* invoke) {
+  ArmVIXLAssembler* assembler = GetAssembler();
+  __ Strb(InputRegisterAt(invoke, 1), LowRegisterFrom(invoke->GetLocations()->InAt(0)));
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeIntNative(HInvoke* invoke) {
+  CreateIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeIntNative(HInvoke* invoke) {
+  ArmVIXLAssembler* assembler = GetAssembler();
+  __ Str(InputRegisterAt(invoke, 1), LowRegisterFrom(invoke->GetLocations()->InAt(0)));
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeLongNative(HInvoke* invoke) {
+  CreateIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeLongNative(HInvoke* invoke) {
+  ArmVIXLAssembler* assembler = GetAssembler();
+  // Ignore upper 4B of long address.
+  vixl32::Register addr = LowRegisterFrom(invoke->GetLocations()->InAt(0));
+  // Worst case: Control register bit SCTLR.A = 0. Then unaligned accesses throw a processor
+  // exception. So we can't use ldrd as addr may be unaligned.
+  __ Str(LowRegisterFrom(invoke->GetLocations()->InAt(1)), addr);
+  __ Str(HighRegisterFrom(invoke->GetLocations()->InAt(1)), MemOperand(addr, 4));
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMemoryPokeShortNative(HInvoke* invoke) {
+  CreateIntIntToVoidLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMemoryPokeShortNative(HInvoke* invoke) {
+  ArmVIXLAssembler* assembler = GetAssembler();
+  __ Strh(InputRegisterAt(invoke, 1), LowRegisterFrom(invoke->GetLocations()->InAt(0)));
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitThreadCurrentThread(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitThreadCurrentThread(HInvoke* invoke) {
+  ArmVIXLAssembler* assembler = GetAssembler();
+  __ Ldr(OutputRegister(invoke),
+         MemOperand(tr, Thread::PeerOffset<kArmPointerSize>().Int32Value()));
+}
+
+static void GenUnsafeGet(HInvoke* invoke,
+                         Primitive::Type type,
+                         bool is_volatile,
+                         CodeGeneratorARMVIXL* codegen) {
+  LocationSummary* locations = invoke->GetLocations();
+  ArmVIXLAssembler* assembler = codegen->GetAssembler();
+  Location base_loc = locations->InAt(1);
+  vixl32::Register base = InputRegisterAt(invoke, 1);     // Object pointer.
+  Location offset_loc = locations->InAt(2);
+  vixl32::Register offset = LowRegisterFrom(offset_loc);  // Long offset, lo part only.
+  Location trg_loc = locations->Out();
+
+  switch (type) {
+    case Primitive::kPrimInt: {
+      vixl32::Register trg = RegisterFrom(trg_loc);
+      __ Ldr(trg, MemOperand(base, offset));
+      if (is_volatile) {
+        __ Dmb(vixl32::ISH);
+      }
+      break;
+    }
+
+    case Primitive::kPrimNot: {
+      vixl32::Register trg = RegisterFrom(trg_loc);
+      if (kEmitCompilerReadBarrier) {
+        if (kUseBakerReadBarrier) {
+          Location temp = locations->GetTemp(0);
+          codegen->GenerateReferenceLoadWithBakerReadBarrier(
+              invoke, trg_loc, base, 0U, offset_loc, TIMES_1, temp, /* needs_null_check */ false);
+          if (is_volatile) {
+            __ Dmb(vixl32::ISH);
+          }
+        } else {
+          __ Ldr(trg, MemOperand(base, offset));
+          if (is_volatile) {
+            __ Dmb(vixl32::ISH);
+          }
+          codegen->GenerateReadBarrierSlow(invoke, trg_loc, trg_loc, base_loc, 0U, offset_loc);
+        }
+      } else {
+        __ Ldr(trg, MemOperand(base, offset));
+        if (is_volatile) {
+          __ Dmb(vixl32::ISH);
+        }
+        assembler->MaybeUnpoisonHeapReference(trg);
+      }
+      break;
+    }
+
+    case Primitive::kPrimLong: {
+      vixl32::Register trg_lo = LowRegisterFrom(trg_loc);
+      vixl32::Register trg_hi = HighRegisterFrom(trg_loc);
+      if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) {
+        __ Ldrexd(trg_lo, trg_hi, MemOperand(base, offset));
+      } else {
+        __ Ldrd(trg_lo, trg_hi, MemOperand(base, offset));
+      }
+      if (is_volatile) {
+        __ Dmb(vixl32::ISH);
+      }
+      break;
+    }
+
+    default:
+      LOG(FATAL) << "Unexpected type " << type;
+      UNREACHABLE();
+  }
+}
+
+static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
+                                          HInvoke* invoke,
+                                          Primitive::Type type) {
+  bool can_call = kEmitCompilerReadBarrier &&
+      (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
+       invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           (can_call
+                                                                ? LocationSummary::kCallOnSlowPath
+                                                                : LocationSummary::kNoCall),
+                                                           kIntrinsified);
+  if (can_call && kUseBakerReadBarrier) {
+    locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
+  }
+  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetInAt(2, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(),
+                    (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
+  if (type == Primitive::kPrimNot && kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+    // We need a temporary register for the read barrier marking slow
+    // path in InstructionCodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier.
+    locations->AddTemp(Location::RequiresRegister());
+  }
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGet(HInvoke* invoke) {
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
+}
+void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetVolatile(HInvoke* invoke) {
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt);
+}
+void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetLong(HInvoke* invoke) {
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
+}
+void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong);
+}
+void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetObject(HInvoke* invoke) {
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
+}
+void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
+  CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGet(HInvoke* invoke) {
+  GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ false, codegen_);
+}
+void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetVolatile(HInvoke* invoke) {
+  GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ true, codegen_);
+}
+void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetLong(HInvoke* invoke) {
+  GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, codegen_);
+}
+void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
+  GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, codegen_);
+}
+void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObject(HInvoke* invoke) {
+  GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ false, codegen_);
+}
+void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
+  GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ true, codegen_);
+}
+
+static void CreateIntIntIntIntToVoid(ArenaAllocator* arena,
+                                     const ArmInstructionSetFeatures& features,
+                                     Primitive::Type type,
+                                     bool is_volatile,
+                                     HInvoke* invoke) {
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           LocationSummary::kNoCall,
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetInAt(2, Location::RequiresRegister());
+  locations->SetInAt(3, Location::RequiresRegister());
+
+  if (type == Primitive::kPrimLong) {
+    // Potentially need temps for ldrexd-strexd loop.
+    if (is_volatile && !features.HasAtomicLdrdAndStrd()) {
+      locations->AddTemp(Location::RequiresRegister());  // Temp_lo.
+      locations->AddTemp(Location::RequiresRegister());  // Temp_hi.
+    }
+  } else if (type == Primitive::kPrimNot) {
+    // Temps for card-marking.
+    locations->AddTemp(Location::RequiresRegister());  // Temp.
+    locations->AddTemp(Location::RequiresRegister());  // Card.
+  }
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePut(HInvoke* invoke) {
+  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, /* is_volatile */ false, invoke);
+}
+void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutOrdered(HInvoke* invoke) {
+  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, /* is_volatile */ false, invoke);
+}
+void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutVolatile(HInvoke* invoke) {
+  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimInt, /* is_volatile */ true, invoke);
+}
+void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObject(HInvoke* invoke) {
+  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, /* is_volatile */ false, invoke);
+}
+void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
+  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, /* is_volatile */ false, invoke);
+}
+void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
+  CreateIntIntIntIntToVoid(arena_, features_, Primitive::kPrimNot, /* is_volatile */ true, invoke);
+}
+void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLong(HInvoke* invoke) {
+  CreateIntIntIntIntToVoid(
+      arena_, features_, Primitive::kPrimLong, /* is_volatile */ false, invoke);
+}
+void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLongOrdered(HInvoke* invoke) {
+  CreateIntIntIntIntToVoid(
+      arena_, features_, Primitive::kPrimLong, /* is_volatile */ false, invoke);
+}
+void IntrinsicLocationsBuilderARMVIXL::VisitUnsafePutLongVolatile(HInvoke* invoke) {
+  CreateIntIntIntIntToVoid(
+      arena_, features_, Primitive::kPrimLong, /* is_volatile */ true, invoke);
+}
+
+static void GenUnsafePut(LocationSummary* locations,
+                         Primitive::Type type,
+                         bool is_volatile,
+                         bool is_ordered,
+                         CodeGeneratorARMVIXL* codegen) {
+  ArmVIXLAssembler* assembler = codegen->GetAssembler();
+
+  vixl32::Register base = RegisterFrom(locations->InAt(1));       // Object pointer.
+  vixl32::Register offset = LowRegisterFrom(locations->InAt(2));  // Long offset, lo part only.
+  vixl32::Register value;
+
+  if (is_volatile || is_ordered) {
+    __ Dmb(vixl32::ISH);
+  }
+
+  if (type == Primitive::kPrimLong) {
+    vixl32::Register value_lo = LowRegisterFrom(locations->InAt(3));
+    vixl32::Register value_hi = HighRegisterFrom(locations->InAt(3));
+    value = value_lo;
+    if (is_volatile && !codegen->GetInstructionSetFeatures().HasAtomicLdrdAndStrd()) {
+      vixl32::Register temp_lo = RegisterFrom(locations->GetTemp(0));
+      vixl32::Register temp_hi = RegisterFrom(locations->GetTemp(1));
+      UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
+      const vixl32::Register temp_reg = temps.Acquire();
+
+      __ Add(temp_reg, base, offset);
+      vixl32::Label loop_head;
+      __ Bind(&loop_head);
+      __ Ldrexd(temp_lo, temp_hi, temp_reg);
+      __ Strexd(temp_lo, value_lo, value_hi, temp_reg);
+      __ Cmp(temp_lo, 0);
+      __ B(ne, &loop_head);
+    } else {
+      __ Strd(value_lo, value_hi, MemOperand(base, offset));
+    }
+  } else {
+    value = RegisterFrom(locations->InAt(3));
+    vixl32::Register source = value;
+    if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
+      vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
+      __ Mov(temp, value);
+      assembler->PoisonHeapReference(temp);
+      source = temp;
+    }
+    __ Str(source, MemOperand(base, offset));
+  }
+
+  if (is_volatile) {
+    __ Dmb(vixl32::ISH);
+  }
+
+  if (type == Primitive::kPrimNot) {
+    vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
+    vixl32::Register card = RegisterFrom(locations->GetTemp(1));
+    bool value_can_be_null = true;  // TODO: Worth finding out this information?
+    codegen->MarkGCCard(temp, card, base, value, value_can_be_null);
+  }
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePut(HInvoke* invoke) {
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimInt,
+               /* is_volatile */ false,
+               /* is_ordered */ false,
+               codegen_);
+}
+void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutOrdered(HInvoke* invoke) {
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimInt,
+               /* is_volatile */ false,
+               /* is_ordered */ true,
+               codegen_);
+}
+void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutVolatile(HInvoke* invoke) {
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimInt,
+               /* is_volatile */ true,
+               /* is_ordered */ false,
+               codegen_);
+}
+void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObject(HInvoke* invoke) {
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimNot,
+               /* is_volatile */ false,
+               /* is_ordered */ false,
+               codegen_);
+}
+void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimNot,
+               /* is_volatile */ false,
+               /* is_ordered */ true,
+               codegen_);
+}
+void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimNot,
+               /* is_volatile */ true,
+               /* is_ordered */ false,
+               codegen_);
+}
+void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLong(HInvoke* invoke) {
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimLong,
+               /* is_volatile */ false,
+               /* is_ordered */ false,
+               codegen_);
+}
+void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLongOrdered(HInvoke* invoke) {
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimLong,
+               /* is_volatile */ false,
+               /* is_ordered */ true,
+               codegen_);
+}
+void IntrinsicCodeGeneratorARMVIXL::VisitUnsafePutLongVolatile(HInvoke* invoke) {
+  GenUnsafePut(invoke->GetLocations(),
+               Primitive::kPrimLong,
+               /* is_volatile */ true,
+               /* is_ordered */ false,
+               codegen_);
+}
+
+static void CreateIntIntIntIntIntToIntPlusTemps(ArenaAllocator* arena,
+                                                HInvoke* invoke,
+                                                Primitive::Type type) {
+  bool can_call = kEmitCompilerReadBarrier &&
+      kUseBakerReadBarrier &&
+      (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
+  LocationSummary* locations = new (arena) LocationSummary(invoke,
+                                                           (can_call
+                                                                ? LocationSummary::kCallOnSlowPath
+                                                                : LocationSummary::kNoCall),
+                                                           kIntrinsified);
+  locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetInAt(2, Location::RequiresRegister());
+  locations->SetInAt(3, Location::RequiresRegister());
+  locations->SetInAt(4, Location::RequiresRegister());
+
+  // If heap poisoning is enabled, we don't want the unpoisoning
+  // operations to potentially clobber the output. Likewise when
+  // emitting a (Baker) read barrier, which may call.
+  Location::OutputOverlap overlaps =
+      ((kPoisonHeapReferences && type == Primitive::kPrimNot) || can_call)
+      ? Location::kOutputOverlap
+      : Location::kNoOutputOverlap;
+  locations->SetOut(Location::RequiresRegister(), overlaps);
+
+  // Temporary registers used in CAS. In the object case
+  // (UnsafeCASObject intrinsic), these are also used for
+  // card-marking, and possibly for (Baker) read barrier.
+  locations->AddTemp(Location::RequiresRegister());  // Pointer.
+  locations->AddTemp(Location::RequiresRegister());  // Temp 1.
+}
+
+static void GenCas(HInvoke* invoke, Primitive::Type type, CodeGeneratorARMVIXL* codegen) {
+  DCHECK_NE(type, Primitive::kPrimLong);
+
+  ArmVIXLAssembler* assembler = codegen->GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  Location out_loc = locations->Out();
+  vixl32::Register out = OutputRegister(invoke);                      // Boolean result.
+
+  vixl32::Register base = InputRegisterAt(invoke, 1);                 // Object pointer.
+  Location offset_loc = locations->InAt(2);
+  vixl32::Register offset = LowRegisterFrom(offset_loc);              // Offset (discard high 4B).
+  vixl32::Register expected = InputRegisterAt(invoke, 3);             // Expected.
+  vixl32::Register value = InputRegisterAt(invoke, 4);                // Value.
+
+  Location tmp_ptr_loc = locations->GetTemp(0);
+  vixl32::Register tmp_ptr = RegisterFrom(tmp_ptr_loc);               // Pointer to actual memory.
+  vixl32::Register tmp = RegisterFrom(locations->GetTemp(1));         // Value in memory.
+
+  if (type == Primitive::kPrimNot) {
+    // The only read barrier implementation supporting the
+    // UnsafeCASObject intrinsic is the Baker-style read barriers.
+    DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
+
+    // Mark card for object assuming new value is stored. Worst case we will mark an unchanged
+    // object and scan the receiver at the next GC for nothing.
+    bool value_can_be_null = true;  // TODO: Worth finding out this information?
+    codegen->MarkGCCard(tmp_ptr, tmp, base, value, value_can_be_null);
+
+    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+      // Need to make sure the reference stored in the field is a to-space
+      // one before attempting the CAS or the CAS could fail incorrectly.
+      codegen->GenerateReferenceLoadWithBakerReadBarrier(
+          invoke,
+          out_loc,  // Unused, used only as a "temporary" within the read barrier.
+          base,
+          /* offset */ 0u,
+          /* index */ offset_loc,
+          ScaleFactor::TIMES_1,
+          tmp_ptr_loc,
+          /* needs_null_check */ false,
+          /* always_update_field */ true,
+          &tmp);
+    }
+  }
+
+  // Prevent reordering with prior memory operations.
+  // Emit a DMB ISH instruction instead of an DMB ISHST one, as the
+  // latter allows a preceding load to be delayed past the STXR
+  // instruction below.
+  __ Dmb(vixl32::ISH);
+
+  __ Add(tmp_ptr, base, offset);
+
+  if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
+    codegen->GetAssembler()->PoisonHeapReference(expected);
+    if (value.Is(expected)) {
+      // Do not poison `value`, as it is the same register as
+      // `expected`, which has just been poisoned.
+    } else {
+      codegen->GetAssembler()->PoisonHeapReference(value);
+    }
+  }
+
+  // do {
+  //   tmp = [r_ptr] - expected;
+  // } while (tmp == 0 && failure([r_ptr] <- r_new_value));
+  // result = tmp != 0;
+
+  vixl32::Label loop_head;
+  __ Bind(&loop_head);
+
+  __ Ldrex(tmp, tmp_ptr);
+
+  __ Subs(tmp, tmp, expected);
+
+  {
+    AssemblerAccurateScope aas(assembler->GetVIXLAssembler(),
+                               3 * kMaxInstructionSizeInBytes,
+                               CodeBufferCheckScope::kMaximumSize);
+
+    __ itt(eq);
+    __ strex(eq, tmp, value, tmp_ptr);
+    __ cmp(eq, tmp, 1);
+  }
+
+  __ B(eq, &loop_head);
+
+  __ Dmb(vixl32::ISH);
+
+  __ Rsbs(out, tmp, 1);
+
+  {
+    AssemblerAccurateScope aas(assembler->GetVIXLAssembler(),
+                               2 * kMaxInstructionSizeInBytes,
+                               CodeBufferCheckScope::kMaximumSize);
+
+    __ it(cc);
+    __ mov(cc, out, 0);
+  }
+
+  if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
+    codegen->GetAssembler()->UnpoisonHeapReference(expected);
+    if (value.Is(expected)) {
+      // Do not unpoison `value`, as it is the same register as
+      // `expected`, which has just been unpoisoned.
+    } else {
+      codegen->GetAssembler()->UnpoisonHeapReference(value);
+    }
+  }
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) {
+  CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke, Primitive::kPrimInt);
+}
+void IntrinsicLocationsBuilderARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) {
+  // The only read barrier implementation supporting the
+  // UnsafeCASObject intrinsic is the Baker-style read barriers.
+  if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
+    return;
+  }
+
+  CreateIntIntIntIntIntToIntPlusTemps(arena_, invoke, Primitive::kPrimNot);
+}
+void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeCASInt(HInvoke* invoke) {
+  GenCas(invoke, Primitive::kPrimInt, codegen_);
+}
+void IntrinsicCodeGeneratorARMVIXL::VisitUnsafeCASObject(HInvoke* invoke) {
+  // The only read barrier implementation supporting the
+  // UnsafeCASObject intrinsic is the Baker-style read barriers.
+  DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
+
+  GenCas(invoke, Primitive::kPrimNot, codegen_);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitStringCompareTo(HInvoke* invoke) {
+  // The inputs plus one temp.
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            invoke->InputAt(1)->CanBeNull()
+                                                                ? LocationSummary::kCallOnSlowPath
+                                                                : LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  // Need temporary registers for String compression's feature.
+  if (mirror::kUseStringCompression) {
+    locations->AddTemp(Location::RequiresRegister());
+  }
+  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitStringCompareTo(HInvoke* invoke) {
+  ArmVIXLAssembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  vixl32::Register str = InputRegisterAt(invoke, 0);
+  vixl32::Register arg = InputRegisterAt(invoke, 1);
+  vixl32::Register out = OutputRegister(invoke);
+
+  vixl32::Register temp0 = RegisterFrom(locations->GetTemp(0));
+  vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
+  vixl32::Register temp2 = RegisterFrom(locations->GetTemp(2));
+  vixl32::Register temp3;
+  if (mirror::kUseStringCompression) {
+    temp3 = RegisterFrom(locations->GetTemp(3));
+  }
+
+  vixl32::Label loop;
+  vixl32::Label find_char_diff;
+  vixl32::Label end;
+  vixl32::Label different_compression;
+
+  // Get offsets of count and value fields within a string object.
+  const int32_t count_offset = mirror::String::CountOffset().Int32Value();
+  const int32_t value_offset = mirror::String::ValueOffset().Int32Value();
+
+  // Note that the null check must have been done earlier.
+  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
+
+  // Take slow path and throw if input can be and is null.
+  SlowPathCodeARMVIXL* slow_path = nullptr;
+  const bool can_slow_path = invoke->InputAt(1)->CanBeNull();
+  if (can_slow_path) {
+    slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
+    codegen_->AddSlowPath(slow_path);
+    __ Cbz(arg, slow_path->GetEntryLabel());
+  }
+
+  // Reference equality check, return 0 if same reference.
+  __ Subs(out, str, arg);
+  __ B(eq, &end);
+
+  if (mirror::kUseStringCompression) {
+    // Load `count` fields of this and argument strings.
+    __ Ldr(temp3, MemOperand(str, count_offset));
+    __ Ldr(temp2, MemOperand(arg, count_offset));
+    // Extract lengths from the `count` fields.
+    __ Lsr(temp0, temp3, 1u);
+    __ Lsr(temp1, temp2, 1u);
+  } else {
+    // Load lengths of this and argument strings.
+    __ Ldr(temp0, MemOperand(str, count_offset));
+    __ Ldr(temp1, MemOperand(arg, count_offset));
+  }
+  // out = length diff.
+  __ Subs(out, temp0, temp1);
+  // temp0 = min(len(str), len(arg)).
+
+  {
+    AssemblerAccurateScope aas(assembler->GetVIXLAssembler(),
+                               2 * kMaxInstructionSizeInBytes,
+                               CodeBufferCheckScope::kMaximumSize);
+
+    __ it(gt);
+    __ mov(gt, temp0, temp1);
+  }
+
+  // Shorter string is empty?
+  __ Cbz(temp0, &end);
+
+  if (mirror::kUseStringCompression) {
+    // Check if both strings using same compression style to use this comparison loop.
+    __ Eors(temp2, temp2, temp3);
+    __ Lsrs(temp2, temp2, 1u);
+    __ B(cs, &different_compression);
+    // For string compression, calculate the number of bytes to compare (not chars).
+    // This could in theory exceed INT32_MAX, so treat temp0 as unsigned.
+    __ Lsls(temp3, temp3, 31u);  // Extract purely the compression flag.
+
+    AssemblerAccurateScope aas(assembler->GetVIXLAssembler(),
+                               2 * kMaxInstructionSizeInBytes,
+                               CodeBufferCheckScope::kMaximumSize);
+
+    __ it(ne);
+    __ add(ne, temp0, temp0, temp0);
+  }
+
+  // Store offset of string value in preparation for comparison loop.
+  __ Mov(temp1, value_offset);
+
+  // Assertions that must hold in order to compare multiple characters at a time.
+  CHECK_ALIGNED(value_offset, 8);
+  static_assert(IsAligned<8>(kObjectAlignment),
+                "String data must be 8-byte aligned for unrolled CompareTo loop.");
+
+  const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
+  DCHECK_EQ(char_size, 2u);
+
+  UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
+
+  vixl32::Label find_char_diff_2nd_cmp;
+  // Unrolled loop comparing 4x16-bit chars per iteration (ok because of string data alignment).
+  __ Bind(&loop);
+  vixl32::Register temp_reg = temps.Acquire();
+  __ Ldr(temp_reg, MemOperand(str, temp1));
+  __ Ldr(temp2, MemOperand(arg, temp1));
+  __ Cmp(temp_reg, temp2);
+  __ B(ne, &find_char_diff);
+  __ Add(temp1, temp1, char_size * 2);
+
+  __ Ldr(temp_reg, MemOperand(str, temp1));
+  __ Ldr(temp2, MemOperand(arg, temp1));
+  __ Cmp(temp_reg, temp2);
+  __ B(ne, &find_char_diff_2nd_cmp);
+  __ Add(temp1, temp1, char_size * 2);
+  // With string compression, we have compared 8 bytes, otherwise 4 chars.
+  __ Subs(temp0, temp0, (mirror::kUseStringCompression ? 8 : 4));
+  __ B(hi, &loop);
+  __ B(&end);
+
+  __ Bind(&find_char_diff_2nd_cmp);
+  if (mirror::kUseStringCompression) {
+    __ Subs(temp0, temp0, 4);  // 4 bytes previously compared.
+    __ B(ls, &end);  // Was the second comparison fully beyond the end?
+  } else {
+    // Without string compression, we can start treating temp0 as signed
+    // and rely on the signed comparison below.
+    __ Sub(temp0, temp0, 2);
+  }
+
+  // Find the single character difference.
+  __ Bind(&find_char_diff);
+  // Get the bit position of the first character that differs.
+  __ Eor(temp1, temp2, temp_reg);
+  __ Rbit(temp1, temp1);
+  __ Clz(temp1, temp1);
+
+  // temp0 = number of characters remaining to compare.
+  // (Without string compression, it could be < 1 if a difference is found by the second CMP
+  // in the comparison loop, and after the end of the shorter string data).
+
+  // Without string compression (temp1 >> 4) = character where difference occurs between the last
+  // two words compared, in the interval [0,1].
+  // (0 for low half-word different, 1 for high half-word different).
+  // With string compression, (temp1 << 3) = byte where the difference occurs,
+  // in the interval [0,3].
+
+  // If temp0 <= (temp1 >> (kUseStringCompression ? 3 : 4)), the difference occurs outside
+  // the remaining string data, so just return length diff (out).
+  // The comparison is unsigned for string compression, otherwise signed.
+  __ Cmp(temp0, Operand(temp1, vixl32::LSR, (mirror::kUseStringCompression ? 3 : 4)));
+  __ B((mirror::kUseStringCompression ? ls : le), &end);
+
+  // Extract the characters and calculate the difference.
+  if (mirror::kUseStringCompression) {
+    // For compressed strings we need to clear 0x7 from temp1, for uncompressed we need to clear
+    // 0xf. We also need to prepare the character extraction mask `uncompressed ? 0xffffu : 0xffu`.
+    // The compression flag is now in the highest bit of temp3, so let's play some tricks.
+    __ orr(temp3, temp3, 0xffu << 23);                  // uncompressed ? 0xff800000u : 0x7ff80000u
+    __ bic(temp1, temp1, Operand(temp3, vixl32::LSR, 31 - 3));  // &= ~(uncompressed ? 0xfu : 0x7u)
+    __ Asr(temp3, temp3, 7u);                           // uncompressed ? 0xffff0000u : 0xff0000u.
+    __ Lsr(temp2, temp2, temp1);                        // Extract second character.
+    __ Lsr(temp3, temp3, 16u);                          // uncompressed ? 0xffffu : 0xffu
+    __ Lsr(out, temp_reg, temp1);                       // Extract first character.
+    __ and_(temp2, temp2, temp3);
+    __ and_(out, out, temp3);
+  } else {
+    __ bic(temp1, temp1, 0xf);
+    __ Lsr(temp2, temp2, temp1);
+    __ Lsr(out, temp_reg, temp1);
+    __ movt(temp2, 0);
+    __ movt(out, 0);
+  }
+
+  __ Sub(out, out, temp2);
+  temps.Release(temp_reg);
+
+  if (mirror::kUseStringCompression) {
+    __ B(&end);
+    __ Bind(&different_compression);
+
+    // Comparison for different compression style.
+    const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
+    DCHECK_EQ(c_char_size, 1u);
+
+    // We want to free up the temp3, currently holding `str.count`, for comparison.
+    // So, we move it to the bottom bit of the iteration count `temp0` which we tnen
+    // need to treat as unsigned. Start by freeing the bit with an ADD and continue
+    // further down by a LSRS+SBC which will flip the meaning of the flag but allow
+    // `subs temp0, #2; bhi different_compression_loop` to serve as the loop condition.
+    __ add(temp0, temp0, temp0);              // Unlike LSL, this ADD is always 16-bit.
+    // `temp1` will hold the compressed data pointer, `temp2` the uncompressed data pointer.
+    __ mov(temp1, str);
+    __ mov(temp2, arg);
+    __ Lsrs(temp3, temp3, 1u);                // Continue the move of the compression flag.
+    {
+      AssemblerAccurateScope aas(assembler->GetVIXLAssembler(),
+                                 3 * kMaxInstructionSizeInBytes,
+                                 CodeBufferCheckScope::kMaximumSize);
+      __ itt(cs);                             // Interleave with selection of temp1 and temp2.
+      __ mov(cs, temp1, arg);                 // Preserves flags.
+      __ mov(cs, temp2, str);                 // Preserves flags.
+    }
+    __ sbc(temp0, temp0, 0);                  // Complete the move of the compression flag.
+
+    // Adjust temp1 and temp2 from string pointers to data pointers.
+    __ add(temp1, temp1, value_offset);
+    __ add(temp2, temp2, value_offset);
+
+    vixl32::Label different_compression_loop;
+    vixl32::Label different_compression_diff;
+
+    // Main loop for different compression.
+    temp_reg = temps.Acquire();
+    __ Bind(&different_compression_loop);
+    __ Ldrb(temp_reg, MemOperand(temp1, c_char_size, PostIndex));
+    __ Ldrh(temp3, MemOperand(temp2, char_size, PostIndex));
+    __ cmp(temp_reg, temp3);
+    __ B(ne, &different_compression_diff);
+    __ Subs(temp0, temp0, 2);
+    __ B(hi, &different_compression_loop);
+    __ B(&end);
+
+    // Calculate the difference.
+    __ Bind(&different_compression_diff);
+    __ Sub(out, temp_reg, temp3);
+    temps.Release(temp_reg);
+    // Flip the difference if the `arg` is compressed.
+    // `temp0` contains inverted `str` compression flag, i.e the same as `arg` compression flag.
+    __ Lsrs(temp0, temp0, 1u);
+    static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
+                  "Expecting 0=compressed, 1=uncompressed");
+
+    AssemblerAccurateScope aas(assembler->GetVIXLAssembler(),
+                               2 * kMaxInstructionSizeInBytes,
+                               CodeBufferCheckScope::kMaximumSize);
+    __ it(cc);
+    __ rsb(cc, out, out, 0);
+  }
+
+  __ Bind(&end);
+
+  if (can_slow_path) {
+    __ Bind(slow_path->GetExitLabel());
+  }
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitStringEquals(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  InvokeRuntimeCallingConventionARMVIXL calling_convention;
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  // Temporary registers to store lengths of strings and for calculations.
+  // Using instruction cbz requires a low register, so explicitly set a temp to be R0.
+  locations->AddTemp(LocationFrom(r0));
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+
+  locations->SetOut(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitStringEquals(HInvoke* invoke) {
+  ArmVIXLAssembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  vixl32::Register str = InputRegisterAt(invoke, 0);
+  vixl32::Register arg = InputRegisterAt(invoke, 1);
+  vixl32::Register out = OutputRegister(invoke);
+
+  vixl32::Register temp = RegisterFrom(locations->GetTemp(0));
+  vixl32::Register temp1 = RegisterFrom(locations->GetTemp(1));
+  vixl32::Register temp2 = RegisterFrom(locations->GetTemp(2));
+
+  vixl32::Label loop;
+  vixl32::Label end;
+  vixl32::Label return_true;
+  vixl32::Label return_false;
+
+  // Get offsets of count, value, and class fields within a string object.
+  const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
+  const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
+  const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
+
+  // Note that the null check must have been done earlier.
+  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
+
+  StringEqualsOptimizations optimizations(invoke);
+  if (!optimizations.GetArgumentNotNull()) {
+    // Check if input is null, return false if it is.
+    __ Cbz(arg, &return_false);
+  }
+
+  // Reference equality check, return true if same reference.
+  __ Cmp(str, arg);
+  __ B(eq, &return_true);
+
+  if (!optimizations.GetArgumentIsString()) {
+    // Instanceof check for the argument by comparing class fields.
+    // All string objects must have the same type since String cannot be subclassed.
+    // Receiver must be a string object, so its class field is equal to all strings' class fields.
+    // If the argument is a string object, its class field must be equal to receiver's class field.
+    __ Ldr(temp, MemOperand(str, class_offset));
+    __ Ldr(temp1, MemOperand(arg, class_offset));
+    __ Cmp(temp, temp1);
+    __ B(ne, &return_false);
+  }
+
+  // Load `count` fields of this and argument strings.
+  __ Ldr(temp, MemOperand(str, count_offset));
+  __ Ldr(temp1, MemOperand(arg, count_offset));
+  // Check if `count` fields are equal, return false if they're not.
+  // Also compares the compression style, if differs return false.
+  __ Cmp(temp, temp1);
+  __ B(ne, &return_false);
+  // Return true if both strings are empty. Even with string compression `count == 0` means empty.
+  static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
+                "Expecting 0=compressed, 1=uncompressed");
+  __ Cbz(temp, &return_true);
+
+  // Assertions that must hold in order to compare strings 4 bytes at a time.
+  DCHECK_ALIGNED(value_offset, 4);
+  static_assert(IsAligned<4>(kObjectAlignment), "String data must be aligned for fast compare.");
+
+  if (mirror::kUseStringCompression) {
+    // For string compression, calculate the number of bytes to compare (not chars).
+    // This could in theory exceed INT32_MAX, so treat temp as unsigned.
+    __ Lsrs(temp, temp, 1u);                        // Extract length and check compression flag.
+    AssemblerAccurateScope aas(assembler->GetVIXLAssembler(),
+                               2 * kMaxInstructionSizeInBytes,
+                               CodeBufferCheckScope::kMaximumSize);
+    __ it(cs);                                      // If uncompressed,
+    __ add(cs, temp, temp, temp);                   //   double the byte count.
+  }
+
+  // Store offset of string value in preparation for comparison loop.
+  __ Mov(temp1, value_offset);
+
+  // Loop to compare strings 4 bytes at a time starting at the front of the string.
+  // Ok to do this because strings are zero-padded to kObjectAlignment.
+  __ Bind(&loop);
+  __ Ldr(out, MemOperand(str, temp1));
+  __ Ldr(temp2, MemOperand(arg, temp1));
+  __ Add(temp1, temp1, sizeof(uint32_t));
+  __ Cmp(out, temp2);
+  __ B(ne, &return_false);
+  // With string compression, we have compared 4 bytes, otherwise 2 chars.
+  __ Subs(temp, temp, mirror::kUseStringCompression ? 4 : 2);
+  __ B(hi, &loop);
+
+  // Return true and exit the function.
+  // If loop does not result in returning false, we return true.
+  __ Bind(&return_true);
+  __ Mov(out, 1);
+  __ B(&end);
+
+  // Return false and exit the function.
+  __ Bind(&return_false);
+  __ Mov(out, 0);
+  __ Bind(&end);
+}
+
+static void GenerateVisitStringIndexOf(HInvoke* invoke,
+                                       ArmVIXLAssembler* assembler,
+                                       CodeGeneratorARMVIXL* codegen,
+                                       ArenaAllocator* allocator,
+                                       bool start_at_zero) {
+  LocationSummary* locations = invoke->GetLocations();
+
+  // Note that the null check must have been done earlier.
+  DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
+
+  // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
+  // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
+  SlowPathCodeARMVIXL* slow_path = nullptr;
+  HInstruction* code_point = invoke->InputAt(1);
+  if (code_point->IsIntConstant()) {
+    if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) >
+        std::numeric_limits<uint16_t>::max()) {
+      // Always needs the slow-path. We could directly dispatch to it, but this case should be
+      // rare, so for simplicity just put the full slow-path down and branch unconditionally.
+      slow_path = new (allocator) IntrinsicSlowPathARMVIXL(invoke);
+      codegen->AddSlowPath(slow_path);
+      __ B(slow_path->GetEntryLabel());
+      __ Bind(slow_path->GetExitLabel());
+      return;
+    }
+  } else if (code_point->GetType() != Primitive::kPrimChar) {
+    vixl32::Register char_reg = InputRegisterAt(invoke, 1);
+    // 0xffff is not modified immediate but 0x10000 is, so use `>= 0x10000` instead of `> 0xffff`.
+    __ Cmp(char_reg, static_cast<uint32_t>(std::numeric_limits<uint16_t>::max()) + 1);
+    slow_path = new (allocator) IntrinsicSlowPathARMVIXL(invoke);
+    codegen->AddSlowPath(slow_path);
+    __ B(hs, slow_path->GetEntryLabel());
+  }
+
+  if (start_at_zero) {
+    vixl32::Register tmp_reg = RegisterFrom(locations->GetTemp(0));
+    DCHECK(tmp_reg.Is(r2));
+    // Start-index = 0.
+    __ Mov(tmp_reg, 0);
+  }
+
+  codegen->InvokeRuntime(kQuickIndexOf, invoke, invoke->GetDexPc(), slow_path);
+  CheckEntrypointTypes<kQuickIndexOf, int32_t, void*, uint32_t, uint32_t>();
+
+  if (slow_path != nullptr) {
+    __ Bind(slow_path->GetExitLabel());
+  }
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOf(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
+                                                            kIntrinsified);
+  // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
+  // best to align the inputs accordingly.
+  InvokeRuntimeCallingConventionARMVIXL calling_convention;
+  locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
+  locations->SetOut(LocationFrom(r0));
+
+  // Need to send start-index=0.
+  locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitStringIndexOf(HInvoke* invoke) {
+  GenerateVisitStringIndexOf(
+      invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ true);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
+                                                            kIntrinsified);
+  // We have a hand-crafted assembly stub that follows the runtime calling convention. So it's
+  // best to align the inputs accordingly.
+  InvokeRuntimeCallingConventionARMVIXL calling_convention;
+  locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
+  locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
+  locations->SetOut(LocationFrom(r0));
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitStringIndexOfAfter(HInvoke* invoke) {
+  GenerateVisitStringIndexOf(
+      invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ false);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromBytes(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
+                                                            kIntrinsified);
+  InvokeRuntimeCallingConventionARMVIXL calling_convention;
+  locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
+  locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
+  locations->SetInAt(3, LocationFrom(calling_convention.GetRegisterAt(3)));
+  locations->SetOut(LocationFrom(r0));
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromBytes(HInvoke* invoke) {
+  ArmVIXLAssembler* assembler = GetAssembler();
+  vixl32::Register byte_array = InputRegisterAt(invoke, 0);
+  __ Cmp(byte_array, 0);
+  SlowPathCodeARMVIXL* slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
+  codegen_->AddSlowPath(slow_path);
+  __ B(eq, slow_path->GetEntryLabel());
+
+  codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc(), slow_path);
+  CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
+  __ Bind(slow_path->GetExitLabel());
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromChars(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCallOnMainOnly,
+                                                            kIntrinsified);
+  InvokeRuntimeCallingConventionARMVIXL calling_convention;
+  locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
+  locations->SetInAt(1, LocationFrom(calling_convention.GetRegisterAt(1)));
+  locations->SetInAt(2, LocationFrom(calling_convention.GetRegisterAt(2)));
+  locations->SetOut(LocationFrom(r0));
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromChars(HInvoke* invoke) {
+  // No need to emit code checking whether `locations->InAt(2)` is a null
+  // pointer, as callers of the native method
+  //
+  //   java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
+  //
+  // all include a null check on `data` before calling that method.
+  codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
+  CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitStringNewStringFromString(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kCallOnMainAndSlowPath,
+                                                            kIntrinsified);
+  InvokeRuntimeCallingConventionARMVIXL calling_convention;
+  locations->SetInAt(0, LocationFrom(calling_convention.GetRegisterAt(0)));
+  locations->SetOut(LocationFrom(r0));
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitStringNewStringFromString(HInvoke* invoke) {
+  ArmVIXLAssembler* assembler = GetAssembler();
+  vixl32::Register string_to_copy = InputRegisterAt(invoke, 0);
+  __ Cmp(string_to_copy, 0);
+  SlowPathCodeARMVIXL* slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
+  codegen_->AddSlowPath(slow_path);
+  __ B(eq, slow_path->GetEntryLabel());
+
+  codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc(), slow_path);
+  CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
+
+  __ Bind(slow_path->GetExitLabel());
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
+  // The only read barrier implementation supporting the
+  // SystemArrayCopy intrinsic is the Baker-style read barriers.
+  if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
+    return;
+  }
+
+  CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
+  LocationSummary* locations = invoke->GetLocations();
+  if (locations == nullptr) {
+    return;
+  }
+
+  HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
+  HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
+  HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
+
+  if (src_pos != nullptr && !assembler_->ShifterOperandCanAlwaysHold(src_pos->GetValue())) {
+    locations->SetInAt(1, Location::RequiresRegister());
+  }
+  if (dest_pos != nullptr && !assembler_->ShifterOperandCanAlwaysHold(dest_pos->GetValue())) {
+    locations->SetInAt(3, Location::RequiresRegister());
+  }
+  if (length != nullptr && !assembler_->ShifterOperandCanAlwaysHold(length->GetValue())) {
+    locations->SetInAt(4, Location::RequiresRegister());
+  }
+  if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+    // Temporary register IP cannot be used in
+    // ReadBarrierSystemArrayCopySlowPathARM (because that register
+    // is clobbered by ReadBarrierMarkRegX entry points). Get an extra
+    // temporary register from the register allocator.
+    locations->AddTemp(Location::RequiresRegister());
+  }
+}
+
+static void CheckPosition(ArmVIXLAssembler* assembler,
+                          Location pos,
+                          vixl32::Register input,
+                          Location length,
+                          SlowPathCodeARMVIXL* slow_path,
+                          vixl32::Register temp,
+                          bool length_is_input_length = false) {
+  // Where is the length in the Array?
+  const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
+
+  if (pos.IsConstant()) {
+    int32_t pos_const = Int32ConstantFrom(pos);
+    if (pos_const == 0) {
+      if (!length_is_input_length) {
+        // Check that length(input) >= length.
+        __ Ldr(temp, MemOperand(input, length_offset));
+        if (length.IsConstant()) {
+          __ Cmp(temp, Int32ConstantFrom(length));
+        } else {
+          __ Cmp(temp, RegisterFrom(length));
+        }
+        __ B(lt, slow_path->GetEntryLabel());
+      }
+    } else {
+      // Check that length(input) >= pos.
+      __ Ldr(temp, MemOperand(input, length_offset));
+      __ Subs(temp, temp, pos_const);
+      __ B(lt, slow_path->GetEntryLabel());
+
+      // Check that (length(input) - pos) >= length.
+      if (length.IsConstant()) {
+        __ Cmp(temp, Int32ConstantFrom(length));
+      } else {
+        __ Cmp(temp, RegisterFrom(length));
+      }
+      __ B(lt, slow_path->GetEntryLabel());
+    }
+  } else if (length_is_input_length) {
+    // The only way the copy can succeed is if pos is zero.
+    vixl32::Register pos_reg = RegisterFrom(pos);
+    __ Cbnz(pos_reg, slow_path->GetEntryLabel());
+  } else {
+    // Check that pos >= 0.
+    vixl32::Register pos_reg = RegisterFrom(pos);
+    __ Cmp(pos_reg, 0);
+    __ B(lt, slow_path->GetEntryLabel());
+
+    // Check that pos <= length(input).
+    __ Ldr(temp, MemOperand(input, length_offset));
+    __ Subs(temp, temp, pos_reg);
+    __ B(lt, slow_path->GetEntryLabel());
+
+    // Check that (length(input) - pos) >= length.
+    if (length.IsConstant()) {
+      __ Cmp(temp, Int32ConstantFrom(length));
+    } else {
+      __ Cmp(temp, RegisterFrom(length));
+    }
+    __ B(lt, slow_path->GetEntryLabel());
+  }
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitSystemArrayCopy(HInvoke* invoke) {
+  // The only read barrier implementation supporting the
+  // SystemArrayCopy intrinsic is the Baker-style read barriers.
+  DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
+
+  ArmVIXLAssembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
+  uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
+  uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
+  uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
+  uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
+
+  vixl32::Register src = InputRegisterAt(invoke, 0);
+  Location src_pos = locations->InAt(1);
+  vixl32::Register dest = InputRegisterAt(invoke, 2);
+  Location dest_pos = locations->InAt(3);
+  Location length = locations->InAt(4);
+  Location temp1_loc = locations->GetTemp(0);
+  vixl32::Register temp1 = RegisterFrom(temp1_loc);
+  Location temp2_loc = locations->GetTemp(1);
+  vixl32::Register temp2 = RegisterFrom(temp2_loc);
+  Location temp3_loc = locations->GetTemp(2);
+  vixl32::Register temp3 = RegisterFrom(temp3_loc);
+
+  SlowPathCodeARMVIXL* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathARMVIXL(invoke);
+  codegen_->AddSlowPath(intrinsic_slow_path);
+
+  vixl32::Label conditions_on_positions_validated;
+  SystemArrayCopyOptimizations optimizations(invoke);
+
+  // If source and destination are the same, we go to slow path if we need to do
+  // forward copying.
+  if (src_pos.IsConstant()) {
+    int32_t src_pos_constant = Int32ConstantFrom(src_pos);
+    if (dest_pos.IsConstant()) {
+      int32_t dest_pos_constant = Int32ConstantFrom(dest_pos);
+      if (optimizations.GetDestinationIsSource()) {
+        // Checked when building locations.
+        DCHECK_GE(src_pos_constant, dest_pos_constant);
+      } else if (src_pos_constant < dest_pos_constant) {
+        __ Cmp(src, dest);
+        __ B(eq, intrinsic_slow_path->GetEntryLabel());
+      }
+
+      // Checked when building locations.
+      DCHECK(!optimizations.GetDestinationIsSource()
+             || (src_pos_constant >= Int32ConstantFrom(dest_pos)));
+    } else {
+      if (!optimizations.GetDestinationIsSource()) {
+        __ Cmp(src, dest);
+        __ B(ne, &conditions_on_positions_validated);
+      }
+      __ Cmp(RegisterFrom(dest_pos), src_pos_constant);
+      __ B(gt, intrinsic_slow_path->GetEntryLabel());
+    }
+  } else {
+    if (!optimizations.GetDestinationIsSource()) {
+      __ Cmp(src, dest);
+      __ B(ne, &conditions_on_positions_validated);
+    }
+    if (dest_pos.IsConstant()) {
+      int32_t dest_pos_constant = Int32ConstantFrom(dest_pos);
+      __ Cmp(RegisterFrom(src_pos), dest_pos_constant);
+    } else {
+      __ Cmp(RegisterFrom(src_pos), RegisterFrom(dest_pos));
+    }
+    __ B(lt, intrinsic_slow_path->GetEntryLabel());
+  }
+
+  __ Bind(&conditions_on_positions_validated);
+
+  if (!optimizations.GetSourceIsNotNull()) {
+    // Bail out if the source is null.
+    __ Cbz(src, intrinsic_slow_path->GetEntryLabel());
+  }
+
+  if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
+    // Bail out if the destination is null.
+    __ Cbz(dest, intrinsic_slow_path->GetEntryLabel());
+  }
+
+  // If the length is negative, bail out.
+  // We have already checked in the LocationsBuilder for the constant case.
+  if (!length.IsConstant() &&
+      !optimizations.GetCountIsSourceLength() &&
+      !optimizations.GetCountIsDestinationLength()) {
+    __ Cmp(RegisterFrom(length), 0);
+    __ B(lt, intrinsic_slow_path->GetEntryLabel());
+  }
+
+  // Validity checks: source.
+  CheckPosition(assembler,
+                src_pos,
+                src,
+                length,
+                intrinsic_slow_path,
+                temp1,
+                optimizations.GetCountIsSourceLength());
+
+  // Validity checks: dest.
+  CheckPosition(assembler,
+                dest_pos,
+                dest,
+                length,
+                intrinsic_slow_path,
+                temp1,
+                optimizations.GetCountIsDestinationLength());
+
+  if (!optimizations.GetDoesNotNeedTypeCheck()) {
+    // Check whether all elements of the source array are assignable to the component
+    // type of the destination array. We do two checks: the classes are the same,
+    // or the destination is Object[]. If none of these checks succeed, we go to the
+    // slow path.
+
+    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+      if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+        // /* HeapReference<Class> */ temp1 = src->klass_
+        codegen_->GenerateFieldLoadWithBakerReadBarrier(
+            invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false);
+        // Bail out if the source is not a non primitive array.
+        // /* HeapReference<Class> */ temp1 = temp1->component_type_
+        codegen_->GenerateFieldLoadWithBakerReadBarrier(
+            invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
+        __ Cbz(temp1, intrinsic_slow_path->GetEntryLabel());
+        // If heap poisoning is enabled, `temp1` has been unpoisoned
+        // by the the previous call to GenerateFieldLoadWithBakerReadBarrier.
+        // /* uint16_t */ temp1 = static_cast<uint16>(temp1->primitive_type_);
+        __ Ldrh(temp1, MemOperand(temp1, primitive_offset));
+        static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+        __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
+      }
+
+      // /* HeapReference<Class> */ temp1 = dest->klass_
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(
+          invoke, temp1_loc, dest, class_offset, temp2_loc, /* needs_null_check */ false);
+
+      if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
+        // Bail out if the destination is not a non primitive array.
+        //
+        // Register `temp1` is not trashed by the read barrier emitted
+        // by GenerateFieldLoadWithBakerReadBarrier below, as that
+        // method produces a call to a ReadBarrierMarkRegX entry point,
+        // which saves all potentially live registers, including
+        // temporaries such a `temp1`.
+        // /* HeapReference<Class> */ temp2 = temp1->component_type_
+        codegen_->GenerateFieldLoadWithBakerReadBarrier(
+            invoke, temp2_loc, temp1, component_offset, temp3_loc, /* needs_null_check */ false);
+        __ Cbz(temp2, intrinsic_slow_path->GetEntryLabel());
+        // If heap poisoning is enabled, `temp2` has been unpoisoned
+        // by the the previous call to GenerateFieldLoadWithBakerReadBarrier.
+        // /* uint16_t */ temp2 = static_cast<uint16>(temp2->primitive_type_);
+        __ Ldrh(temp2, MemOperand(temp2, primitive_offset));
+        static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+        __ Cbnz(temp2, intrinsic_slow_path->GetEntryLabel());
+      }
+
+      // For the same reason given earlier, `temp1` is not trashed by the
+      // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
+      // /* HeapReference<Class> */ temp2 = src->klass_
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(
+          invoke, temp2_loc, src, class_offset, temp3_loc, /* needs_null_check */ false);
+      // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
+      __ Cmp(temp1, temp2);
+
+      if (optimizations.GetDestinationIsTypedObjectArray()) {
+        vixl32::Label do_copy;
+        __ B(eq, &do_copy);
+        // /* HeapReference<Class> */ temp1 = temp1->component_type_
+        codegen_->GenerateFieldLoadWithBakerReadBarrier(
+            invoke, temp1_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
+        // /* HeapReference<Class> */ temp1 = temp1->super_class_
+        // We do not need to emit a read barrier for the following
+        // heap reference load, as `temp1` is only used in a
+        // comparison with null below, and this reference is not
+        // kept afterwards.
+        __ Ldr(temp1, MemOperand(temp1, super_offset));
+        __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
+        __ Bind(&do_copy);
+      } else {
+        __ B(ne, intrinsic_slow_path->GetEntryLabel());
+      }
+    } else {
+      // Non read barrier code.
+
+      // /* HeapReference<Class> */ temp1 = dest->klass_
+      __ Ldr(temp1, MemOperand(dest, class_offset));
+      // /* HeapReference<Class> */ temp2 = src->klass_
+      __ Ldr(temp2, MemOperand(src, class_offset));
+      bool did_unpoison = false;
+      if (!optimizations.GetDestinationIsNonPrimitiveArray() ||
+          !optimizations.GetSourceIsNonPrimitiveArray()) {
+        // One or two of the references need to be unpoisoned. Unpoison them
+        // both to make the identity check valid.
+        assembler->MaybeUnpoisonHeapReference(temp1);
+        assembler->MaybeUnpoisonHeapReference(temp2);
+        did_unpoison = true;
+      }
+
+      if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
+        // Bail out if the destination is not a non primitive array.
+        // /* HeapReference<Class> */ temp3 = temp1->component_type_
+        __ Ldr(temp3, MemOperand(temp1, component_offset));
+        __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel());
+        assembler->MaybeUnpoisonHeapReference(temp3);
+        // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
+        __ Ldrh(temp3, MemOperand(temp3, primitive_offset));
+        static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+        __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel());
+      }
+
+      if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+        // Bail out if the source is not a non primitive array.
+        // /* HeapReference<Class> */ temp3 = temp2->component_type_
+        __ Ldr(temp3, MemOperand(temp2, component_offset));
+        __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel());
+        assembler->MaybeUnpoisonHeapReference(temp3);
+        // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
+        __ Ldrh(temp3, MemOperand(temp3, primitive_offset));
+        static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+        __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel());
+      }
+
+      __ Cmp(temp1, temp2);
+
+      if (optimizations.GetDestinationIsTypedObjectArray()) {
+        vixl32::Label do_copy;
+        __ B(eq, &do_copy);
+        if (!did_unpoison) {
+          assembler->MaybeUnpoisonHeapReference(temp1);
+        }
+        // /* HeapReference<Class> */ temp1 = temp1->component_type_
+        __ Ldr(temp1, MemOperand(temp1, component_offset));
+        assembler->MaybeUnpoisonHeapReference(temp1);
+        // /* HeapReference<Class> */ temp1 = temp1->super_class_
+        __ Ldr(temp1, MemOperand(temp1, super_offset));
+        // No need to unpoison the result, we're comparing against null.
+        __ Cbnz(temp1, intrinsic_slow_path->GetEntryLabel());
+        __ Bind(&do_copy);
+      } else {
+        __ B(ne, intrinsic_slow_path->GetEntryLabel());
+      }
+    }
+  } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
+    DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
+    // Bail out if the source is not a non primitive array.
+    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+      // /* HeapReference<Class> */ temp1 = src->klass_
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(
+          invoke, temp1_loc, src, class_offset, temp2_loc, /* needs_null_check */ false);
+      // /* HeapReference<Class> */ temp3 = temp1->component_type_
+      codegen_->GenerateFieldLoadWithBakerReadBarrier(
+          invoke, temp3_loc, temp1, component_offset, temp2_loc, /* needs_null_check */ false);
+      __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel());
+      // If heap poisoning is enabled, `temp3` has been unpoisoned
+      // by the the previous call to GenerateFieldLoadWithBakerReadBarrier.
+    } else {
+      // /* HeapReference<Class> */ temp1 = src->klass_
+      __ Ldr(temp1, MemOperand(src, class_offset));
+      assembler->MaybeUnpoisonHeapReference(temp1);
+      // /* HeapReference<Class> */ temp3 = temp1->component_type_
+      __ Ldr(temp3, MemOperand(temp1, component_offset));
+      __ Cbz(temp3, intrinsic_slow_path->GetEntryLabel());
+      assembler->MaybeUnpoisonHeapReference(temp3);
+    }
+    // /* uint16_t */ temp3 = static_cast<uint16>(temp3->primitive_type_);
+    __ Ldrh(temp3, MemOperand(temp3, primitive_offset));
+    static_assert(Primitive::kPrimNot == 0, "Expected 0 for kPrimNot");
+    __ Cbnz(temp3, intrinsic_slow_path->GetEntryLabel());
+  }
+
+  int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
+  uint32_t element_size_shift = Primitive::ComponentSizeShift(Primitive::kPrimNot);
+  uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
+
+  // Compute the base source address in `temp1`.
+  if (src_pos.IsConstant()) {
+    int32_t constant = Int32ConstantFrom(src_pos);
+    __ Add(temp1, src, element_size * constant + offset);
+  } else {
+    __ Add(temp1, src, Operand(RegisterFrom(src_pos), vixl32::LSL, element_size_shift));
+    __ Add(temp1, temp1, offset);
+  }
+
+  // Compute the end source address in `temp3`.
+  if (length.IsConstant()) {
+    int32_t constant = Int32ConstantFrom(length);
+    __ Add(temp3, temp1, element_size * constant);
+  } else {
+    __ Add(temp3, temp1, Operand(RegisterFrom(length), vixl32::LSL, element_size_shift));
+  }
+
+  if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+    // The base destination address is computed later, as `temp2` is
+    // used for intermediate computations.
+
+    // SystemArrayCopy implementation for Baker read barriers (see
+    // also CodeGeneratorARM::GenerateReferenceLoadWithBakerReadBarrier):
+    //
+    //   if (src_ptr != end_ptr) {
+    //     uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
+    //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
+    //     bool is_gray = (rb_state == ReadBarrier::GrayState());
+    //     if (is_gray) {
+    //       // Slow-path copy.
+    //       do {
+    //         *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
+    //       } while (src_ptr != end_ptr)
+    //     } else {
+    //       // Fast-path copy.
+    //       do {
+    //         *dest_ptr++ = *src_ptr++;
+    //       } while (src_ptr != end_ptr)
+    //     }
+    //   }
+
+    vixl32::Label loop, done;
+
+    // Don't enter copy loop if `length == 0`.
+    __ Cmp(temp1, temp3);
+    __ B(eq, &done);
+
+    // /* int32_t */ monitor = src->monitor_
+    __ Ldr(temp2, MemOperand(src, monitor_offset));
+    // /* LockWord */ lock_word = LockWord(monitor)
+    static_assert(sizeof(LockWord) == sizeof(int32_t),
+                  "art::LockWord and int32_t have different sizes.");
+
+    // Introduce a dependency on the lock_word including the rb_state,
+    // which shall prevent load-load reordering without using
+    // a memory barrier (which would be more expensive).
+    // `src` is unchanged by this operation, but its value now depends
+    // on `temp2`.
+    __ Add(src, src, Operand(temp2, vixl32::LSR, 32));
+
+    // Slow path used to copy array when `src` is gray.
+    SlowPathCodeARMVIXL* read_barrier_slow_path =
+        new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathARMVIXL(invoke);
+    codegen_->AddSlowPath(read_barrier_slow_path);
+
+    // Given the numeric representation, it's enough to check the low bit of the
+    // rb_state. We do that by shifting the bit out of the lock word with LSRS
+    // which can be a 16-bit instruction unlike the TST immediate.
+    static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+    static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
+    __ Lsrs(temp2, temp2, LockWord::kReadBarrierStateShift + 1);
+    // Carry flag is the last bit shifted out by LSRS.
+    __ B(cs, read_barrier_slow_path->GetEntryLabel());
+
+    // Fast-path copy.
+
+    // Compute the base destination address in `temp2`.
+    if (dest_pos.IsConstant()) {
+      int32_t constant = Int32ConstantFrom(dest_pos);
+      __ Add(temp2, dest, element_size * constant + offset);
+    } else {
+      __ Add(temp2, dest, Operand(RegisterFrom(dest_pos), vixl32::LSL, element_size_shift));
+      __ Add(temp2, temp2, offset);
+    }
+
+    // Iterate over the arrays and do a raw copy of the objects. We don't need to
+    // poison/unpoison.
+    __ Bind(&loop);
+
+    {
+      UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
+      const vixl32::Register temp_reg = temps.Acquire();
+
+      __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex));
+      __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex));
+    }
+
+    __ Cmp(temp1, temp3);
+    __ B(ne, &loop);
+
+    __ Bind(read_barrier_slow_path->GetExitLabel());
+    __ Bind(&done);
+  } else {
+    // Non read barrier code.
+
+    // Compute the base destination address in `temp2`.
+    if (dest_pos.IsConstant()) {
+      int32_t constant = Int32ConstantFrom(dest_pos);
+      __ Add(temp2, dest, element_size * constant + offset);
+    } else {
+      __ Add(temp2, dest, Operand(RegisterFrom(dest_pos), vixl32::LSL, element_size_shift));
+      __ Add(temp2, temp2, offset);
+    }
+
+    // Iterate over the arrays and do a raw copy of the objects. We don't need to
+    // poison/unpoison.
+    vixl32::Label loop, done;
+    __ Cmp(temp1, temp3);
+    __ B(eq, &done);
+    __ Bind(&loop);
+
+    {
+      UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
+      const vixl32::Register temp_reg = temps.Acquire();
+
+      __ Ldr(temp_reg, MemOperand(temp1, element_size, PostIndex));
+      __ Str(temp_reg, MemOperand(temp2, element_size, PostIndex));
+    }
+
+    __ Cmp(temp1, temp3);
+    __ B(ne, &loop);
+    __ Bind(&done);
+  }
+
+  // We only need one card marking on the destination array.
+  codegen_->MarkGCCard(temp1, temp2, dest, NoReg, /* value_can_be_null */ false);
+
+  __ Bind(intrinsic_slow_path->GetExitLabel());
+}
+
+static void CreateFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  // If the graph is debuggable, all callee-saved floating-point registers are blocked by
+  // the code generator. Furthermore, the register allocator creates fixed live intervals
+  // for all caller-saved registers because we are doing a function call. As a result, if
+  // the input and output locations are unallocated, the register allocator runs out of
+  // registers and fails; however, a debuggable graph is not the common case.
+  if (invoke->GetBlock()->GetGraph()->IsDebuggable()) {
+    return;
+  }
+
+  DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
+  DCHECK_EQ(invoke->InputAt(0)->GetType(), Primitive::kPrimDouble);
+  DCHECK_EQ(invoke->GetType(), Primitive::kPrimDouble);
+
+  LocationSummary* const locations = new (arena) LocationSummary(invoke,
+                                                                 LocationSummary::kCallOnMainOnly,
+                                                                 kIntrinsified);
+  const InvokeRuntimeCallingConventionARMVIXL calling_convention;
+
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  locations->SetOut(Location::RequiresFpuRegister());
+  // Native code uses the soft float ABI.
+  locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0)));
+  locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1)));
+}
+
+static void CreateFPFPToFPCallLocations(ArenaAllocator* arena, HInvoke* invoke) {
+  // If the graph is debuggable, all callee-saved floating-point registers are blocked by
+  // the code generator. Furthermore, the register allocator creates fixed live intervals
+  // for all caller-saved registers because we are doing a function call. As a result, if
+  // the input and output locations are unallocated, the register allocator runs out of
+  // registers and fails; however, a debuggable graph is not the common case.
+  if (invoke->GetBlock()->GetGraph()->IsDebuggable()) {
+    return;
+  }
+
+  DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
+  DCHECK_EQ(invoke->InputAt(0)->GetType(), Primitive::kPrimDouble);
+  DCHECK_EQ(invoke->InputAt(1)->GetType(), Primitive::kPrimDouble);
+  DCHECK_EQ(invoke->GetType(), Primitive::kPrimDouble);
+
+  LocationSummary* const locations = new (arena) LocationSummary(invoke,
+                                                                 LocationSummary::kCallOnMainOnly,
+                                                                 kIntrinsified);
+  const InvokeRuntimeCallingConventionARMVIXL calling_convention;
+
+  locations->SetInAt(0, Location::RequiresFpuRegister());
+  locations->SetInAt(1, Location::RequiresFpuRegister());
+  locations->SetOut(Location::RequiresFpuRegister());
+  // Native code uses the soft float ABI.
+  locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(0)));
+  locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(1)));
+  locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(2)));
+  locations->AddTemp(LocationFrom(calling_convention.GetRegisterAt(3)));
+}
+
+static void GenFPToFPCall(HInvoke* invoke,
+                          ArmVIXLAssembler* assembler,
+                          CodeGeneratorARMVIXL* codegen,
+                          QuickEntrypointEnum entry) {
+  LocationSummary* const locations = invoke->GetLocations();
+
+  DCHECK_EQ(invoke->GetNumberOfArguments(), 1U);
+  DCHECK(locations->WillCall() && locations->Intrinsified());
+
+  // Native code uses the soft float ABI.
+  __ Vmov(RegisterFrom(locations->GetTemp(0)),
+          RegisterFrom(locations->GetTemp(1)),
+          InputDRegisterAt(invoke, 0));
+  codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
+  __ Vmov(OutputDRegister(invoke),
+          RegisterFrom(locations->GetTemp(0)),
+          RegisterFrom(locations->GetTemp(1)));
+}
+
+static void GenFPFPToFPCall(HInvoke* invoke,
+                            ArmVIXLAssembler* assembler,
+                            CodeGeneratorARMVIXL* codegen,
+                            QuickEntrypointEnum entry) {
+  LocationSummary* const locations = invoke->GetLocations();
+
+  DCHECK_EQ(invoke->GetNumberOfArguments(), 2U);
+  DCHECK(locations->WillCall() && locations->Intrinsified());
+
+  // Native code uses the soft float ABI.
+  __ Vmov(RegisterFrom(locations->GetTemp(0)),
+          RegisterFrom(locations->GetTemp(1)),
+          InputDRegisterAt(invoke, 0));
+  __ Vmov(RegisterFrom(locations->GetTemp(2)),
+          RegisterFrom(locations->GetTemp(3)),
+          InputDRegisterAt(invoke, 1));
+  codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
+  __ Vmov(OutputDRegister(invoke),
+          RegisterFrom(locations->GetTemp(0)),
+          RegisterFrom(locations->GetTemp(1)));
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathCos(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathCos(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCos);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathSin(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathSin(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickSin);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathAcos(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathAcos(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAcos);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathAsin(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathAsin(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAsin);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathAtan(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathAtan(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathCbrt(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathCbrt(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCbrt);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathCosh(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathCosh(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickCosh);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathExp(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathExp(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickExp);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathExpm1(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathExpm1(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickExpm1);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathLog(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathLog(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickLog);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathLog10(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathLog10(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickLog10);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathSinh(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathSinh(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickSinh);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathTan(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathTan(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickTan);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathTanh(HInvoke* invoke) {
+  CreateFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathTanh(HInvoke* invoke) {
+  GenFPToFPCall(invoke, GetAssembler(), codegen_, kQuickTanh);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathAtan2(HInvoke* invoke) {
+  CreateFPFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathAtan2(HInvoke* invoke) {
+  GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickAtan2);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathHypot(HInvoke* invoke) {
+  CreateFPFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathHypot(HInvoke* invoke) {
+  GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickHypot);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitMathNextAfter(HInvoke* invoke) {
+  CreateFPFPToFPCallLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitMathNextAfter(HInvoke* invoke) {
+  GenFPFPToFPCall(invoke, GetAssembler(), codegen_, kQuickNextAfter);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitIntegerReverse(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitIntegerReverse(HInvoke* invoke) {
+  ArmVIXLAssembler* assembler = GetAssembler();
+  __ Rbit(OutputRegister(invoke), InputRegisterAt(invoke, 0));
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitLongReverse(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitLongReverse(HInvoke* invoke) {
+  ArmVIXLAssembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  vixl32::Register in_reg_lo  = LowRegisterFrom(locations->InAt(0));
+  vixl32::Register in_reg_hi  = HighRegisterFrom(locations->InAt(0));
+  vixl32::Register out_reg_lo = LowRegisterFrom(locations->Out());
+  vixl32::Register out_reg_hi = HighRegisterFrom(locations->Out());
+
+  __ Rbit(out_reg_lo, in_reg_hi);
+  __ Rbit(out_reg_hi, in_reg_lo);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitIntegerReverseBytes(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitIntegerReverseBytes(HInvoke* invoke) {
+  ArmVIXLAssembler* assembler = GetAssembler();
+  __ Rev(OutputRegister(invoke), InputRegisterAt(invoke, 0));
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitLongReverseBytes(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitLongReverseBytes(HInvoke* invoke) {
+  ArmVIXLAssembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  vixl32::Register in_reg_lo  = LowRegisterFrom(locations->InAt(0));
+  vixl32::Register in_reg_hi  = HighRegisterFrom(locations->InAt(0));
+  vixl32::Register out_reg_lo = LowRegisterFrom(locations->Out());
+  vixl32::Register out_reg_hi = HighRegisterFrom(locations->Out());
+
+  __ Rev(out_reg_lo, in_reg_hi);
+  __ Rev(out_reg_hi, in_reg_lo);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitShortReverseBytes(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitShortReverseBytes(HInvoke* invoke) {
+  ArmVIXLAssembler* assembler = GetAssembler();
+  __ Revsh(OutputRegister(invoke), InputRegisterAt(invoke, 0));
+}
+
+static void GenBitCount(HInvoke* instr, Primitive::Type type, ArmVIXLAssembler* assembler) {
+  DCHECK(Primitive::IsIntOrLongType(type)) << type;
+  DCHECK_EQ(instr->GetType(), Primitive::kPrimInt);
+  DCHECK_EQ(Primitive::PrimitiveKind(instr->InputAt(0)->GetType()), type);
+
+  bool is_long = type == Primitive::kPrimLong;
+  LocationSummary* locations = instr->GetLocations();
+  Location in = locations->InAt(0);
+  vixl32::Register src_0 = is_long ? LowRegisterFrom(in) : RegisterFrom(in);
+  vixl32::Register src_1 = is_long ? HighRegisterFrom(in) : src_0;
+  vixl32::SRegister tmp_s = LowSRegisterFrom(locations->GetTemp(0));
+  vixl32::DRegister tmp_d = DRegisterFrom(locations->GetTemp(0));
+  vixl32::Register  out_r = OutputRegister(instr);
+
+  // Move data from core register(s) to temp D-reg for bit count calculation, then move back.
+  // According to Cortex A57 and A72 optimization guides, compared to transferring to full D-reg,
+  // transferring data from core reg to upper or lower half of vfp D-reg requires extra latency,
+  // That's why for integer bit count, we use 'vmov d0, r0, r0' instead of 'vmov d0[0], r0'.
+  __ Vmov(tmp_d, src_1, src_0);     // Temp DReg |--src_1|--src_0|
+  __ Vcnt(Untyped8, tmp_d, tmp_d);  // Temp DReg |c|c|c|c|c|c|c|c|
+  __ Vpaddl(U8, tmp_d, tmp_d);      // Temp DReg |--c|--c|--c|--c|
+  __ Vpaddl(U16, tmp_d, tmp_d);     // Temp DReg |------c|------c|
+  if (is_long) {
+    __ Vpaddl(U32, tmp_d, tmp_d);   // Temp DReg |--------------c|
+  }
+  __ Vmov(out_r, tmp_s);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitIntegerBitCount(HInvoke* invoke) {
+  CreateIntToIntLocations(arena_, invoke);
+  invoke->GetLocations()->AddTemp(Location::RequiresFpuRegister());
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitIntegerBitCount(HInvoke* invoke) {
+  GenBitCount(invoke, Primitive::kPrimInt, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitLongBitCount(HInvoke* invoke) {
+  VisitIntegerBitCount(invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitLongBitCount(HInvoke* invoke) {
+  GenBitCount(invoke, Primitive::kPrimLong, GetAssembler());
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) {
+  LocationSummary* locations = new (arena_) LocationSummary(invoke,
+                                                            LocationSummary::kNoCall,
+                                                            kIntrinsified);
+  locations->SetInAt(0, Location::RequiresRegister());
+  locations->SetInAt(1, Location::RequiresRegister());
+  locations->SetInAt(2, Location::RequiresRegister());
+  locations->SetInAt(3, Location::RequiresRegister());
+  locations->SetInAt(4, Location::RequiresRegister());
+
+  // Temporary registers to store lengths of strings and for calculations.
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+  locations->AddTemp(Location::RequiresRegister());
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitStringGetCharsNoCheck(HInvoke* invoke) {
+  ArmVIXLAssembler* assembler = GetAssembler();
+  LocationSummary* locations = invoke->GetLocations();
+
+  // Check assumption that sizeof(Char) is 2 (used in scaling below).
+  const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
+  DCHECK_EQ(char_size, 2u);
+
+  // Location of data in char array buffer.
+  const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();
+
+  // Location of char array data in string.
+  const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
+
+  // void getCharsNoCheck(int srcBegin, int srcEnd, char[] dst, int dstBegin);
+  // Since getChars() calls getCharsNoCheck() - we use registers rather than constants.
+  vixl32::Register srcObj = InputRegisterAt(invoke, 0);
+  vixl32::Register srcBegin = InputRegisterAt(invoke, 1);
+  vixl32::Register srcEnd = InputRegisterAt(invoke, 2);
+  vixl32::Register dstObj = InputRegisterAt(invoke, 3);
+  vixl32::Register dstBegin = InputRegisterAt(invoke, 4);
+
+  vixl32::Register num_chr = RegisterFrom(locations->GetTemp(0));
+  vixl32::Register src_ptr = RegisterFrom(locations->GetTemp(1));
+  vixl32::Register dst_ptr = RegisterFrom(locations->GetTemp(2));
+
+  vixl32::Label done, compressed_string_loop;
+  // dst to be copied.
+  __ Add(dst_ptr, dstObj, data_offset);
+  __ Add(dst_ptr, dst_ptr, Operand(dstBegin, vixl32::LSL, 1));
+
+  __ Subs(num_chr, srcEnd, srcBegin);
+  // Early out for valid zero-length retrievals.
+  __ B(eq, &done);
+
+  // src range to copy.
+  __ Add(src_ptr, srcObj, value_offset);
+
+  UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
+  vixl32::Register temp;
+  vixl32::Label compressed_string_preloop;
+  if (mirror::kUseStringCompression) {
+    // Location of count in string.
+    const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
+    temp = temps.Acquire();
+    // String's length.
+    __ Ldr(temp, MemOperand(srcObj, count_offset));
+    __ Tst(temp, 1);
+    temps.Release(temp);
+    __ B(eq, &compressed_string_preloop);
+  }
+  __ Add(src_ptr, src_ptr, Operand(srcBegin, vixl32::LSL, 1));
+
+  // Do the copy.
+  vixl32::Label loop, remainder;
+
+  temp = temps.Acquire();
+  // Save repairing the value of num_chr on the < 4 character path.
+  __ Subs(temp, num_chr, 4);
+  __ B(lt, &remainder);
+
+  // Keep the result of the earlier subs, we are going to fetch at least 4 characters.
+  __ Mov(num_chr, temp);
+
+  // Main loop used for longer fetches loads and stores 4x16-bit characters at a time.
+  // (LDRD/STRD fault on unaligned addresses and it's not worth inlining extra code
+  // to rectify these everywhere this intrinsic applies.)
+  __ Bind(&loop);
+  __ Ldr(temp, MemOperand(src_ptr, char_size * 2));
+  __ Subs(num_chr, num_chr, 4);
+  __ Str(temp, MemOperand(dst_ptr, char_size * 2));
+  __ Ldr(temp, MemOperand(src_ptr, char_size * 4, PostIndex));
+  __ Str(temp, MemOperand(dst_ptr, char_size * 4, PostIndex));
+  temps.Release(temp);
+  __ B(ge, &loop);
+
+  __ Adds(num_chr, num_chr, 4);
+  __ B(eq, &done);
+
+  // Main loop for < 4 character case and remainder handling. Loads and stores one
+  // 16-bit Java character at a time.
+  __ Bind(&remainder);
+  temp = temps.Acquire();
+  __ Ldrh(temp, MemOperand(src_ptr, char_size, PostIndex));
+  __ Subs(num_chr, num_chr, 1);
+  __ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex));
+  temps.Release(temp);
+  __ B(gt, &remainder);
+
+  if (mirror::kUseStringCompression) {
+    __ B(&done);
+
+    const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
+    DCHECK_EQ(c_char_size, 1u);
+    // Copy loop for compressed src, copying 1 character (8-bit) to (16-bit) at a time.
+    __ Bind(&compressed_string_preloop);
+    __ Add(src_ptr, src_ptr, srcBegin);
+    __ Bind(&compressed_string_loop);
+    temp = temps.Acquire();
+    __ Ldrb(temp, MemOperand(src_ptr, c_char_size, PostIndex));
+    __ Strh(temp, MemOperand(dst_ptr, char_size, PostIndex));
+    temps.Release(temp);
+    __ Subs(num_chr, num_chr, 1);
+    __ B(gt, &compressed_string_loop);
+  }
+
+  __ Bind(&done);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) {
+  CreateFPToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitFloatIsInfinite(HInvoke* invoke) {
+  ArmVIXLAssembler* const assembler = GetAssembler();
+  const vixl32::Register out = OutputRegister(invoke);
+  // Shifting left by 1 bit makes the value encodable as an immediate operand;
+  // we don't care about the sign bit anyway.
+  constexpr uint32_t infinity = kPositiveInfinityFloat << 1U;
+
+  __ Vmov(out, InputSRegisterAt(invoke, 0));
+  // We don't care about the sign bit, so shift left.
+  __ Lsl(out, out, 1);
+  __ Eor(out, out, infinity);
+  // If the result is 0, then it has 32 leading zeros, and less than that otherwise.
+  __ Clz(out, out);
+  // Any number less than 32 logically shifted right by 5 bits results in 0;
+  // the same operation on 32 yields 1.
+  __ Lsr(out, out, 5);
+}
+
+void IntrinsicLocationsBuilderARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) {
+  CreateFPToIntLocations(arena_, invoke);
+}
+
+void IntrinsicCodeGeneratorARMVIXL::VisitDoubleIsInfinite(HInvoke* invoke) {
+  ArmVIXLAssembler* const assembler = GetAssembler();
+  const vixl32::Register out = OutputRegister(invoke);
+  UseScratchRegisterScope temps(assembler->GetVIXLAssembler());
+  const vixl32::Register temp = temps.Acquire();
+  // The highest 32 bits of double precision positive infinity separated into
+  // two constants encodable as immediate operands.
+  constexpr uint32_t infinity_high  = 0x7f000000U;
+  constexpr uint32_t infinity_high2 = 0x00f00000U;
+
+  static_assert((infinity_high | infinity_high2) ==
+                    static_cast<uint32_t>(kPositiveInfinityDouble >> 32U),
+                "The constants do not add up to the high 32 bits of double "
+                "precision positive infinity.");
+  __ Vmov(temp, out, InputDRegisterAt(invoke, 0));
+  __ Eor(out, out, infinity_high);
+  __ Eor(out, out, infinity_high2);
+  // We don't care about the sign bit, so shift left.
+  __ Orr(out, temp, Operand(out, vixl32::LSL, 1));
+  // If the result is 0, then it has 32 leading zeros, and less than that otherwise.
+  __ Clz(out, out);
+  // Any number less than 32 logically shifted right by 5 bits results in 0;
+  // the same operation on 32 yields 1.
+  __ Lsr(out, out, 5);
+}
+
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMinDoubleDouble)
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMinFloatFloat)
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMaxDoubleDouble)
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMaxFloatFloat)
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMinLongLong)
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathMaxLongLong)
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathCeil)          // Could be done by changing rounding mode, maybe?
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathFloor)         // Could be done by changing rounding mode, maybe?
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRint)
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundDouble)   // Could be done by changing rounding mode, maybe?
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, MathRoundFloat)    // Could be done by changing rounding mode, maybe?
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeCASLong)     // High register pressure.
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, SystemArrayCopyChar)
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, ReferenceGetReferent)
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, IntegerHighestOneBit)
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, LongHighestOneBit)
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, IntegerLowestOneBit)
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, LongLowestOneBit)
+
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOf);
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, StringStringIndexOfAfter);
+
+// 1.8.
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndAddInt)
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndAddLong)
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetInt)
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetLong)
+UNIMPLEMENTED_INTRINSIC(ARMVIXL, UnsafeGetAndSetObject)
+
+UNREACHABLE_INTRINSICS(ARMVIXL)
+
+#undef __
+
+}  // namespace arm
+}  // namespace art
diff --git a/compiler/optimizing/intrinsics_arm_vixl.h b/compiler/optimizing/intrinsics_arm_vixl.h
new file mode 100644
index 0000000..6e79cb7
--- /dev/null
+++ b/compiler/optimizing/intrinsics_arm_vixl.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_COMPILER_OPTIMIZING_INTRINSICS_ARM_VIXL_H_
+#define ART_COMPILER_OPTIMIZING_INTRINSICS_ARM_VIXL_H_
+
+#include "intrinsics.h"
+#include "utils/arm/assembler_arm_vixl.h"
+
+namespace art {
+
+namespace arm {
+
+class ArmVIXLAssembler;
+class CodeGeneratorARMVIXL;
+
+class IntrinsicLocationsBuilderARMVIXL FINAL : public IntrinsicVisitor {
+ public:
+  explicit IntrinsicLocationsBuilderARMVIXL(CodeGeneratorARMVIXL* codegen);
+
+  // Define visitor methods.
+
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \
+  void Visit ## Name(HInvoke* invoke) OVERRIDE;
+#include "intrinsics_list.h"
+INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
+#undef INTRINSICS_LIST
+#undef OPTIMIZING_INTRINSICS
+
+  // Check whether an invoke is an intrinsic, and if so, create a location summary. Returns whether
+  // a corresponding LocationSummary with the intrinsified_ flag set was generated and attached to
+  // the invoke.
+  bool TryDispatch(HInvoke* invoke);
+
+ private:
+  ArenaAllocator* arena_;
+  ArmVIXLAssembler* assembler_;
+  const ArmInstructionSetFeatures& features_;
+
+  DISALLOW_COPY_AND_ASSIGN(IntrinsicLocationsBuilderARMVIXL);
+};
+
+class IntrinsicCodeGeneratorARMVIXL FINAL : public IntrinsicVisitor {
+ public:
+  explicit IntrinsicCodeGeneratorARMVIXL(CodeGeneratorARMVIXL* codegen) : codegen_(codegen) {}
+
+  // Define visitor methods.
+
+#define OPTIMIZING_INTRINSICS(Name, IsStatic, NeedsEnvironmentOrCache, SideEffects, Exceptions, ...) \
+  void Visit ## Name(HInvoke* invoke) OVERRIDE;
+#include "intrinsics_list.h"
+INTRINSICS_LIST(OPTIMIZING_INTRINSICS)
+#undef INTRINSICS_LIST
+#undef OPTIMIZING_INTRINSICS
+
+ private:
+  ArenaAllocator* GetAllocator();
+  ArmVIXLAssembler* GetAssembler();
+
+  CodeGeneratorARMVIXL* codegen_;
+
+  DISALLOW_COPY_AND_ASSIGN(IntrinsicCodeGeneratorARMVIXL);
+};
+
+}  // namespace arm
+}  // namespace art
+
+#endif  // ART_COMPILER_OPTIMIZING_INTRINSICS_ARM_VIXL_H_
diff --git a/compiler/optimizing/intrinsics_mips.cc b/compiler/optimizing/intrinsics_mips.cc
index 5239f8f..7c81588 100644
--- a/compiler/optimizing/intrinsics_mips.cc
+++ b/compiler/optimizing/intrinsics_mips.cc
@@ -2495,6 +2495,9 @@
 UNIMPLEMENTED_INTRINSIC(MIPS, MathTan)
 UNIMPLEMENTED_INTRINSIC(MIPS, MathTanh)
 
+UNIMPLEMENTED_INTRINSIC(MIPS, StringStringIndexOf);
+UNIMPLEMENTED_INTRINSIC(MIPS, StringStringIndexOfAfter);
+
 // 1.8.
 UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGetAndAddInt)
 UNIMPLEMENTED_INTRINSIC(MIPS, UnsafeGetAndAddLong)
diff --git a/compiler/optimizing/intrinsics_mips64.cc b/compiler/optimizing/intrinsics_mips64.cc
index 1d153e2..2d4f417 100644
--- a/compiler/optimizing/intrinsics_mips64.cc
+++ b/compiler/optimizing/intrinsics_mips64.cc
@@ -1947,6 +1947,9 @@
 UNIMPLEMENTED_INTRINSIC(MIPS64, MathTan)
 UNIMPLEMENTED_INTRINSIC(MIPS64, MathTanh)
 
+UNIMPLEMENTED_INTRINSIC(MIPS64, StringStringIndexOf);
+UNIMPLEMENTED_INTRINSIC(MIPS64, StringStringIndexOfAfter);
+
 // 1.8.
 UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeGetAndAddInt)
 UNIMPLEMENTED_INTRINSIC(MIPS64, UnsafeGetAndAddLong)
diff --git a/compiler/optimizing/intrinsics_x86.cc b/compiler/optimizing/intrinsics_x86.cc
index f41e4d9..06ab46f 100644
--- a/compiler/optimizing/intrinsics_x86.cc
+++ b/compiler/optimizing/intrinsics_x86.cc
@@ -1408,21 +1408,22 @@
   // compression style is decided on alloc.
   __ cmpl(ecx, Address(arg, count_offset));
   __ j(kNotEqual, &return_false);
+  // Return true if strings are empty. Even with string compression `count == 0` means empty.
+  static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
+                "Expecting 0=compressed, 1=uncompressed");
+  __ jecxz(&return_true);
 
   if (mirror::kUseStringCompression) {
     NearLabel string_uncompressed;
-    // Differ cases into both compressed or both uncompressed. Different compression style
-    // is cut above.
-    __ cmpl(ecx, Immediate(0));
-    __ j(kGreaterEqual, &string_uncompressed);
+    // Extract length and differentiate between both compressed or both uncompressed.
+    // Different compression style is cut above.
+    __ shrl(ecx, Immediate(1));
+    __ j(kCarrySet, &string_uncompressed);
     // Divide string length by 2, rounding up, and continue as if uncompressed.
-    // Merge clearing the compression flag (+0x80000000) with +1 for rounding.
-    __ addl(ecx, Immediate(0x80000001));
+    __ addl(ecx, Immediate(1));
     __ shrl(ecx, Immediate(1));
     __ Bind(&string_uncompressed);
   }
-  // Return true if strings are empty.
-  __ jecxz(&return_true);
   // Load starting addresses of string values into ESI/EDI as required for repe_cmpsl instruction.
   __ leal(esi, Address(str, value_offset));
   __ leal(edi, Address(arg, value_offset));
@@ -1535,21 +1536,24 @@
   // Location of count within the String object.
   int32_t count_offset = mirror::String::CountOffset().Int32Value();
 
-  // Load string length, i.e., the count field of the string.
+  // Load the count field of the string containing the length and compression flag.
   __ movl(string_length, Address(string_obj, count_offset));
-  if (mirror::kUseStringCompression) {
-    string_length_flagged = locations->GetTemp(2).AsRegister<Register>();
-    __ movl(string_length_flagged, string_length);
-    // Mask out first bit used as compression flag.
-    __ andl(string_length, Immediate(INT32_MAX));
-  }
 
-  // Do a zero-length check.
+  // Do a zero-length check. Even with string compression `count == 0` means empty.
+  static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
+                "Expecting 0=compressed, 1=uncompressed");
   // TODO: Support jecxz.
   NearLabel not_found_label;
   __ testl(string_length, string_length);
   __ j(kEqual, &not_found_label);
 
+  if (mirror::kUseStringCompression) {
+    string_length_flagged = locations->GetTemp(2).AsRegister<Register>();
+    __ movl(string_length_flagged, string_length);
+    // Extract the length and shift out the least significant bit used as compression flag.
+    __ shrl(string_length, Immediate(1));
+  }
+
   if (start_at_zero) {
     // Number of chars to scan is the same as the string length.
     __ movl(counter, string_length);
@@ -1570,8 +1574,8 @@
 
     if (mirror::kUseStringCompression) {
       NearLabel modify_counter, offset_uncompressed_label;
-      __ cmpl(string_length_flagged, Immediate(0));
-      __ j(kGreaterEqual, &offset_uncompressed_label);
+      __ testl(string_length_flagged, Immediate(1));
+      __ j(kNotZero, &offset_uncompressed_label);
       // Move to the start of the string: string_obj + value_offset + start_index.
       __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_1, value_offset));
       __ jmp(&modify_counter);
@@ -1593,8 +1597,8 @@
   if (mirror::kUseStringCompression) {
     NearLabel uncompressed_string_comparison;
     NearLabel comparison_done;
-    __ cmpl(string_length_flagged, Immediate(0));
-    __ j(kGreater, &uncompressed_string_comparison);
+    __ testl(string_length_flagged, Immediate(1));
+    __ j(kNotZero, &uncompressed_string_comparison);
 
     // Check if EAX (search_value) is ASCII.
     __ cmpl(search_value, Immediate(127));
@@ -1787,8 +1791,10 @@
     __ cfi().AdjustCFAOffset(stack_adjust);
 
     NearLabel copy_loop, copy_uncompressed;
-    __ cmpl(Address(obj, count_offset), Immediate(0));
-    __ j(kGreaterEqual, &copy_uncompressed);
+    __ testl(Address(obj, count_offset), Immediate(1));
+    static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
+                  "Expecting 0=compressed, 1=uncompressed");
+    __ j(kNotZero, &copy_uncompressed);
     // Compute the address of the source string by adding the number of chars from
     // the source beginning to the value offset of a string.
     __ leal(ESI, CodeGeneratorX86::ArrayAddress(obj, srcBegin, TIMES_1, value_offset));
@@ -2056,9 +2062,9 @@
       (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
        invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
   LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           can_call ?
-                                                               LocationSummary::kCallOnSlowPath :
-                                                               LocationSummary::kNoCall,
+                                                           (can_call
+                                                                ? LocationSummary::kCallOnSlowPath
+                                                                : LocationSummary::kNoCall),
                                                            kIntrinsified);
   if (can_call && kUseBakerReadBarrier) {
     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
@@ -2076,7 +2082,7 @@
     }
   } else {
     locations->SetOut(Location::RequiresRegister(),
-                      can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap);
+                      (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
   }
 }
 
@@ -2255,10 +2261,16 @@
   GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ true, codegen_);
 }
 
-static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type,
+static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena,
+                                       Primitive::Type type,
                                        HInvoke* invoke) {
+  bool can_call = kEmitCompilerReadBarrier &&
+      kUseBakerReadBarrier &&
+      (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
   LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           LocationSummary::kNoCall,
+                                                           (can_call
+                                                                ? LocationSummary::kCallOnSlowPath
+                                                                : LocationSummary::kNoCall),
                                                            kIntrinsified);
   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
   locations->SetInAt(1, Location::RequiresRegister());
@@ -2278,7 +2290,8 @@
   // Force a byte register for the output.
   locations->SetOut(Location::RegisterLocation(EAX));
   if (type == Primitive::kPrimNot) {
-    // Need temp registers for card-marking.
+    // Need temporary registers for card-marking, and possibly for
+    // (Baker) read barrier.
     locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
     // Need a byte register for marking.
     locations->AddTemp(Location::RegisterLocation(ECX));
@@ -2294,14 +2307,9 @@
 }
 
 void IntrinsicLocationsBuilderX86::VisitUnsafeCASObject(HInvoke* invoke) {
-  // The UnsafeCASObject intrinsic is missing a read barrier, and
-  // therefore sometimes does not work as expected (b/25883050).
-  // Turn it off temporarily as a quick fix, until the read barrier is
-  // implemented (see TODO in GenCAS).
-  //
-  // TODO(rpl): Implement read barrier support in GenCAS and re-enable
-  // this intrinsic.
-  if (kEmitCompilerReadBarrier) {
+  // The only read barrier implementation supporting the
+  // UnsafeCASObject intrinsic is the Baker-style read barriers.
+  if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
     return;
   }
 
@@ -2317,7 +2325,18 @@
   Location out = locations->Out();
   DCHECK_EQ(out.AsRegister<Register>(), EAX);
 
+  // The address of the field within the holding object.
+  Address field_addr(base, offset, ScaleFactor::TIMES_1, 0);
+
   if (type == Primitive::kPrimNot) {
+    // The only read barrier implementation supporting the
+    // UnsafeCASObject intrinsic is the Baker-style read barriers.
+    DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
+
+    Location temp1_loc = locations->GetTemp(0);
+    Register temp1 = temp1_loc.AsRegister<Register>();
+    Register temp2 = locations->GetTemp(1).AsRegister<Register>();
+
     Register expected = locations->InAt(3).AsRegister<Register>();
     // Ensure `expected` is in EAX (required by the CMPXCHG instruction).
     DCHECK_EQ(expected, EAX);
@@ -2325,11 +2344,20 @@
 
     // Mark card for object assuming new value is stored.
     bool value_can_be_null = true;  // TODO: Worth finding out this information?
-    codegen->MarkGCCard(locations->GetTemp(0).AsRegister<Register>(),
-                        locations->GetTemp(1).AsRegister<Register>(),
-                        base,
-                        value,
-                        value_can_be_null);
+    codegen->MarkGCCard(temp1, temp2, base, value, value_can_be_null);
+
+    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+      // Need to make sure the reference stored in the field is a to-space
+      // one before attempting the CAS or the CAS could fail incorrectly.
+      codegen->GenerateReferenceLoadWithBakerReadBarrier(
+          invoke,
+          temp1_loc,  // Unused, used only as a "temporary" within the read barrier.
+          base,
+          field_addr,
+          /* needs_null_check */ false,
+          /* always_update_field */ true,
+          &temp2);
+    }
 
     bool base_equals_value = (base == value);
     if (kPoisonHeapReferences) {
@@ -2337,7 +2365,7 @@
         // If `base` and `value` are the same register location, move
         // `value` to a temporary register.  This way, poisoning
         // `value` won't invalidate `base`.
-        value = locations->GetTemp(0).AsRegister<Register>();
+        value = temp1;
         __ movl(value, base);
       }
 
@@ -2356,19 +2384,12 @@
       __ PoisonHeapReference(value);
     }
 
-    // TODO: Add a read barrier for the reference stored in the object
-    // before attempting the CAS, similar to the one in the
-    // art::Unsafe_compareAndSwapObject JNI implementation.
-    //
-    // Note that this code is not (yet) used when read barriers are
-    // enabled (see IntrinsicLocationsBuilderX86::VisitUnsafeCASObject).
-    DCHECK(!kEmitCompilerReadBarrier);
-    __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), value);
+    __ LockCmpxchgl(field_addr, value);
 
     // LOCK CMPXCHG has full barrier semantics, and we don't need
     // scheduling barriers at this time.
 
-    // Convert ZF into the boolean result.
+    // Convert ZF into the Boolean result.
     __ setb(kZero, out.AsRegister<Register>());
     __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
 
@@ -2392,8 +2413,7 @@
       // Ensure the expected value is in EAX (required by the CMPXCHG
       // instruction).
       DCHECK_EQ(locations->InAt(3).AsRegister<Register>(), EAX);
-      __ LockCmpxchgl(Address(base, offset, TIMES_1, 0),
-                      locations->InAt(4).AsRegister<Register>());
+      __ LockCmpxchgl(field_addr, locations->InAt(4).AsRegister<Register>());
     } else if (type == Primitive::kPrimLong) {
       // Ensure the expected value is in EAX:EDX and that the new
       // value is in EBX:ECX (required by the CMPXCHG8B instruction).
@@ -2401,7 +2421,7 @@
       DCHECK_EQ(locations->InAt(3).AsRegisterPairHigh<Register>(), EDX);
       DCHECK_EQ(locations->InAt(4).AsRegisterPairLow<Register>(), EBX);
       DCHECK_EQ(locations->InAt(4).AsRegisterPairHigh<Register>(), ECX);
-      __ LockCmpxchg8b(Address(base, offset, TIMES_1, 0));
+      __ LockCmpxchg8b(field_addr);
     } else {
       LOG(FATAL) << "Unexpected CAS type " << type;
     }
@@ -2409,7 +2429,7 @@
     // LOCK CMPXCHG/LOCK CMPXCHG8B have full barrier semantics, and we
     // don't need scheduling barriers at this time.
 
-    // Convert ZF into the boolean result.
+    // Convert ZF into the Boolean result.
     __ setb(kZero, out.AsRegister<Register>());
     __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
   }
@@ -2424,14 +2444,9 @@
 }
 
 void IntrinsicCodeGeneratorX86::VisitUnsafeCASObject(HInvoke* invoke) {
-  // The UnsafeCASObject intrinsic is missing a read barrier, and
-  // therefore sometimes does not work as expected (b/25883050).
-  // Turn it off temporarily as a quick fix, until the read barrier is
-  // implemented (see TODO in GenCAS).
-  //
-  // TODO(rpl): Implement read barrier support in GenCAS and re-enable
-  // this intrinsic.
-  DCHECK(!kEmitCompilerReadBarrier);
+  // The only read barrier implementation supporting the
+  // UnsafeCASObject intrinsic is the Baker-style read barriers.
+  DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
 
   GenCAS(Primitive::kPrimNot, invoke, codegen_);
 }
@@ -3191,7 +3206,7 @@
     //   if (src_ptr != end_ptr) {
     //     uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
     //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
-    //     bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+    //     bool is_gray = (rb_state == ReadBarrier::GrayState());
     //     if (is_gray) {
     //       // Slow-path copy.
     //       for (size_t i = 0; i != length; ++i) {
@@ -3213,14 +3228,13 @@
     __ j(kEqual, &done);
 
     // Given the numeric representation, it's enough to check the low bit of the rb_state.
-    static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
-    static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
-    static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+    static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+    static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
     constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
     constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
     constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
 
-    // if (rb_state == ReadBarrier::gray_ptr_)
+    // if (rb_state == ReadBarrier::GrayState())
     //   goto slow_path;
     // At this point, just do the "if" and make sure that flags are preserved until the branch.
     __ testb(Address(src, monitor_offset + gray_byte_position), Immediate(test_value));
@@ -3315,6 +3329,9 @@
 UNIMPLEMENTED_INTRINSIC(X86, IntegerLowestOneBit)
 UNIMPLEMENTED_INTRINSIC(X86, LongLowestOneBit)
 
+UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOf);
+UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOfAfter);
+
 // 1.8.
 UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndAddInt)
 UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndAddLong)
diff --git a/compiler/optimizing/intrinsics_x86_64.cc b/compiler/optimizing/intrinsics_x86_64.cc
index 4b0afca..2ea8670 100644
--- a/compiler/optimizing/intrinsics_x86_64.cc
+++ b/compiler/optimizing/intrinsics_x86_64.cc
@@ -1399,7 +1399,7 @@
     //   if (src_ptr != end_ptr) {
     //     uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
     //     lfence;  // Load fence or artificial data dependency to prevent load-load reordering
-    //     bool is_gray = (rb_state == ReadBarrier::gray_ptr_);
+    //     bool is_gray = (rb_state == ReadBarrier::GrayState());
     //     if (is_gray) {
     //       // Slow-path copy.
     //       do {
@@ -1420,14 +1420,13 @@
     __ j(kEqual, &done);
 
     // Given the numeric representation, it's enough to check the low bit of the rb_state.
-    static_assert(ReadBarrier::white_ptr_ == 0, "Expecting white to have value 0");
-    static_assert(ReadBarrier::gray_ptr_ == 1, "Expecting gray to have value 1");
-    static_assert(ReadBarrier::black_ptr_ == 2, "Expecting black to have value 2");
+    static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
+    static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
     constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
     constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
     constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
 
-    // if (rb_state == ReadBarrier::gray_ptr_)
+    // if (rb_state == ReadBarrier::GrayState())
     //   goto slow_path;
     // At this point, just do the "if" and make sure that flags are preserved until the branch.
     __ testb(Address(src, monitor_offset + gray_byte_position), Immediate(test_value));
@@ -1575,20 +1574,23 @@
   // compression style is decided on alloc.
   __ cmpl(rcx, Address(arg, count_offset));
   __ j(kNotEqual, &return_false);
+  // Return true if both strings are empty. Even with string compression `count == 0` means empty.
+  static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
+                "Expecting 0=compressed, 1=uncompressed");
+  __ jrcxz(&return_true);
 
   if (mirror::kUseStringCompression) {
     NearLabel string_uncompressed;
-    // Both string are compressed.
-    __ cmpl(rcx, Immediate(0));
-    __ j(kGreaterEqual, &string_uncompressed);
+    // Extract length and differentiate between both compressed or both uncompressed.
+    // Different compression style is cut above.
+    __ shrl(rcx, Immediate(1));
+    __ j(kCarrySet, &string_uncompressed);
     // Divide string length by 2, rounding up, and continue as if uncompressed.
     // Merge clearing the compression flag with +1 for rounding.
-    __ addl(rcx, Immediate(static_cast<int32_t>(0x80000001)));
+    __ addl(rcx, Immediate(1));
     __ shrl(rcx, Immediate(1));
     __ Bind(&string_uncompressed);
   }
-  // Return true if both strings are empty.
-  __ jrcxz(&return_true);
   // Load starting addresses of string values into RSI/RDI as required for repe_cmpsq instruction.
   __ leal(rsi, Address(str, value_offset));
   __ leal(rdi, Address(arg, value_offset));
@@ -1695,21 +1697,22 @@
   // Location of count within the String object.
   int32_t count_offset = mirror::String::CountOffset().Int32Value();
 
-  // Load string length, i.e., the count field of the string.
+  // Load the count field of the string containing the length and compression flag.
   __ movl(string_length, Address(string_obj, count_offset));
-  if (mirror::kUseStringCompression) {
-    // Use TMP to keep string_length_flagged.
-    __ movl(CpuRegister(TMP), string_length);
-    // Mask out first bit used as compression flag.
-    __ andl(string_length, Immediate(INT32_MAX));
-  }
 
-  // Do a length check.
+  // Do a zero-length check. Even with string compression `count == 0` means empty.
   // TODO: Support jecxz.
   NearLabel not_found_label;
   __ testl(string_length, string_length);
   __ j(kEqual, &not_found_label);
 
+  if (mirror::kUseStringCompression) {
+    // Use TMP to keep string_length_flagged.
+    __ movl(CpuRegister(TMP), string_length);
+    // Mask out first bit used as compression flag.
+    __ shrl(string_length, Immediate(1));
+  }
+
   if (start_at_zero) {
     // Number of chars to scan is the same as the string length.
     __ movl(counter, string_length);
@@ -1729,8 +1732,8 @@
 
     if (mirror::kUseStringCompression) {
       NearLabel modify_counter, offset_uncompressed_label;
-      __ cmpl(CpuRegister(TMP), Immediate(0));
-      __ j(kGreaterEqual, &offset_uncompressed_label);
+      __ testl(CpuRegister(TMP), Immediate(1));
+      __ j(kNotZero, &offset_uncompressed_label);
       __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_1, value_offset));
       __ jmp(&modify_counter);
       // Move to the start of the string: string_obj + value_offset + 2 * start_index.
@@ -1748,8 +1751,8 @@
   if (mirror::kUseStringCompression) {
     NearLabel uncompressed_string_comparison;
     NearLabel comparison_done;
-    __ cmpl(CpuRegister(TMP), Immediate(0));
-    __ j(kGreater, &uncompressed_string_comparison);
+    __ testl(CpuRegister(TMP), Immediate(1));
+    __ j(kNotZero, &uncompressed_string_comparison);
     // Check if RAX (search_value) is ASCII.
     __ cmpl(search_value, Immediate(127));
     __ j(kGreater, &not_found_label);
@@ -1932,8 +1935,10 @@
     // Location of count in string.
     const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
 
-    __ cmpl(Address(obj, count_offset), Immediate(0));
-    __ j(kGreaterEqual, &copy_uncompressed);
+    __ testl(Address(obj, count_offset), Immediate(1));
+    static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
+                  "Expecting 0=compressed, 1=uncompressed");
+    __ j(kNotZero, &copy_uncompressed);
     // Compute the address of the source string by adding the number of chars from
     // the source beginning to the value offset of a string.
     __ leaq(CpuRegister(RSI),
@@ -2172,9 +2177,9 @@
       (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
        invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
   LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           can_call ?
-                                                               LocationSummary::kCallOnSlowPath :
-                                                               LocationSummary::kNoCall,
+                                                           (can_call
+                                                                ? LocationSummary::kCallOnSlowPath
+                                                                : LocationSummary::kNoCall),
                                                            kIntrinsified);
   if (can_call && kUseBakerReadBarrier) {
     locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty());  // No caller-save registers.
@@ -2183,7 +2188,7 @@
   locations->SetInAt(1, Location::RequiresRegister());
   locations->SetInAt(2, Location::RequiresRegister());
   locations->SetOut(Location::RequiresRegister(),
-                    can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap);
+                    (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGet(HInvoke* invoke) {
@@ -2333,10 +2338,16 @@
   GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ true, codegen_);
 }
 
-static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena, Primitive::Type type,
+static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena,
+                                       Primitive::Type type,
                                        HInvoke* invoke) {
+  bool can_call = kEmitCompilerReadBarrier &&
+      kUseBakerReadBarrier &&
+      (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
   LocationSummary* locations = new (arena) LocationSummary(invoke,
-                                                           LocationSummary::kNoCall,
+                                                           (can_call
+                                                                ? LocationSummary::kCallOnSlowPath
+                                                                : LocationSummary::kNoCall),
                                                            kIntrinsified);
   locations->SetInAt(0, Location::NoLocation());        // Unused receiver.
   locations->SetInAt(1, Location::RequiresRegister());
@@ -2347,7 +2358,8 @@
 
   locations->SetOut(Location::RequiresRegister());
   if (type == Primitive::kPrimNot) {
-    // Need temp registers for card-marking.
+    // Need temporary registers for card-marking, and possibly for
+    // (Baker) read barrier.
     locations->AddTemp(Location::RequiresRegister());  // Possibly used for reference poisoning too.
     locations->AddTemp(Location::RequiresRegister());
   }
@@ -2362,14 +2374,9 @@
 }
 
 void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
-  // The UnsafeCASObject intrinsic is missing a read barrier, and
-  // therefore sometimes does not work as expected (b/25883050).
-  // Turn it off temporarily as a quick fix, until the read barrier is
-  // implemented (see TODO in GenCAS).
-  //
-  // TODO(rpl): Implement read barrier support in GenCAS and re-enable
-  // this intrinsic.
-  if (kEmitCompilerReadBarrier) {
+  // The only read barrier implementation supporting the
+  // UnsafeCASObject intrinsic is the Baker-style read barriers.
+  if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
     return;
   }
 
@@ -2386,16 +2393,37 @@
   // Ensure `expected` is in RAX (required by the CMPXCHG instruction).
   DCHECK_EQ(expected.AsRegister(), RAX);
   CpuRegister value = locations->InAt(4).AsRegister<CpuRegister>();
-  CpuRegister out = locations->Out().AsRegister<CpuRegister>();
+  Location out_loc = locations->Out();
+  CpuRegister out = out_loc.AsRegister<CpuRegister>();
 
   if (type == Primitive::kPrimNot) {
+    // The only read barrier implementation supporting the
+    // UnsafeCASObject intrinsic is the Baker-style read barriers.
+    DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
+
+    CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>();
+    CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>();
+
     // Mark card for object assuming new value is stored.
     bool value_can_be_null = true;  // TODO: Worth finding out this information?
-    codegen->MarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(),
-                        locations->GetTemp(1).AsRegister<CpuRegister>(),
-                        base,
-                        value,
-                        value_can_be_null);
+    codegen->MarkGCCard(temp1, temp2, base, value, value_can_be_null);
+
+    // The address of the field within the holding object.
+    Address field_addr(base, offset, ScaleFactor::TIMES_1, 0);
+
+    if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
+      // Need to make sure the reference stored in the field is a to-space
+      // one before attempting the CAS or the CAS could fail incorrectly.
+      codegen->GenerateReferenceLoadWithBakerReadBarrier(
+          invoke,
+          out_loc,  // Unused, used only as a "temporary" within the read barrier.
+          base,
+          field_addr,
+          /* needs_null_check */ false,
+          /* always_update_field */ true,
+          &temp1,
+          &temp2);
+    }
 
     bool base_equals_value = (base.AsRegister() == value.AsRegister());
     Register value_reg = value.AsRegister();
@@ -2404,7 +2432,7 @@
         // If `base` and `value` are the same register location, move
         // `value_reg` to a temporary register.  This way, poisoning
         // `value_reg` won't invalidate `base`.
-        value_reg = locations->GetTemp(0).AsRegister<CpuRegister>().AsRegister();
+        value_reg = temp1.AsRegister();
         __ movl(CpuRegister(value_reg), base);
       }
 
@@ -2423,19 +2451,12 @@
       __ PoisonHeapReference(CpuRegister(value_reg));
     }
 
-    // TODO: Add a read barrier for the reference stored in the object
-    // before attempting the CAS, similar to the one in the
-    // art::Unsafe_compareAndSwapObject JNI implementation.
-    //
-    // Note that this code is not (yet) used when read barriers are
-    // enabled (see IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject).
-    DCHECK(!kEmitCompilerReadBarrier);
-    __ LockCmpxchgl(Address(base, offset, TIMES_1, 0), CpuRegister(value_reg));
+    __ LockCmpxchgl(field_addr, CpuRegister(value_reg));
 
     // LOCK CMPXCHG has full barrier semantics, and we don't need
     // scheduling barriers at this time.
 
-    // Convert ZF into the boolean result.
+    // Convert ZF into the Boolean result.
     __ setcc(kZero, out);
     __ movzxb(out, out);
 
@@ -2468,7 +2489,7 @@
     // LOCK CMPXCHG has full barrier semantics, and we don't need
     // scheduling barriers at this time.
 
-    // Convert ZF into the boolean result.
+    // Convert ZF into the Boolean result.
     __ setcc(kZero, out);
     __ movzxb(out, out);
   }
@@ -2483,14 +2504,9 @@
 }
 
 void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
-  // The UnsafeCASObject intrinsic is missing a read barrier, and
-  // therefore sometimes does not work as expected (b/25883050).
-  // Turn it off temporarily as a quick fix, until the read barrier is
-  // implemented (see TODO in GenCAS).
-  //
-  // TODO(rpl): Implement read barrier support in GenCAS and re-enable
-  // this intrinsic.
-  DCHECK(!kEmitCompilerReadBarrier);
+  // The only read barrier implementation supporting the
+  // UnsafeCASObject intrinsic is the Baker-style read barriers.
+  DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
 
   GenCAS(Primitive::kPrimNot, invoke, codegen_);
 }
@@ -2982,6 +2998,9 @@
 UNIMPLEMENTED_INTRINSIC(X86_64, FloatIsInfinite)
 UNIMPLEMENTED_INTRINSIC(X86_64, DoubleIsInfinite)
 
+UNIMPLEMENTED_INTRINSIC(X86_64, StringStringIndexOf);
+UNIMPLEMENTED_INTRINSIC(X86_64, StringStringIndexOfAfter);
+
 // 1.8.
 UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndAddInt)
 UNIMPLEMENTED_INTRINSIC(X86_64, UnsafeGetAndAddLong)
diff --git a/compiler/optimizing/licm.cc b/compiler/optimizing/licm.cc
index a0ded74..f0086fb 100644
--- a/compiler/optimizing/licm.cc
+++ b/compiler/optimizing/licm.cc
@@ -15,6 +15,7 @@
  */
 
 #include "licm.h"
+
 #include "side_effects_analysis.h"
 
 namespace art {
@@ -90,8 +91,7 @@
   }
 
   // Post order visit to visit inner loops before outer loops.
-  for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
-    HBasicBlock* block = it.Current();
+  for (HBasicBlock* block : graph_->GetPostOrder()) {
     if (!block->IsLoopHeader()) {
       // Only visit the loop when we reach the header.
       continue;
@@ -120,17 +120,17 @@
       }
       DCHECK(!loop_info->IsIrreducible());
 
-      // We can move an instruction that can throw only if it is the first
-      // throwing instruction in the loop. Note that the first potentially
-      // throwing instruction encountered that is not hoisted stops this
-      // optimization. Non-throwing instruction can still be hoisted.
-      bool found_first_non_hoisted_throwing_instruction_in_loop = !inner->IsLoopHeader();
+      // We can move an instruction that can throw only as long as it is the first visible
+      // instruction (throw or write) in the loop. Note that the first potentially visible
+      // instruction that is not hoisted stops this optimization. Non-throwing instructions,
+      // on the other hand, can still be hoisted.
+      bool found_first_non_hoisted_visible_instruction_in_loop = !inner->IsLoopHeader();
       for (HInstructionIterator inst_it(inner->GetInstructions());
            !inst_it.Done();
            inst_it.Advance()) {
         HInstruction* instruction = inst_it.Current();
         if (instruction->CanBeMoved()
-            && (!instruction->CanThrow() || !found_first_non_hoisted_throwing_instruction_in_loop)
+            && (!instruction->CanThrow() || !found_first_non_hoisted_visible_instruction_in_loop)
             && !instruction->GetSideEffects().MayDependOn(loop_effects)
             && InputsAreDefinedBeforeLoop(instruction)) {
           // We need to update the environment if the instruction has a loop header
@@ -142,10 +142,10 @@
           }
           instruction->MoveBefore(pre_header->GetLastInstruction());
           MaybeRecordStat(MethodCompilationStat::kLoopInvariantMoved);
-        } else if (instruction->CanThrow()) {
-          // If `instruction` can throw, we cannot move further instructions
-          // that can throw as well.
-          found_first_non_hoisted_throwing_instruction_in_loop = true;
+        } else if (instruction->CanThrow() || instruction->DoesAnyWrite()) {
+          // If `instruction` can do something visible (throw or write),
+          // we cannot move further instructions that can throw.
+          found_first_non_hoisted_visible_instruction_in_loop = true;
         }
       }
     }
diff --git a/compiler/optimizing/linear_order.cc b/compiler/optimizing/linear_order.cc
index 3af212f..80cecd4 100644
--- a/compiler/optimizing/linear_order.cc
+++ b/compiler/optimizing/linear_order.cc
@@ -94,8 +94,7 @@
   //      for it.
   ArenaVector<uint32_t> forward_predecessors(graph->GetBlocks().size(),
                                              allocator->Adapter(kArenaAllocLinearOrder));
-  for (HReversePostOrderIterator it(*graph); !it.Done(); it.Advance()) {
-    HBasicBlock* block = it.Current();
+  for (HBasicBlock* block : graph->GetReversePostOrder()) {
     size_t number_of_forward_predecessors = block->GetPredecessors().size();
     if (block->IsLoopHeader()) {
       number_of_forward_predecessors -= block->GetLoopInformation()->NumberOfBackEdges();
diff --git a/compiler/optimizing/linear_order.h b/compiler/optimizing/linear_order.h
index cdbdd07..7122d67 100644
--- a/compiler/optimizing/linear_order.h
+++ b/compiler/optimizing/linear_order.h
@@ -30,16 +30,12 @@
 //
 // for (HBasicBlock* block : linear_order)                   // linear order
 //
-// for (HBasicBlock* block : LinearPostOrder(linear_order))  // linear post order
+// for (HBasicBlock* block : ReverseRange(linear_order))     // linear post order
 //
 void LinearizeGraph(const HGraph* graph,
                     ArenaAllocator* allocator,
                     ArenaVector<HBasicBlock*>* linear_order);
 
-inline auto LinearPostOrder(const ArenaVector<HBasicBlock*>& linear_order) {
-  return MakeIterationRange(linear_order.rbegin(), linear_order.rend());
-}
-
 }  // namespace art
 
 #endif  // ART_COMPILER_OPTIMIZING_LINEAR_ORDER_H_
diff --git a/compiler/optimizing/liveness_test.cc b/compiler/optimizing/liveness_test.cc
index bd74368..37b58de 100644
--- a/compiler/optimizing/liveness_test.cc
+++ b/compiler/optimizing/liveness_test.cc
@@ -56,8 +56,7 @@
   liveness.Analyze();
 
   std::ostringstream buffer;
-  for (HInsertionOrderIterator it(*graph); !it.Done(); it.Advance()) {
-    HBasicBlock* block = it.Current();
+  for (HBasicBlock* block : graph->GetBlocks()) {
     buffer << "Block " << block->GetBlockId() << std::endl;
     size_t ssa_values = liveness.GetNumberOfSsaValues();
     BitVector* live_in = liveness.GetLiveInSet(*block);
diff --git a/compiler/optimizing/load_store_elimination.cc b/compiler/optimizing/load_store_elimination.cc
index 7347686..15e6059 100644
--- a/compiler/optimizing/load_store_elimination.cc
+++ b/compiler/optimizing/load_store_elimination.cc
@@ -33,11 +33,11 @@
  public:
   ReferenceInfo(HInstruction* reference, size_t pos) : reference_(reference), position_(pos) {
     is_singleton_ = true;
-    is_singleton_and_not_returned_ = true;
+    is_singleton_and_non_escaping_ = true;
     if (!reference_->IsNewInstance() && !reference_->IsNewArray()) {
       // For references not allocated in the method, don't assume anything.
       is_singleton_ = false;
-      is_singleton_and_not_returned_ = false;
+      is_singleton_and_non_escaping_ = false;
       return;
     }
 
@@ -50,7 +50,7 @@
         // BoundType shouldn't normally be necessary for a NewInstance.
         // Just be conservative for the uncommon cases.
         is_singleton_ = false;
-        is_singleton_and_not_returned_ = false;
+        is_singleton_and_non_escaping_ = false;
         return;
       }
       if (user->IsPhi() || user->IsSelect() || user->IsInvoke() ||
@@ -62,21 +62,37 @@
         // reference_ is merged to HPhi/HSelect, passed to a callee, or stored to heap.
         // reference_ isn't the only name that can refer to its value anymore.
         is_singleton_ = false;
-        is_singleton_and_not_returned_ = false;
+        is_singleton_and_non_escaping_ = false;
         return;
       }
       if ((user->IsUnresolvedInstanceFieldGet() && (reference_ == user->InputAt(0))) ||
           (user->IsUnresolvedInstanceFieldSet() && (reference_ == user->InputAt(0)))) {
-        // The field is accessed in an unresolved way. We mark the object as a singleton to
-        // disable load/store optimizations on it.
+        // The field is accessed in an unresolved way. We mark the object as a non-singleton
+        // to disable load/store optimizations on it.
         // Note that we could optimize this case and still perform some optimizations until
         // we hit the unresolved access, but disabling is the simplest.
         is_singleton_ = false;
-        is_singleton_and_not_returned_ = false;
+        is_singleton_and_non_escaping_ = false;
         return;
       }
       if (user->IsReturn()) {
-        is_singleton_and_not_returned_ = false;
+        is_singleton_and_non_escaping_ = false;
+      }
+    }
+
+    if (!is_singleton_ || !is_singleton_and_non_escaping_) {
+      return;
+    }
+
+    // Look at Environment uses and if it's for HDeoptimize, it's treated the same
+    // as a return which escapes at the end of executing the compiled code. We don't
+    // do store elimination for singletons that escape through HDeoptimize.
+    // Other Environment uses are fine since LSE is disabled for debuggable.
+    for (const HUseListNode<HEnvironment*>& use : reference_->GetEnvUses()) {
+      HEnvironment* user = use.GetUser();
+      if (user->GetHolder()->IsDeoptimize()) {
+        is_singleton_and_non_escaping_ = false;
+        break;
       }
     }
   }
@@ -96,17 +112,22 @@
     return is_singleton_;
   }
 
-  // Returns true if reference_ is a singleton and not returned to the caller.
+  // Returns true if reference_ is a singleton and not returned to the caller or
+  // used as an environment local of an HDeoptimize instruction.
   // The allocation and stores into reference_ may be eliminated for such cases.
-  bool IsSingletonAndNotReturned() const {
-    return is_singleton_and_not_returned_;
+  bool IsSingletonAndNonEscaping() const {
+    return is_singleton_and_non_escaping_;
   }
 
  private:
   HInstruction* const reference_;
   const size_t position_;     // position in HeapLocationCollector's ref_info_array_.
   bool is_singleton_;         // can only be referred to by a single name in the method.
-  bool is_singleton_and_not_returned_;  // reference_ is singleton and not returned to caller.
+
+  // reference_ is singleton and does not escape in the end either by
+  // returning to the caller, or being used as an environment local of an
+  // HDeoptimize instruction.
+  bool is_singleton_and_non_escaping_;
 
   DISALLOW_COPY_AND_ASSIGN(ReferenceInfo);
 };
@@ -168,7 +189,9 @@
   const int16_t declaring_class_def_index_;  // declaring class's def's dex index.
   bool value_killed_by_loop_side_effects_;   // value of this location may be killed by loop
                                              // side effects because this location is stored
-                                             // into inside a loop.
+                                             // into inside a loop. This gives
+                                             // better info on whether a singleton's location
+                                             // value may be killed by loop side effects.
 
   DISALLOW_COPY_AND_ASSIGN(HeapLocation);
 };
@@ -200,8 +223,7 @@
                          kArenaAllocLSE),
         has_heap_stores_(false),
         has_volatile_(false),
-        has_monitor_operations_(false),
-        may_deoptimize_(false) {}
+        has_monitor_operations_(false) {}
 
   size_t GetNumberOfHeapLocations() const {
     return heap_locations_.size();
@@ -234,13 +256,6 @@
     return has_monitor_operations_;
   }
 
-  // Returns whether this method may be deoptimized.
-  // Currently we don't have meta data support for deoptimizing
-  // a method that eliminates allocations/stores.
-  bool MayDeoptimize() const {
-    return may_deoptimize_;
-  }
-
   // Find and return the heap location index in heap_locations_.
   size_t FindHeapLocationIndex(ReferenceInfo* ref_info,
                                size_t offset,
@@ -420,8 +435,26 @@
   void VisitInstanceFieldSet(HInstanceFieldSet* instruction) OVERRIDE {
     HeapLocation* location = VisitFieldAccess(instruction->InputAt(0), instruction->GetFieldInfo());
     has_heap_stores_ = true;
-    if (instruction->GetBlock()->GetLoopInformation() != nullptr) {
-      location->SetValueKilledByLoopSideEffects(true);
+    if (location->GetReferenceInfo()->IsSingleton()) {
+      // A singleton's location value may be killed by loop side effects if it's
+      // defined before that loop, and it's stored into inside that loop.
+      HLoopInformation* loop_info = instruction->GetBlock()->GetLoopInformation();
+      if (loop_info != nullptr) {
+        HInstruction* ref = location->GetReferenceInfo()->GetReference();
+        DCHECK(ref->IsNewInstance());
+        if (loop_info->IsDefinedOutOfTheLoop(ref)) {
+          // ref's location value may be killed by this loop's side effects.
+          location->SetValueKilledByLoopSideEffects(true);
+        } else {
+          // ref is defined inside this loop so this loop's side effects cannot
+          // kill its location value at the loop header since ref/its location doesn't
+          // exist yet at the loop header.
+        }
+      }
+    } else {
+      // For non-singletons, value_killed_by_loop_side_effects_ is inited to
+      // true.
+      DCHECK_EQ(location->IsValueKilledByLoopSideEffects(), true);
     }
   }
 
@@ -473,10 +506,6 @@
     CreateReferenceInfoForReferenceType(instruction);
   }
 
-  void VisitDeoptimize(HDeoptimize* instruction ATTRIBUTE_UNUSED) OVERRIDE {
-    may_deoptimize_ = true;
-  }
-
   void VisitMonitorOperation(HMonitorOperation* monitor ATTRIBUTE_UNUSED) OVERRIDE {
     has_monitor_operations_ = true;
   }
@@ -488,7 +517,6 @@
                             // alias analysis and won't be as effective.
   bool has_volatile_;       // If there are volatile field accesses.
   bool has_monitor_operations_;    // If there are monitor operations.
-  bool may_deoptimize_;     // Only true for HDeoptimize with single-frame deoptimization.
 
   DISALLOW_COPY_AND_ASSIGN(HeapLocationCollector);
 };
@@ -643,27 +671,59 @@
     if (predecessors.size() == 0) {
       return;
     }
+
     ArenaVector<HInstruction*>& heap_values = heap_values_for_[block->GetBlockId()];
     for (size_t i = 0; i < heap_values.size(); i++) {
-      HInstruction* pred0_value = heap_values_for_[predecessors[0]->GetBlockId()][i];
-      heap_values[i] = pred0_value;
-      if (pred0_value != kUnknownHeapValue) {
-        for (size_t j = 1; j < predecessors.size(); j++) {
-          HInstruction* pred_value = heap_values_for_[predecessors[j]->GetBlockId()][i];
-          if (pred_value != pred0_value) {
-            heap_values[i] = kUnknownHeapValue;
-            break;
-          }
+      HInstruction* merged_value = nullptr;
+      // Whether merged_value is a result that's merged from all predecessors.
+      bool from_all_predecessors = true;
+      ReferenceInfo* ref_info = heap_location_collector_.GetHeapLocation(i)->GetReferenceInfo();
+      HInstruction* singleton_ref = nullptr;
+      if (ref_info->IsSingletonAndNonEscaping()) {
+        // We do more analysis of liveness when merging heap values for such
+        // cases since stores into such references may potentially be eliminated.
+        singleton_ref = ref_info->GetReference();
+      }
+
+      for (HBasicBlock* predecessor : predecessors) {
+        HInstruction* pred_value = heap_values_for_[predecessor->GetBlockId()][i];
+        if ((singleton_ref != nullptr) &&
+            !singleton_ref->GetBlock()->Dominates(predecessor)) {
+          // singleton_ref is not live in this predecessor. Skip this predecessor since
+          // it does not really have the location.
+          DCHECK_EQ(pred_value, kUnknownHeapValue);
+          from_all_predecessors = false;
+          continue;
+        }
+        if (merged_value == nullptr) {
+          // First seen heap value.
+          merged_value = pred_value;
+        } else if (pred_value != merged_value) {
+          // There are conflicting values.
+          merged_value = kUnknownHeapValue;
+          break;
         }
       }
 
-      if (heap_values[i] == kUnknownHeapValue) {
+      if (merged_value == kUnknownHeapValue) {
+        // There are conflicting heap values from different predecessors.
         // Keep the last store in each predecessor since future loads cannot be eliminated.
-        for (size_t j = 0; j < predecessors.size(); j++) {
-          ArenaVector<HInstruction*>& pred_values = heap_values_for_[predecessors[j]->GetBlockId()];
+        for (HBasicBlock* predecessor : predecessors) {
+          ArenaVector<HInstruction*>& pred_values = heap_values_for_[predecessor->GetBlockId()];
           KeepIfIsStore(pred_values[i]);
         }
       }
+
+      if ((merged_value == nullptr) || !from_all_predecessors) {
+        DCHECK(singleton_ref != nullptr);
+        DCHECK((singleton_ref->GetBlock() == block) ||
+               !singleton_ref->GetBlock()->Dominates(block));
+        // singleton_ref is not defined before block or defined only in some of its
+        // predecessors, so block doesn't really have the location at its entry.
+        heap_values[i] = kUnknownHeapValue;
+      } else {
+        heap_values[i] = merged_value;
+      }
     }
   }
 
@@ -792,8 +852,7 @@
     } else if (index != nullptr) {
       // For array element, don't eliminate stores since it can be easily aliased
       // with non-constant index.
-    } else if (!heap_location_collector_.MayDeoptimize() &&
-               ref_info->IsSingletonAndNotReturned()) {
+    } else if (ref_info->IsSingletonAndNonEscaping()) {
       // Store into a field of a singleton that's not returned. The value cannot be
       // killed due to aliasing/invocation. It can be redundant since future loads can
       // directly get the value set by this instruction. The value can still be killed due to
@@ -810,9 +869,6 @@
         if (loop_info != nullptr) {
           // instruction is a store in the loop so the loop must does write.
           DCHECK(side_effects_.GetLoopEffects(loop_info->GetHeader()).DoesAnyWrite());
-          // If it's a singleton, IsValueKilledByLoopSideEffects() must be true.
-          DCHECK(!ref_info->IsSingleton() ||
-                 heap_location_collector_.GetHeapLocation(idx)->IsValueKilledByLoopSideEffects());
 
           if (loop_info->IsDefinedOutOfTheLoop(original_ref)) {
             DCHECK(original_ref->GetBlock()->Dominates(loop_info->GetPreHeader()));
@@ -970,8 +1026,7 @@
       // new_instance isn't used for field accesses. No need to process it.
       return;
     }
-    if (!heap_location_collector_.MayDeoptimize() &&
-        ref_info->IsSingletonAndNotReturned() &&
+    if (ref_info->IsSingletonAndNonEscaping() &&
         !new_instance->IsFinalizable() &&
         !new_instance->NeedsAccessCheck()) {
       singleton_new_instances_.push_back(new_instance);
@@ -1029,8 +1084,8 @@
     return;
   }
   HeapLocationCollector heap_location_collector(graph_);
-  for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
-    heap_location_collector.VisitBasicBlock(it.Current());
+  for (HBasicBlock* block : graph_->GetReversePostOrder()) {
+    heap_location_collector.VisitBasicBlock(block);
   }
   if (heap_location_collector.GetNumberOfHeapLocations() > kMaxNumberOfHeapLocations) {
     // Bail out if there are too many heap locations to deal with.
@@ -1048,8 +1103,8 @@
   }
   heap_location_collector.BuildAliasingMatrix();
   LSEVisitor lse_visitor(graph_, heap_location_collector, side_effects_);
-  for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
-    lse_visitor.VisitBasicBlock(it.Current());
+  for (HBasicBlock* block : graph_->GetReversePostOrder()) {
+    lse_visitor.VisitBasicBlock(block);
   }
   lse_visitor.RemoveInstructions();
 }
diff --git a/compiler/optimizing/locations.cc b/compiler/optimizing/locations.cc
index d157509..a9fe209 100644
--- a/compiler/optimizing/locations.cc
+++ b/compiler/optimizing/locations.cc
@@ -16,11 +16,16 @@
 
 #include "locations.h"
 
+#include <type_traits>
+
 #include "nodes.h"
 #include "code_generator.h"
 
 namespace art {
 
+// Verify that Location is trivially copyable.
+static_assert(std::is_trivially_copyable<Location>::value, "Location should be trivially copyable");
+
 LocationSummary::LocationSummary(HInstruction* instruction,
                                  CallKind call_kind,
                                  bool intrinsified)
diff --git a/compiler/optimizing/locations.h b/compiler/optimizing/locations.h
index da27928..091b58a 100644
--- a/compiler/optimizing/locations.h
+++ b/compiler/optimizing/locations.h
@@ -91,12 +91,9 @@
     DCHECK(!IsValid());
   }
 
-  Location(const Location& other) : value_(other.value_) {}
+  Location(const Location& other) = default;
 
-  Location& operator=(const Location& other) {
-    value_ = other.value_;
-    return *this;
-  }
+  Location& operator=(const Location& other) = default;
 
   bool IsConstant() const {
     return (value_ & kLocationConstantMask) == kConstant;
@@ -328,7 +325,6 @@
         LOG(FATAL) << "Should not use this location kind";
     }
     UNREACHABLE();
-    return "?";
   }
 
   // Unallocated locations.
@@ -529,6 +525,12 @@
     temps_.push_back(location);
   }
 
+  void AddRegisterTemps(size_t count) {
+    for (size_t i = 0; i < count; ++i) {
+      AddTemp(Location::RequiresRegister());
+    }
+  }
+
   Location GetTemp(uint32_t at) const {
     return temps_[at];
   }
diff --git a/compiler/optimizing/loop_optimization.cc b/compiler/optimizing/loop_optimization.cc
index 4acf3ac..f4616e3 100644
--- a/compiler/optimizing/loop_optimization.cc
+++ b/compiler/optimizing/loop_optimization.cc
@@ -20,14 +20,31 @@
 
 namespace art {
 
-// TODO: Generalize to cycles, as found by induction analysis?
-static bool IsPhiAddSub(HPhi* phi, /*out*/ HInstruction** addsub_out) {
-  HInputsRef inputs = phi->GetInputs();
-  if (inputs.size() == 2 && (inputs[1]->IsAdd() || inputs[1]->IsSub())) {
-    HInstruction* addsub = inputs[1];
-    if (addsub->InputAt(0) == phi || addsub->InputAt(1) == phi) {
-      if (addsub->GetUses().HasExactlyOneElement()) {
-        *addsub_out = addsub;
+// Remove the instruction from the graph. A bit more elaborate than the usual
+// instruction removal, since there may be a cycle in the use structure.
+static void RemoveFromCycle(HInstruction* instruction) {
+  instruction->RemoveAsUserOfAllInputs();
+  instruction->RemoveEnvironmentUsers();
+  instruction->GetBlock()->RemoveInstructionOrPhi(instruction, /*ensure_safety=*/ false);
+}
+
+// Detect a goto block and sets succ to the single successor.
+static bool IsGotoBlock(HBasicBlock* block, /*out*/ HBasicBlock** succ) {
+  if (block->GetPredecessors().size() == 1 &&
+      block->GetSuccessors().size() == 1 &&
+      block->IsSingleGoto()) {
+    *succ = block->GetSingleSuccessor();
+    return true;
+  }
+  return false;
+}
+
+// Detect an early exit loop.
+static bool IsEarlyExit(HLoopInformation* loop_info) {
+  HBlocksInLoopReversePostOrderIterator it_loop(*loop_info);
+  for (it_loop.Advance(); !it_loop.Done(); it_loop.Advance()) {
+    for (HBasicBlock* successor : it_loop.Current()->GetSuccessors()) {
+      if (!loop_info->Contains(*successor)) {
         return true;
       }
     }
@@ -35,88 +52,6 @@
   return false;
 }
 
-static bool IsOnlyUsedAfterLoop(const HLoopInformation& loop_info,
-                                HPhi* phi, HInstruction* addsub) {
-  for (const HUseListNode<HInstruction*>& use : phi->GetUses()) {
-    if (use.GetUser() != addsub) {
-      HLoopInformation* other_loop_info = use.GetUser()->GetBlock()->GetLoopInformation();
-      if (other_loop_info != nullptr && other_loop_info->IsIn(loop_info)) {
-        return false;
-      }
-    }
-  }
-  return true;
-}
-
-// Find: phi: Phi(init, addsub)
-//       s:   SuspendCheck
-//       c:   Condition(phi, bound)
-//       i:   If(c)
-// TODO: Find a less pattern matching approach?
-static bool IsEmptyHeader(HBasicBlock* block, /*out*/ HInstruction** addsub) {
-  HInstruction* phi = block->GetFirstPhi();
-  if (phi != nullptr && phi->GetNext() == nullptr && IsPhiAddSub(phi->AsPhi(), addsub)) {
-    HInstruction* s = block->GetFirstInstruction();
-    if (s != nullptr && s->IsSuspendCheck()) {
-      HInstruction* c = s->GetNext();
-      if (c != nullptr && c->IsCondition() && c->GetUses().HasExactlyOneElement()) {
-        HInstruction* i = c->GetNext();
-        if (i != nullptr && i->IsIf() && i->InputAt(0) == c) {
-          // Check that phi is only used inside loop as expected.
-          for (const HUseListNode<HInstruction*>& use : phi->GetUses()) {
-            if (use.GetUser() != *addsub && use.GetUser() != c) {
-              return false;
-            }
-          }
-          return true;
-        }
-      }
-    }
-  }
-  return false;
-}
-
-static bool IsEmptyBody(HBasicBlock* block, HInstruction* addsub) {
-  HInstruction* phi = block->GetFirstPhi();
-  HInstruction* i = block->GetFirstInstruction();
-  return phi == nullptr && i == addsub && i->GetNext() != nullptr && i->GetNext()->IsGoto();
-}
-
-static HBasicBlock* TryRemovePreHeader(HBasicBlock* preheader, HBasicBlock* entry_block) {
-  if (preheader->GetPredecessors().size() == 1) {
-    HBasicBlock* entry = preheader->GetSinglePredecessor();
-    HInstruction* anchor = entry->GetLastInstruction();
-    // If the pre-header has a single predecessor we can remove it too if
-    // either the pre-header just contains a goto, or if the predecessor
-    // is not the entry block so we can push instructions backward
-    // (moving computation into the entry block is too dangerous!).
-    if (preheader->GetFirstInstruction() == nullptr ||
-        preheader->GetFirstInstruction()->IsGoto() ||
-        (entry != entry_block && anchor->IsGoto())) {
-      // Push non-goto statements backward to empty the pre-header.
-      for (HInstructionIterator it(preheader->GetInstructions()); !it.Done(); it.Advance()) {
-        HInstruction* instruction = it.Current();
-        if (!instruction->IsGoto()) {
-          if (!instruction->CanBeMoved()) {
-            return nullptr;  // pushing failed to move all
-          }
-          it.Current()->MoveBefore(anchor);
-        }
-      }
-      return entry;
-    }
-  }
-  return nullptr;
-}
-
-static void RemoveFromCycle(HInstruction* instruction) {
-  // A bit more elaborate than the usual instruction removal,
-  // since there may be a cycle in the use structure.
-  instruction->RemoveAsUserOfAllInputs();
-  instruction->RemoveEnvironmentUsers();
-  instruction->GetBlock()->RemoveInstructionOrPhi(instruction, /*ensure_safety=*/ false);
-}
-
 //
 // Class methods.
 //
@@ -127,7 +62,9 @@
       induction_range_(induction_analysis),
       loop_allocator_(nullptr),
       top_loop_(nullptr),
-      last_loop_(nullptr) {
+      last_loop_(nullptr),
+      iset_(nullptr),
+      induction_simplication_count_(0) {
 }
 
 void HLoopOptimization::Run() {
@@ -164,8 +101,14 @@
     }
   }
 
-  // Traverse the loop hierarchy inner-to-outer and optimize.
-  TraverseLoopsInnerToOuter(top_loop_);
+  // Traverse the loop hierarchy inner-to-outer and optimize. Traversal can use
+  // a temporary set that stores instructions using the phase-local allocator.
+  if (top_loop_ != nullptr) {
+    ArenaSet<HInstruction*> iset(loop_allocator_->Adapter(kArenaAllocLoopOptimization));
+    iset_ = &iset;
+    TraverseLoopsInnerToOuter(top_loop_);
+    iset_ = nullptr;  // detach
+  }
 }
 
 void HLoopOptimization::AddLoop(HLoopInformation* loop_info) {
@@ -194,61 +137,133 @@
 
 void HLoopOptimization::RemoveLoop(LoopNode* node) {
   DCHECK(node != nullptr);
-  // TODO: implement when needed (for current set of optimizations, we don't
-  // need to keep recorded loop hierarchy up to date, but as we get different
-  // traversal, we may want to remove the node from the hierarchy here.
+  DCHECK(node->inner == nullptr);
+  if (node->previous != nullptr) {
+    // Within sequence.
+    node->previous->next = node->next;
+    if (node->next != nullptr) {
+      node->next->previous = node->previous;
+    }
+  } else {
+    // First of sequence.
+    if (node->outer != nullptr) {
+      node->outer->inner = node->next;
+    } else {
+      top_loop_ = node->next;
+    }
+    if (node->next != nullptr) {
+      node->next->outer = node->outer;
+      node->next->previous = nullptr;
+    }
+  }
 }
 
 void HLoopOptimization::TraverseLoopsInnerToOuter(LoopNode* node) {
   for ( ; node != nullptr; node = node->next) {
+    int current_induction_simplification_count = induction_simplication_count_;
     if (node->inner != nullptr) {
       TraverseLoopsInnerToOuter(node->inner);
     }
-    // Visit loop after its inner loops have been visited.
+    // Visit loop after its inner loops have been visited. If the induction of any inner
+    // loop has been simplified, recompute the induction information of this loop first.
+    if (current_induction_simplification_count != induction_simplication_count_) {
+      induction_range_.ReVisit(node->loop_info);
+    }
+    SimplifyBlocks(node);
     SimplifyInduction(node);
-    RemoveIfEmptyLoop(node);
+    SimplifyBlocks(node);
+    if (node->inner == nullptr) {
+      RemoveIfEmptyInnerLoop(node);
+    }
   }
 }
 
 void HLoopOptimization::SimplifyInduction(LoopNode* node) {
   HBasicBlock* header = node->loop_info->GetHeader();
   HBasicBlock* preheader = node->loop_info->GetPreHeader();
-  // Scan the phis in the header to find opportunities to optimize induction.
+  // Scan the phis in the header to find opportunities to simplify an induction
+  // cycle that is only used outside the loop. Replace these uses, if any, with
+  // the last value and remove the induction cycle.
+  // Examples: for (int i = 0; x != null;   i++) { .... no i .... }
+  //           for (int i = 0; i < 10; i++, k++) { .... no k .... } return k;
   for (HInstructionIterator it(header->GetPhis()); !it.Done(); it.Advance()) {
     HPhi* phi = it.Current()->AsPhi();
-    HInstruction* addsub = nullptr;
-    // Find phi-add/sub cycle.
-    if (IsPhiAddSub(phi, &addsub)) {
-      // Simple case, the induction is only used by itself. Although redundant,
-      // later phases do not easily detect this property. Thus, eliminate here.
-      // Example: for (int i = 0; x != null; i++) { .... no i .... }
-      if (phi->GetUses().HasExactlyOneElement()) {
-        // Remove the cycle, including all uses. Even environment uses can be removed,
-        // since these computations have no effect at all.
-        RemoveFromCycle(phi);  // removes environment uses too
-        RemoveFromCycle(addsub);
-        continue;
+    iset_->clear();
+    int32_t use_count = 0;
+    if (IsPhiInduction(phi) &&
+        IsOnlyUsedAfterLoop(node->loop_info, phi, &use_count) &&
+        // No uses, or no early-exit with proper replacement.
+        (use_count == 0 ||
+         (!IsEarlyExit(node->loop_info) && TryReplaceWithLastValue(phi, preheader)))) {
+      for (HInstruction* i : *iset_) {
+        RemoveFromCycle(i);
       }
-      // Closed form case. Only the last value of the induction is needed. Remove all
-      // overhead from the loop, and replace subsequent uses with the last value.
-      // Example: for (int i = 0; i < 10; i++, k++) { .... no k .... } return k;
-      if (IsOnlyUsedAfterLoop(*node->loop_info, phi, addsub) &&
-          induction_range_.CanGenerateLastValue(phi)) {
-        HInstruction* last = induction_range_.GenerateLastValue(phi, graph_, preheader);
-        // Remove the cycle, replacing all uses. Even environment uses can consume the final
-        // value, since any first real use is outside the loop (although this may imply
-        // that deopting may look "ahead" a bit on the phi value).
-        ReplaceAllUses(phi, last, addsub);
-        RemoveFromCycle(phi);  // removes environment uses too
-        RemoveFromCycle(addsub);
-      }
+      induction_simplication_count_++;
     }
   }
 }
 
-void HLoopOptimization::RemoveIfEmptyLoop(LoopNode* node) {
+void HLoopOptimization::SimplifyBlocks(LoopNode* node) {
+  // Repeat the block simplifications until no more changes occur. Note that since
+  // each simplification consists of eliminating code (without introducing new code),
+  // this process is always finite.
+  bool changed;
+  do {
+    changed = false;
+    // Iterate over all basic blocks in the loop-body.
+    for (HBlocksInLoopIterator it(*node->loop_info); !it.Done(); it.Advance()) {
+      HBasicBlock* block = it.Current();
+      // Remove dead instructions from the loop-body.
+      for (HBackwardInstructionIterator i(block->GetInstructions()); !i.Done(); i.Advance()) {
+        HInstruction* instruction = i.Current();
+        if (instruction->IsDeadAndRemovable()) {
+          changed = true;
+          block->RemoveInstruction(instruction);
+        }
+      }
+      // Remove trivial control flow blocks from the loop-body.
+      HBasicBlock* succ = nullptr;
+      if (IsGotoBlock(block, &succ) && succ->GetPredecessors().size() == 1) {
+        // Trivial goto block can be removed.
+        HBasicBlock* pred = block->GetSinglePredecessor();
+        changed = true;
+        pred->ReplaceSuccessor(block, succ);
+        block->RemoveDominatedBlock(succ);
+        block->DisconnectAndDelete();
+        pred->AddDominatedBlock(succ);
+        succ->SetDominator(pred);
+      } else if (block->GetSuccessors().size() == 2) {
+        // Trivial if block can be bypassed to either branch.
+        HBasicBlock* succ0 = block->GetSuccessors()[0];
+        HBasicBlock* succ1 = block->GetSuccessors()[1];
+        HBasicBlock* meet0 = nullptr;
+        HBasicBlock* meet1 = nullptr;
+        if (succ0 != succ1 &&
+            IsGotoBlock(succ0, &meet0) &&
+            IsGotoBlock(succ1, &meet1) &&
+            meet0 == meet1 &&  // meets again
+            meet0 != block &&  // no self-loop
+            meet0->GetPhis().IsEmpty()) {  // not used for merging
+          changed = true;
+          succ0->DisconnectAndDelete();
+          if (block->Dominates(meet0)) {
+            block->RemoveDominatedBlock(meet0);
+            succ1->AddDominatedBlock(meet0);
+            meet0->SetDominator(succ1);
+          }
+        }
+      }
+    }
+  } while (changed);
+}
+
+void HLoopOptimization::RemoveIfEmptyInnerLoop(LoopNode* node) {
   HBasicBlock* header = node->loop_info->GetHeader();
   HBasicBlock* preheader = node->loop_info->GetPreHeader();
+  // Ensure loop header logic is finite.
+  if (!induction_range_.IsFinite(node->loop_info)) {
+    return;
+  }
   // Ensure there is only a single loop-body (besides the header).
   HBasicBlock* body = nullptr;
   for (HBlocksInLoopIterator it(*node->loop_info); !it.Done(); it.Advance()) {
@@ -266,48 +281,119 @@
   HBasicBlock* exit = (header->GetSuccessors()[0] == body)
       ? header->GetSuccessors()[1]
       : header->GetSuccessors()[0];
-  // Ensure exit can only be reached by exiting loop (this seems typically the
-  // case anyway, and simplifies code generation below; TODO: perhaps relax?).
+  // Ensure exit can only be reached by exiting loop.
   if (exit->GetPredecessors().size() != 1) {
     return;
   }
-  // Detect an empty loop: no side effects other than plain iteration.
-  HInstruction* addsub = nullptr;
-  if (IsEmptyHeader(header, &addsub) && IsEmptyBody(body, addsub)) {
-    HBasicBlock* entry = TryRemovePreHeader(preheader, graph_->GetEntryBlock());
+  // Detect an empty loop: no side effects other than plain iteration. Replace
+  // subsequent index uses, if any, with the last value and remove the loop.
+  iset_->clear();
+  int32_t use_count = 0;
+  if (IsEmptyHeader(header) &&
+      IsEmptyBody(body) &&
+      IsOnlyUsedAfterLoop(node->loop_info, header->GetFirstPhi(), &use_count) &&
+      // No uses, or proper replacement.
+      (use_count == 0 || TryReplaceWithLastValue(header->GetFirstPhi(), preheader))) {
     body->DisconnectAndDelete();
     exit->RemovePredecessor(header);
     header->RemoveSuccessor(exit);
-    header->ClearDominanceInformation();
-    header->SetDominator(preheader);  // needed by next disconnect.
+    header->RemoveDominatedBlock(exit);
     header->DisconnectAndDelete();
-    // If allowed, remove preheader too, which may expose next outer empty loop
-    // Otherwise, link preheader directly to exit to restore the flow graph.
-    if (entry != nullptr) {
-      entry->ReplaceSuccessor(preheader, exit);
-      entry->AddDominatedBlock(exit);
-      exit->SetDominator(entry);
-      preheader->DisconnectAndDelete();
-    } else {
-      preheader->AddSuccessor(exit);
-      preheader->AddInstruction(new (graph_->GetArena()) HGoto());  // global allocator
-      preheader->AddDominatedBlock(exit);
-      exit->SetDominator(preheader);
-    }
+    preheader->AddSuccessor(exit);
+    preheader->AddInstruction(new (graph_->GetArena()) HGoto());  // global allocator
+    preheader->AddDominatedBlock(exit);
+    exit->SetDominator(preheader);
     // Update hierarchy.
     RemoveLoop(node);
   }
 }
 
-void HLoopOptimization::ReplaceAllUses(HInstruction* instruction,
-                                       HInstruction* replacement,
-                                       HInstruction* exclusion) {
+bool HLoopOptimization::IsPhiInduction(HPhi* phi) {
+  ArenaSet<HInstruction*>* set = induction_range_.LookupCycle(phi);
+  if (set != nullptr) {
+    DCHECK(iset_->empty());
+    for (HInstruction* i : *set) {
+      // Check that, other than instructions that are no longer in the graph (removed earlier)
+      // each instruction is removable and, other than the phi, uses are contained in the cycle.
+      if (!i->IsInBlock()) {
+        continue;
+      } else if (!i->IsRemovable()) {
+        return false;
+      } else if (i != phi) {
+        for (const HUseListNode<HInstruction*>& use : i->GetUses()) {
+          if (set->find(use.GetUser()) == set->end()) {
+            return false;
+          }
+        }
+      }
+      iset_->insert(i);  // copy
+    }
+    return true;
+  }
+  return false;
+}
+
+// Find: phi: Phi(init, addsub)
+//       s:   SuspendCheck
+//       c:   Condition(phi, bound)
+//       i:   If(c)
+// TODO: Find a less pattern matching approach?
+bool HLoopOptimization::IsEmptyHeader(HBasicBlock* block) {
+  DCHECK(iset_->empty());
+  HInstruction* phi = block->GetFirstPhi();
+  if (phi != nullptr && phi->GetNext() == nullptr && IsPhiInduction(phi->AsPhi())) {
+    HInstruction* s = block->GetFirstInstruction();
+    if (s != nullptr && s->IsSuspendCheck()) {
+      HInstruction* c = s->GetNext();
+      if (c != nullptr && c->IsCondition() && c->GetUses().HasExactlyOneElement()) {
+        HInstruction* i = c->GetNext();
+        if (i != nullptr && i->IsIf() && i->InputAt(0) == c) {
+          iset_->insert(c);
+          iset_->insert(s);
+          return true;
+        }
+      }
+    }
+  }
+  return false;
+}
+
+bool HLoopOptimization::IsEmptyBody(HBasicBlock* block) {
+  if (block->GetFirstPhi() == nullptr) {
+    for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
+      HInstruction* instruction = it.Current();
+      if (!instruction->IsGoto() && iset_->find(instruction) == iset_->end()) {
+        return false;
+      }
+    }
+    return true;
+  }
+  return false;
+}
+
+bool HLoopOptimization::IsOnlyUsedAfterLoop(HLoopInformation* loop_info,
+                                            HInstruction* instruction,
+                                            /*out*/ int32_t* use_count) {
+  for (const HUseListNode<HInstruction*>& use : instruction->GetUses()) {
+    HInstruction* user = use.GetUser();
+    if (iset_->find(user) == iset_->end()) {  // not excluded?
+      HLoopInformation* other_loop_info = user->GetBlock()->GetLoopInformation();
+      if (other_loop_info != nullptr && other_loop_info->IsIn(*loop_info)) {
+        return false;
+      }
+      ++*use_count;
+    }
+  }
+  return true;
+}
+
+void HLoopOptimization::ReplaceAllUses(HInstruction* instruction, HInstruction* replacement) {
   const HUseList<HInstruction*>& uses = instruction->GetUses();
   for (auto it = uses.begin(), end = uses.end(); it != end;) {
     HInstruction* user = it->GetUser();
     size_t index = it->GetIndex();
     ++it;  // increment before replacing
-    if (user != exclusion) {
+    if (iset_->find(user) == iset_->end()) {  // not excluded?
       user->ReplaceInput(replacement, index);
       induction_range_.Replace(user, instruction, replacement);  // update induction
     }
@@ -317,7 +403,7 @@
     HEnvironment* user = it->GetUser();
     size_t index = it->GetIndex();
     ++it;  // increment before replacing
-    if (user->GetHolder() != exclusion) {
+    if (iset_->find(user->GetHolder()) == iset_->end()) {  // not excluded?
       user->RemoveAsUserOfInput(index);
       user->SetRawEnvAt(index, replacement);
       replacement->AddEnvUseAt(user, index);
@@ -325,4 +411,16 @@
   }
 }
 
+bool HLoopOptimization::TryReplaceWithLastValue(HInstruction* instruction, HBasicBlock* block) {
+  // Try to replace outside uses with the last value. Environment uses can consume this
+  // value too, since any first true use is outside the loop (although this may imply
+  // that de-opting may look "ahead" a bit on the phi value). If there are only environment
+  // uses, the value is dropped altogether, since the computations have no effect.
+  if (induction_range_.CanGenerateLastValue(instruction)) {
+    ReplaceAllUses(instruction, induction_range_.GenerateLastValue(instruction, graph_, block));
+    return true;
+  }
+  return false;
+}
+
 }  // namespace art
diff --git a/compiler/optimizing/loop_optimization.h b/compiler/optimizing/loop_optimization.h
index 6092955..3391bef 100644
--- a/compiler/optimizing/loop_optimization.h
+++ b/compiler/optimizing/loop_optimization.h
@@ -46,7 +46,7 @@
           inner(nullptr),
           previous(nullptr),
           next(nullptr) {}
-    const HLoopInformation* const loop_info;
+    HLoopInformation* const loop_info;
     LoopNode* outer;
     LoopNode* inner;
     LoopNode* previous;
@@ -61,11 +61,18 @@
   void TraverseLoopsInnerToOuter(LoopNode* node);
 
   void SimplifyInduction(LoopNode* node);
-  void RemoveIfEmptyLoop(LoopNode* node);
+  void SimplifyBlocks(LoopNode* node);
+  void RemoveIfEmptyInnerLoop(LoopNode* node);
 
-  void ReplaceAllUses(HInstruction* instruction,
-                      HInstruction* replacement,
-                      HInstruction* exclusion);
+  bool IsPhiInduction(HPhi* phi);
+  bool IsEmptyHeader(HBasicBlock* block);
+  bool IsEmptyBody(HBasicBlock* block);
+
+  bool IsOnlyUsedAfterLoop(HLoopInformation* loop_info,
+                           HInstruction* instruction,
+                           /*out*/ int32_t* use_count);
+  void ReplaceAllUses(HInstruction* instruction, HInstruction* replacement);
+  bool TryReplaceWithLastValue(HInstruction* instruction, HBasicBlock* block);
 
   // Range information based on prior induction variable analysis.
   InductionVarRange induction_range_;
@@ -79,6 +86,15 @@
   LoopNode* top_loop_;
   LoopNode* last_loop_;
 
+  // Temporary bookkeeping of a set of instructions.
+  // Contents reside in phase-local heap memory.
+  ArenaSet<HInstruction*>* iset_;
+
+  // Counter that tracks how many induction cycles have been simplified. Useful
+  // to trigger incremental updates of induction variable analysis of outer loops
+  // when the induction of inner loops has changed.
+  int32_t induction_simplication_count_;
+
   friend class LoopOptimizationTest;
 
   DISALLOW_COPY_AND_ASSIGN(HLoopOptimization);
diff --git a/compiler/optimizing/nodes.cc b/compiler/optimizing/nodes.cc
index 874c1ed..680381a 100644
--- a/compiler/optimizing/nodes.cc
+++ b/compiler/optimizing/nodes.cc
@@ -35,7 +35,7 @@
 // double).
 static constexpr bool kEnableFloatingPointStaticEvaluation = (FLT_EVAL_METHOD == 0);
 
-void HGraph::InitializeInexactObjectRTI(StackHandleScopeCollection* handles) {
+void HGraph::InitializeInexactObjectRTI(VariableSizedHandleScope* handles) {
   ScopedObjectAccess soa(Thread::Current());
   // Create the inexact Object reference type and store it in the HGraph.
   ClassLinker* linker = Runtime::Current()->GetClassLinker();
@@ -179,16 +179,16 @@
 }
 
 void HGraph::ClearDominanceInformation() {
-  for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) {
-    it.Current()->ClearDominanceInformation();
+  for (HBasicBlock* block : GetReversePostOrder()) {
+    block->ClearDominanceInformation();
   }
   reverse_post_order_.clear();
 }
 
 void HGraph::ClearLoopInformation() {
   SetHasIrreducibleLoops(false);
-  for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) {
-    it.Current()->SetLoopInformation(nullptr);
+  for (HBasicBlock* block : GetReversePostOrder()) {
+    block->SetLoopInformation(nullptr);
   }
 }
 
@@ -275,8 +275,7 @@
     bool update_occurred = true;
     while (update_occurred) {
       update_occurred = false;
-      for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) {
-        HBasicBlock* block = it.Current();
+      for (HBasicBlock* block : GetReversePostOrder()) {
         for (HBasicBlock* successor : block->GetSuccessors()) {
           update_occurred |= UpdateDominatorOfSuccessor(block, successor);
         }
@@ -287,8 +286,7 @@
   // Make sure that there are no remaining blocks whose dominator information
   // needs to be updated.
   if (kIsDebugBuild) {
-    for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) {
-      HBasicBlock* block = it.Current();
+    for (HBasicBlock* block : GetReversePostOrder()) {
       for (HBasicBlock* successor : block->GetSuccessors()) {
         DCHECK(!UpdateDominatorOfSuccessor(block, successor));
       }
@@ -297,8 +295,7 @@
 
   // Populate `dominated_blocks_` information after computing all dominators.
   // The potential presence of irreducible loops requires to do it after.
-  for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) {
-    HBasicBlock* block = it.Current();
+  for (HBasicBlock* block : GetReversePostOrder()) {
     if (!block->IsEntryBlock()) {
       block->GetDominator()->AddDominatedBlock(block);
     }
@@ -375,8 +372,7 @@
 void HGraph::ComputeTryBlockInformation() {
   // Iterate in reverse post order to propagate try membership information from
   // predecessors to their successors.
-  for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) {
-    HBasicBlock* block = it.Current();
+  for (HBasicBlock* block : GetReversePostOrder()) {
     if (block->IsEntryBlock() || block->IsCatchBlock()) {
       // Catch blocks after simplification have only exceptional predecessors
       // and hence are never in tries.
@@ -446,8 +442,7 @@
   // We iterate post order to ensure we visit inner loops before outer loops.
   // `PopulateRecursive` needs this guarantee to know whether a natural loop
   // contains an irreducible loop.
-  for (HPostOrderIterator it(*this); !it.Done(); it.Advance()) {
-    HBasicBlock* block = it.Current();
+  for (HBasicBlock* block : GetPostOrder()) {
     if (block->IsLoopHeader()) {
       if (block->IsCatchBlock()) {
         // TODO: Dealing with exceptional back edges could be tricky because
@@ -740,6 +735,20 @@
   return true;
 }
 
+
+bool HLoopInformation::HasExitEdge() const {
+  // Determine if this loop has at least one exit edge.
+  HBlocksInLoopReversePostOrderIterator it_loop(*this);
+  for (; !it_loop.Done(); it_loop.Advance()) {
+    for (HBasicBlock* successor : it_loop.Current()->GetSuccessors()) {
+      if (!Contains(*successor)) {
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
 bool HBasicBlock::Dominates(HBasicBlock* other) const {
   // Walk up the dominator tree from `other`, to find out if `this`
   // is an ancestor.
@@ -1134,8 +1143,8 @@
 }
 
 void HGraphVisitor::VisitReversePostOrder() {
-  for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
-    VisitBasicBlock(it.Current());
+  for (HBasicBlock* block : graph_->GetReversePostOrder()) {
+    VisitBasicBlock(block);
   }
 }
 
@@ -1437,10 +1446,10 @@
   AddInstruction(new (GetGraph()->GetArena()) HGoto(new_block->GetDexPc()));
 
   for (HBasicBlock* successor : GetSuccessors()) {
-    new_block->successors_.push_back(successor);
     successor->predecessors_[successor->GetPredecessorIndexOf(this)] = new_block;
   }
-  successors_.clear();
+  new_block->successors_.swap(successors_);
+  DCHECK(successors_.empty());
   AddSuccessor(new_block);
 
   GetGraph()->AddBlock(new_block);
@@ -1454,10 +1463,10 @@
   HBasicBlock* new_block = new (GetGraph()->GetArena()) HBasicBlock(GetGraph(), GetDexPc());
 
   for (HBasicBlock* predecessor : GetPredecessors()) {
-    new_block->predecessors_.push_back(predecessor);
     predecessor->successors_[predecessor->GetSuccessorIndexOf(this)] = new_block;
   }
-  predecessors_.clear();
+  new_block->predecessors_.swap(predecessors_);
+  DCHECK(predecessors_.empty());
   AddPredecessor(new_block);
 
   GetGraph()->AddBlock(new_block);
@@ -1482,16 +1491,16 @@
   new_block->instructions_.SetBlockOfInstructions(new_block);
 
   for (HBasicBlock* successor : GetSuccessors()) {
-    new_block->successors_.push_back(successor);
     successor->predecessors_[successor->GetPredecessorIndexOf(this)] = new_block;
   }
-  successors_.clear();
+  new_block->successors_.swap(successors_);
+  DCHECK(successors_.empty());
 
   for (HBasicBlock* dominated : GetDominatedBlocks()) {
     dominated->dominator_ = new_block;
-    new_block->dominated_blocks_.push_back(dominated);
   }
-  dominated_blocks_.clear();
+  new_block->dominated_blocks_.swap(dominated_blocks_);
+  DCHECK(dominated_blocks_.empty());
   return new_block;
 }
 
@@ -1509,16 +1518,16 @@
 
   new_block->instructions_.SetBlockOfInstructions(new_block);
   for (HBasicBlock* successor : GetSuccessors()) {
-    new_block->successors_.push_back(successor);
     successor->predecessors_[successor->GetPredecessorIndexOf(this)] = new_block;
   }
-  successors_.clear();
+  new_block->successors_.swap(successors_);
+  DCHECK(successors_.empty());
 
   for (HBasicBlock* dominated : GetDominatedBlocks()) {
     dominated->dominator_ = new_block;
-    new_block->dominated_blocks_.push_back(dominated);
   }
-  dominated_blocks_.clear();
+  new_block->dominated_blocks_.swap(dominated_blocks_);
+  DCHECK(dominated_blocks_.empty());
   return new_block;
 }
 
@@ -1857,17 +1866,19 @@
 
   // Update links to the successors of `other`.
   successors_.clear();
-  while (!other->successors_.empty()) {
-    HBasicBlock* successor = other->GetSuccessors()[0];
-    successor->ReplacePredecessor(other, this);
+  for (HBasicBlock* successor : other->GetSuccessors()) {
+    successor->predecessors_[successor->GetPredecessorIndexOf(other)] = this;
   }
+  successors_.swap(other->successors_);
+  DCHECK(other->successors_.empty());
 
   // Update the dominator tree.
   RemoveDominatedBlock(other);
   for (HBasicBlock* dominated : other->GetDominatedBlocks()) {
-    dominated_blocks_.push_back(dominated);
     dominated->SetDominator(this);
   }
+  dominated_blocks_.insert(
+      dominated_blocks_.end(), other->dominated_blocks_.begin(), other->dominated_blocks_.end());
   other->dominated_blocks_.clear();
   other->dominator_ = nullptr;
 
@@ -1894,16 +1905,18 @@
 
   // Update links to the successors of `other`.
   successors_.clear();
-  while (!other->successors_.empty()) {
-    HBasicBlock* successor = other->GetSuccessors()[0];
-    successor->ReplacePredecessor(other, this);
+  for (HBasicBlock* successor : other->GetSuccessors()) {
+    successor->predecessors_[successor->GetPredecessorIndexOf(other)] = this;
   }
+  successors_.swap(other->successors_);
+  DCHECK(other->successors_.empty());
 
   // Update the dominator tree.
   for (HBasicBlock* dominated : other->GetDominatedBlocks()) {
-    dominated_blocks_.push_back(dominated);
     dominated->SetDominator(this);
   }
+  dominated_blocks_.insert(
+      dominated_blocks_.end(), other->dominated_blocks_.begin(), other->dominated_blocks_.end());
   other->dominated_blocks_.clear();
   other->dominator_ = nullptr;
   other->graph_ = nullptr;
@@ -1986,10 +1999,8 @@
   // Update the environments in this graph to have the invoke's environment
   // as parent.
   {
-    HReversePostOrderIterator it(*this);
-    it.Advance();  // Skip the entry block, we do not need to update the entry's suspend check.
-    for (; !it.Done(); it.Advance()) {
-      HBasicBlock* block = it.Current();
+    // Skip the entry block, we do not need to update the entry's suspend check.
+    for (HBasicBlock* block : GetReversePostOrderSkipEntryBlock()) {
       for (HInstructionIterator instr_it(block->GetInstructions());
            !instr_it.Done();
            instr_it.Advance()) {
@@ -2070,8 +2081,7 @@
 
     // Do a reverse post order of the blocks in the callee and do (1), (2), (3)
     // and (4) to the blocks that apply.
-    for (HReversePostOrderIterator it(*this); !it.Done(); it.Advance()) {
-      HBasicBlock* current = it.Current();
+    for (HBasicBlock* current : GetReversePostOrder()) {
       if (current != exit_block_ && current != entry_block_ && current != first) {
         DCHECK(current->GetTryCatchInformation() == nullptr);
         DCHECK(current->GetGraph() == this);
@@ -2295,7 +2305,7 @@
   ScopedObjectAccess soa(Thread::Current());
   os << "["
      << " is_valid=" << rhs.IsValid()
-     << " type=" << (!rhs.IsValid() ? "?" : PrettyClass(rhs.GetTypeHandle().Get()))
+     << " type=" << (!rhs.IsValid() ? "?" : mirror::Class::PrettyClass(rhs.GetTypeHandle().Get()))
      << " is_exact=" << rhs.IsExact()
      << " ]";
   return os;
@@ -2529,8 +2539,6 @@
       return os << "BootImageLinkTimePcRelative";
     case HLoadString::LoadKind::kBootImageAddress:
       return os << "BootImageAddress";
-    case HLoadString::LoadKind::kDexCacheAddress:
-      return os << "DexCacheAddress";
     case HLoadString::LoadKind::kBssEntry:
       return os << "BssEntry";
     case HLoadString::LoadKind::kDexCacheViaMethod:
diff --git a/compiler/optimizing/nodes.h b/compiler/optimizing/nodes.h
index 828c0e5..e0c582a 100644
--- a/compiler/optimizing/nodes.h
+++ b/compiler/optimizing/nodes.h
@@ -25,6 +25,7 @@
 #include "base/arena_containers.h"
 #include "base/arena_object.h"
 #include "base/array_ref.h"
+#include "base/iteration_range.h"
 #include "base/stl_util.h"
 #include "base/transform_array_ref.h"
 #include "dex_file.h"
@@ -336,7 +337,7 @@
   }
 
   // Acquires and stores RTI of inexact Object to be used when creating HNullConstant.
-  void InitializeInexactObjectRTI(StackHandleScopeCollection* handles);
+  void InitializeInexactObjectRTI(VariableSizedHandleScope* handles);
 
   ArenaAllocator* GetArena() const { return arena_; }
   const ArenaVector<HBasicBlock*>& GetBlocks() const { return blocks_; }
@@ -460,10 +461,23 @@
     return reverse_post_order_;
   }
 
+  ArrayRef<HBasicBlock* const> GetReversePostOrderSkipEntryBlock() {
+    DCHECK(GetReversePostOrder()[0] == entry_block_);
+    return ArrayRef<HBasicBlock* const>(GetReversePostOrder()).SubArray(1);
+  }
+
+  IterationRange<ArenaVector<HBasicBlock*>::const_reverse_iterator> GetPostOrder() const {
+    return ReverseRange(GetReversePostOrder());
+  }
+
   const ArenaVector<HBasicBlock*>& GetLinearOrder() const {
     return linear_order_;
   }
 
+  IterationRange<ArenaVector<HBasicBlock*>::const_reverse_iterator> GetLinearPostOrder() const {
+    return ReverseRange(GetLinearOrder());
+  }
+
   bool HasBoundsChecks() const {
     return has_bounds_checks_;
   }
@@ -755,6 +769,8 @@
 
   bool DominatesAllBackEdges(HBasicBlock* block);
 
+  bool HasExitEdge() const;
+
  private:
   // Internal recursive implementation of `Populate`.
   void PopulateRecursive(HBasicBlock* block);
@@ -1841,6 +1857,15 @@
   size_t InputCount() const { return GetInputRecords().size(); }
   HInstruction* InputAt(size_t i) const { return InputRecordAt(i).GetInstruction(); }
 
+  bool HasInput(HInstruction* input) const {
+    for (const HInstruction* i : GetInputs()) {
+      if (i == input) {
+        return true;
+      }
+    }
+    return false;
+  }
+
   void SetRawInputAt(size_t index, HInstruction* input) {
     SetRawInputRecordAt(index, HUserRecord<HInstruction*>(input));
   }
@@ -1931,6 +1956,22 @@
     return !HasEnvironmentUses() && GetUses().HasExactlyOneElement();
   }
 
+  bool IsRemovable() const {
+    return
+        !DoesAnyWrite() &&
+        !CanThrow() &&
+        !IsSuspendCheck() &&
+        !IsControlFlow() &&
+        !IsNativeDebugInfo() &&
+        !IsParameterValue() &&
+        // If we added an explicit barrier then we should keep it.
+        !IsMemoryBarrier();
+  }
+
+  bool IsDeadAndRemovable() const {
+    return IsRemovable() && !HasUses();
+  }
+
   // Does this instruction strictly dominate `other_instruction`?
   // Returns false if this instruction and `other_instruction` are the same.
   // Aborts if this instruction and `other_instruction` are both phis.
@@ -2080,10 +2121,10 @@
   // to the current method. Such instructions are:
   // (1): Instructions that require an environment, as calling the runtime requires
   //      to walk the stack and have the current method stored at a specific stack address.
-  // (2): Object literals like classes and strings, that are loaded from the dex cache
-  //      fields of the current method.
+  // (2): HCurrentMethod, potentially used by HInvokeStaticOrDirect, HLoadString, or HLoadClass
+  //      to access the dex cache.
   bool NeedsCurrentMethod() const {
-    return NeedsEnvironment() || IsLoadClass() || IsLoadString();
+    return NeedsEnvironment() || IsCurrentMethod();
   }
 
   // Returns whether the code generation of the instruction will require to have access
@@ -3743,6 +3784,8 @@
     return GetEnvironment()->IsFromInlinedInvoke();
   }
 
+  void SetCanThrow(bool can_throw) { SetPackedFlag<kFlagCanThrow>(can_throw); }
+
   bool CanThrow() const OVERRIDE { return GetPackedFlag<kFlagCanThrow>(); }
 
   bool CanBeMoved() const OVERRIDE { return IsIntrinsic(); }
@@ -3801,8 +3844,6 @@
     SetPackedFlag<kFlagCanThrow>(true);
   }
 
-  void SetCanThrow(bool can_throw) { SetPackedFlag<kFlagCanThrow>(can_throw); }
-
   uint32_t number_of_arguments_;
   ArtMethod* const resolved_method_;
   ArenaVector<HUserRecord<HInstruction*>> inputs_;
@@ -5640,10 +5681,6 @@
     // GetIncludePatchInformation().
     kBootImageAddress,
 
-    // Load from the resolved strings array at an absolute address.
-    // Used for strings outside the boot image referenced by JIT-compiled code.
-    kDexCacheAddress,
-
     // Load from an entry in the .bss section using a PC-relative load.
     // Used for strings outside boot image when .bss is accessible with a PC-relative load.
     kBssEntry,
@@ -5768,7 +5805,7 @@
   }
 
   static bool HasAddress(LoadKind load_kind) {
-    return load_kind == LoadKind::kBootImageAddress || load_kind == LoadKind::kDexCacheAddress;
+    return load_kind == LoadKind::kBootImageAddress;
   }
 
   void SetLoadKindInternal(LoadKind load_kind);
@@ -6602,58 +6639,6 @@
   DISALLOW_COPY_AND_ASSIGN(HGraphDelegateVisitor);
 };
 
-class HInsertionOrderIterator : public ValueObject {
- public:
-  explicit HInsertionOrderIterator(const HGraph& graph) : graph_(graph), index_(0) {}
-
-  bool Done() const { return index_ == graph_.GetBlocks().size(); }
-  HBasicBlock* Current() const { return graph_.GetBlocks()[index_]; }
-  void Advance() { ++index_; }
-
- private:
-  const HGraph& graph_;
-  size_t index_;
-
-  DISALLOW_COPY_AND_ASSIGN(HInsertionOrderIterator);
-};
-
-class HReversePostOrderIterator : public ValueObject {
- public:
-  explicit HReversePostOrderIterator(const HGraph& graph) : graph_(graph), index_(0) {
-    // Check that reverse post order of the graph has been built.
-    DCHECK(!graph.GetReversePostOrder().empty());
-  }
-
-  bool Done() const { return index_ == graph_.GetReversePostOrder().size(); }
-  HBasicBlock* Current() const { return graph_.GetReversePostOrder()[index_]; }
-  void Advance() { ++index_; }
-
- private:
-  const HGraph& graph_;
-  size_t index_;
-
-  DISALLOW_COPY_AND_ASSIGN(HReversePostOrderIterator);
-};
-
-class HPostOrderIterator : public ValueObject {
- public:
-  explicit HPostOrderIterator(const HGraph& graph)
-      : graph_(graph), index_(graph_.GetReversePostOrder().size()) {
-    // Check that reverse post order of the graph has been built.
-    DCHECK(!graph.GetReversePostOrder().empty());
-  }
-
-  bool Done() const { return index_ == 0; }
-  HBasicBlock* Current() const { return graph_.GetReversePostOrder()[index_ - 1u]; }
-  void Advance() { --index_; }
-
- private:
-  const HGraph& graph_;
-  size_t index_;
-
-  DISALLOW_COPY_AND_ASSIGN(HPostOrderIterator);
-};
-
 // Iterator over the blocks that art part of the loop. Includes blocks part
 // of an inner loop. The order in which the blocks are iterated is on their
 // block id.
diff --git a/compiler/optimizing/optimizing_cfi_test.cc b/compiler/optimizing/optimizing_cfi_test.cc
index a1e923b..013e110 100644
--- a/compiler/optimizing/optimizing_cfi_test.cc
+++ b/compiler/optimizing/optimizing_cfi_test.cc
@@ -52,7 +52,7 @@
   void SetUpFrame(InstructionSet isa) {
     // Setup simple context.
     std::string error;
-    isa_features_.reset(InstructionSetFeatures::FromVariant(isa, "default", &error));
+    isa_features_ = InstructionSetFeatures::FromVariant(isa, "default", &error);
     graph_ = CreateGraph(&allocator_);
     // Generate simple frame with some spills.
     code_gen_ = CodeGenerator::Create(graph_, isa, *isa_features_, opts_);
@@ -158,7 +158,8 @@
     TestImpl(isa, #isa, expected_asm, expected_cfi);          \
   }
 
-#ifdef ART_ENABLE_CODEGEN_arm
+// TODO(VIXL): Support this test for the VIXL backend.
+#if defined(ART_ENABLE_CODEGEN_arm) && !defined(ART_USE_VIXL_ARM_BACKEND)
 TEST_ISA(kThumb2)
 #endif
 #ifdef ART_ENABLE_CODEGEN_arm64
@@ -177,7 +178,8 @@
 TEST_ISA(kMips64)
 #endif
 
-#ifdef ART_ENABLE_CODEGEN_arm
+// TODO(VIXL): Support this test for the VIXL backend.
+#if defined(ART_ENABLE_CODEGEN_arm) && !defined(ART_USE_VIXL_ARM_BACKEND)
 TEST_F(OptimizingCFITest, kThumb2Adjust) {
   std::vector<uint8_t> expected_asm(
       expected_asm_kThumb2_adjust,
diff --git a/compiler/optimizing/optimizing_cfi_test_expected.inc b/compiler/optimizing/optimizing_cfi_test_expected.inc
index 6c5030c..f735dc8 100644
--- a/compiler/optimizing/optimizing_cfi_test_expected.inc
+++ b/compiler/optimizing/optimizing_cfi_test_expected.inc
@@ -1,10 +1,10 @@
 static constexpr uint8_t expected_asm_kThumb2[] = {
-    0x60, 0xB5, 0x2D, 0xED, 0x02, 0x8A, 0x8B, 0xB0, 0x00, 0x90, 0x0B, 0xB0,
+    0x60, 0xB5, 0x2D, 0xED, 0x02, 0x8A, 0x8B, 0xB0, 0x0B, 0xB0,
     0xBD, 0xEC, 0x02, 0x8A, 0x60, 0xBD,
 };
 static constexpr uint8_t expected_cfi_kThumb2[] = {
     0x42, 0x0E, 0x0C, 0x85, 0x03, 0x86, 0x02, 0x8E, 0x01, 0x44, 0x0E, 0x14,
-    0x05, 0x50, 0x05, 0x05, 0x51, 0x04, 0x42, 0x0E, 0x40, 0x42, 0x0A, 0x42,
+    0x05, 0x50, 0x05, 0x05, 0x51, 0x04, 0x42, 0x0E, 0x40, 0x0A, 0x42,
     0x0E, 0x14, 0x44, 0x0E, 0x0C, 0x06, 0x50, 0x06, 0x51, 0x42, 0x0B, 0x0E,
     0x40,
 };
@@ -19,20 +19,19 @@
 // 0x00000006: .cfi_offset_extended: r81 at cfa-16
 // 0x00000006: sub sp, sp, #44
 // 0x00000008: .cfi_def_cfa_offset: 64
-// 0x00000008: str r0, [sp, #0]
-// 0x0000000a: .cfi_remember_state
-// 0x0000000a: add sp, sp, #44
-// 0x0000000c: .cfi_def_cfa_offset: 20
-// 0x0000000c: vpop.f32 {s16-s17}
-// 0x00000010: .cfi_def_cfa_offset: 12
-// 0x00000010: .cfi_restore_extended: r80
-// 0x00000010: .cfi_restore_extended: r81
-// 0x00000010: pop {r5, r6, pc}
-// 0x00000012: .cfi_restore_state
-// 0x00000012: .cfi_def_cfa_offset: 64
+// 0x00000008: .cfi_remember_state
+// 0x00000008: add sp, sp, #44
+// 0x0000000a: .cfi_def_cfa_offset: 20
+// 0x0000000a: vpop.f32 {s16-s17}
+// 0x0000000e: .cfi_def_cfa_offset: 12
+// 0x0000000e: .cfi_restore_extended: r80
+// 0x0000000e: .cfi_restore_extended: r81
+// 0x0000000e: pop {r5, r6, pc}
+// 0x00000010: .cfi_restore_state
+// 0x00000010: .cfi_def_cfa_offset: 64
 
 static constexpr uint8_t expected_asm_kArm64[] = {
-    0xE0, 0x0F, 0x1C, 0xF8, 0xF4, 0x17, 0x00, 0xF9, 0xF5, 0x7B, 0x03, 0xA9,
+    0xFF, 0x03, 0x01, 0xD1, 0xF4, 0x17, 0x00, 0xF9, 0xF5, 0x7B, 0x03, 0xA9,
     0xE8, 0xA7, 0x01, 0x6D, 0xE8, 0xA7, 0x41, 0x6D, 0xF4, 0x17, 0x40, 0xF9,
     0xF5, 0x7B, 0x43, 0xA9, 0xFF, 0x03, 0x01, 0x91, 0xC0, 0x03, 0x5F, 0xD6,
 };
@@ -41,7 +40,7 @@
     0x05, 0x48, 0x0A, 0x05, 0x49, 0x08, 0x0A, 0x44, 0x06, 0x48, 0x06, 0x49,
     0x44, 0xD4, 0x44, 0xD5, 0xDE, 0x44, 0x0E, 0x00, 0x44, 0x0B, 0x0E, 0x40,
 };
-// 0x00000000: str x0, [sp, #-64]!
+// 0x00000000: sub sp, sp, #0x40 (64)
 // 0x00000004: .cfi_def_cfa_offset: 64
 // 0x00000004: str x20, [sp, #40]
 // 0x00000008: .cfi_offset: r20 at cfa-24
@@ -67,12 +66,12 @@
 // 0x00000024: .cfi_def_cfa_offset: 64
 
 static constexpr uint8_t expected_asm_kX86[] = {
-    0x56, 0x55, 0x83, 0xEC, 0x34, 0x89, 0x04, 0x24, 0x83, 0xC4, 0x34, 0x5D,
+    0x56, 0x55, 0x83, 0xEC, 0x34, 0x83, 0xC4, 0x34, 0x5D,
     0x5E, 0xC3,
 };
 static constexpr uint8_t expected_cfi_kX86[] = {
     0x41, 0x0E, 0x08, 0x86, 0x02, 0x41, 0x0E, 0x0C, 0x85, 0x03, 0x43, 0x0E,
-    0x40, 0x43, 0x0A, 0x43, 0x0E, 0x0C, 0x41, 0x0E, 0x08, 0xC5, 0x41, 0x0E,
+    0x40, 0x0A, 0x43, 0x0E, 0x0C, 0x41, 0x0E, 0x08, 0xC5, 0x41, 0x0E,
     0x04, 0xC6, 0x41, 0x0B, 0x0E, 0x40,
 };
 // 0x00000000: push esi
@@ -83,29 +82,28 @@
 // 0x00000002: .cfi_offset: r5 at cfa-12
 // 0x00000002: sub esp, 52
 // 0x00000005: .cfi_def_cfa_offset: 64
-// 0x00000005: mov [esp], eax
-// 0x00000008: .cfi_remember_state
-// 0x00000008: add esp, 52
-// 0x0000000b: .cfi_def_cfa_offset: 12
-// 0x0000000b: pop ebp
-// 0x0000000c: .cfi_def_cfa_offset: 8
-// 0x0000000c: .cfi_restore: r5
-// 0x0000000c: pop esi
-// 0x0000000d: .cfi_def_cfa_offset: 4
-// 0x0000000d: .cfi_restore: r6
-// 0x0000000d: ret
-// 0x0000000e: .cfi_restore_state
-// 0x0000000e: .cfi_def_cfa_offset: 64
+// 0x00000005: .cfi_remember_state
+// 0x00000005: add esp, 52
+// 0x00000008: .cfi_def_cfa_offset: 12
+// 0x00000008: pop ebp
+// 0x0000000a: .cfi_def_cfa_offset: 8
+// 0x0000000a: .cfi_restore: r5
+// 0x0000000a: pop esi
+// 0x0000000b: .cfi_def_cfa_offset: 4
+// 0x0000000b: .cfi_restore: r6
+// 0x0000000b: ret
+// 0x0000000c: .cfi_restore_state
+// 0x0000000c: .cfi_def_cfa_offset: 64
 
 static constexpr uint8_t expected_asm_kX86_64[] = {
     0x55, 0x53, 0x48, 0x83, 0xEC, 0x28, 0xF2, 0x44, 0x0F, 0x11, 0x6C, 0x24,
-    0x20, 0xF2, 0x44, 0x0F, 0x11, 0x64, 0x24, 0x18, 0x48, 0x89, 0x3C, 0x24,
+    0x20, 0xF2, 0x44, 0x0F, 0x11, 0x64, 0x24, 0x18,
     0xF2, 0x44, 0x0F, 0x10, 0x64, 0x24, 0x18, 0xF2, 0x44, 0x0F, 0x10, 0x6C,
     0x24, 0x20, 0x48, 0x83, 0xC4, 0x28, 0x5B, 0x5D, 0xC3,
 };
 static constexpr uint8_t expected_cfi_kX86_64[] = {
     0x41, 0x0E, 0x10, 0x86, 0x04, 0x41, 0x0E, 0x18, 0x83, 0x06, 0x44, 0x0E,
-    0x40, 0x47, 0x9E, 0x08, 0x47, 0x9D, 0x0A, 0x44, 0x0A, 0x47, 0xDD, 0x47,
+    0x40, 0x47, 0x9E, 0x08, 0x47, 0x9D, 0x0A, 0x0A, 0x47, 0xDD, 0x47,
     0xDE, 0x44, 0x0E, 0x18, 0x41, 0x0E, 0x10, 0xC3, 0x41, 0x0E, 0x08, 0xC6,
     0x41, 0x0B, 0x0E, 0x40,
 };
@@ -121,34 +119,33 @@
 // 0x0000000d: .cfi_offset: r30 at cfa-32
 // 0x0000000d: movsd [rsp + 24], xmm12
 // 0x00000014: .cfi_offset: r29 at cfa-40
-// 0x00000014: movq [rsp], rdi
-// 0x00000018: .cfi_remember_state
-// 0x00000018: movsd xmm12, [rsp + 24]
-// 0x0000001f: .cfi_restore: r29
-// 0x0000001f: movsd xmm13, [rsp + 32]
-// 0x00000026: .cfi_restore: r30
-// 0x00000026: addq rsp, 40
-// 0x0000002a: .cfi_def_cfa_offset: 24
-// 0x0000002a: pop rbx
-// 0x0000002b: .cfi_def_cfa_offset: 16
-// 0x0000002b: .cfi_restore: r3
-// 0x0000002b: pop rbp
-// 0x0000002c: .cfi_def_cfa_offset: 8
-// 0x0000002c: .cfi_restore: r6
-// 0x0000002c: ret
-// 0x0000002d: .cfi_restore_state
-// 0x0000002d: .cfi_def_cfa_offset: 64
+// 0x00000014: .cfi_remember_state
+// 0x00000014: movsd xmm12, [rsp + 24]
+// 0x0000001c: .cfi_restore: r29
+// 0x0000001c: movsd xmm13, [rsp + 32]
+// 0x00000022: .cfi_restore: r30
+// 0x00000022: addq rsp, 40
+// 0x00000026: .cfi_def_cfa_offset: 24
+// 0x00000026: pop rbx
+// 0x00000027: .cfi_def_cfa_offset: 16
+// 0x00000027: .cfi_restore: r3
+// 0x00000027: pop rbp
+// 0x00000028: .cfi_def_cfa_offset: 8
+// 0x00000028: .cfi_restore: r6
+// 0x00000028: ret
+// 0x00000029: .cfi_restore_state
+// 0x00000029: .cfi_def_cfa_offset: 64
 
 static constexpr uint8_t expected_asm_kMips[] = {
     0xC0, 0xFF, 0xBD, 0x27, 0x3C, 0x00, 0xBF, 0xAF, 0x38, 0x00, 0xB1, 0xAF,
     0x34, 0x00, 0xB0, 0xAF, 0x28, 0x00, 0xB6, 0xF7, 0x20, 0x00, 0xB4, 0xF7,
-    0x00, 0x00, 0xA4, 0xAF, 0x3C, 0x00, 0xBF, 0x8F, 0x38, 0x00, 0xB1, 0x8F,
+    0x3C, 0x00, 0xBF, 0x8F, 0x38, 0x00, 0xB1, 0x8F,
     0x34, 0x00, 0xB0, 0x8F, 0x28, 0x00, 0xB6, 0xD7, 0x20, 0x00, 0xB4, 0xD7,
     0x09, 0x00, 0xE0, 0x03, 0x40, 0x00, 0xBD, 0x27,
 };
 static constexpr uint8_t expected_cfi_kMips[] = {
     0x44, 0x0E, 0x40, 0x44, 0x9F, 0x01, 0x44, 0x91, 0x02, 0x44, 0x90, 0x03,
-    0x4C, 0x0A, 0x44, 0xDF, 0x44, 0xD1, 0x44, 0xD0, 0x50, 0x0E, 0x00, 0x0B,
+    0x48, 0x0A, 0x44, 0xDF, 0x44, 0xD1, 0x44, 0xD0, 0x50, 0x0E, 0x00, 0x0B,
     0x0E, 0x40,
 };
 // 0x00000000: addiu r29, r29, -64
@@ -161,33 +158,33 @@
 // 0x00000010: .cfi_offset: r16 at cfa-12
 // 0x00000010: sdc1 f22, +40(r29)
 // 0x00000014: sdc1 f20, +32(r29)
-// 0x00000018: sw r4, +0(r29)
-// 0x0000001c: .cfi_remember_state
-// 0x0000001c: lw r31, +60(r29)
-// 0x00000020: .cfi_restore: r31
-// 0x00000020: lw r17, +56(r29)
-// 0x00000024: .cfi_restore: r17
-// 0x00000024: lw r16, +52(r29)
-// 0x00000028: .cfi_restore: r16
-// 0x00000028: ldc1 f22, +40(r29)
-// 0x0000002c: ldc1 f20, +32(r29)
-// 0x00000030: jr r31
-// 0x00000034: addiu r29, r29, 64
-// 0x00000038: .cfi_def_cfa_offset: 0
-// 0x00000038: .cfi_restore_state
-// 0x00000038: .cfi_def_cfa_offset: 64
+// 0x00000018: .cfi_remember_state
+// 0x00000018: lw r31, +60(r29)
+// 0x0000001c: .cfi_restore: r31
+// 0x0000001c: lw r17, +56(r29)
+// 0x00000020: .cfi_restore: r17
+// 0x00000020: lw r16, +52(r29)
+// 0x00000024: .cfi_restore: r16
+// 0x00000024: ldc1 f22, +40(r29)
+// 0x00000028: ldc1 f20, +32(r29)
+// 0x0000002c: jr r31
+// 0x00000030: addiu r29, r29, 64
+// 0x00000034: .cfi_def_cfa_offset: 0
+// 0x00000034: .cfi_restore_state
+// 0x00000034: .cfi_def_cfa_offset: 64
 
 static constexpr uint8_t expected_asm_kMips64[] = {
     0xD8, 0xFF, 0xBD, 0x67, 0x20, 0x00, 0xBF, 0xFF, 0x18, 0x00, 0xB1, 0xFF,
     0x10, 0x00, 0xB0, 0xFF, 0x08, 0x00, 0xB9, 0xF7, 0x00, 0x00, 0xB8, 0xF7,
-    0xE8, 0xFF, 0xBD, 0x67, 0x00, 0x00, 0xA4, 0xFF, 0x18, 0x00, 0xBD, 0x67,
+    0xE8, 0xFF, 0xBD, 0x67, 0x18, 0x00, 0xBD, 0x67,
     0x00, 0x00, 0xB8, 0xD7, 0x08, 0x00, 0xB9, 0xD7, 0x10, 0x00, 0xB0, 0xDF,
     0x18, 0x00, 0xB1, 0xDF, 0x20, 0x00, 0xBF, 0xDF, 0x28, 0x00, 0xBD, 0x67,
     0x09, 0x00, 0xE0, 0x03, 0x00, 0x00, 0x00, 0x00,
 };
+
 static constexpr uint8_t expected_cfi_kMips64[] = {
     0x44, 0x0E, 0x28, 0x44, 0x9F, 0x02, 0x44, 0x91, 0x04, 0x44, 0x90, 0x06,
-    0x44, 0xB9, 0x08, 0x44, 0xB8, 0x0A, 0x44, 0x0E, 0x40, 0x44, 0x0A, 0x44,
+    0x44, 0xB9, 0x08, 0x44, 0xB8, 0x0A, 0x44, 0x0E, 0x40, 0x0A, 0x44,
     0x0E, 0x28, 0x44, 0xF8, 0x44, 0xF9, 0x44, 0xD0, 0x44, 0xD1, 0x44, 0xDF,
     0x44, 0x0E, 0x00, 0x48, 0x0B, 0x0E, 0x40,
 };
@@ -205,29 +202,28 @@
 // 0x00000018: .cfi_offset: r56 at cfa-40
 // 0x00000018: daddiu r29, r29, -24
 // 0x0000001c: .cfi_def_cfa_offset: 64
-// 0x0000001c: sd r4, +0(r29)
-// 0x00000020: .cfi_remember_state
-// 0x00000020: daddiu r29, r29, 24
-// 0x00000024: .cfi_def_cfa_offset: 40
-// 0x00000024: ldc1 f24, +0(r29)
-// 0x00000028: .cfi_restore: r56
-// 0x00000028: ldc1 f25, +8(r29)
-// 0x0000002c: .cfi_restore: r57
-// 0x0000002c: ld r16, +16(r29)
-// 0x00000030: .cfi_restore: r16
-// 0x00000030: ld r17, +24(r29)
-// 0x00000034: .cfi_restore: r17
-// 0x00000034: ld r31, +32(r29)
-// 0x00000038: .cfi_restore: r31
-// 0x00000038: daddiu r29, r29, 40
-// 0x0000003c: .cfi_def_cfa_offset: 0
-// 0x0000003c: jr r31
-// 0x00000040: nop
-// 0x00000044: .cfi_restore_state
-// 0x00000044: .cfi_def_cfa_offset: 64
+// 0x0000001c: .cfi_remember_state
+// 0x0000001c: daddiu r29, r29, 24
+// 0x00000020: .cfi_def_cfa_offset: 40
+// 0x00000020: ldc1 f24, +0(r29)
+// 0x00000024: .cfi_restore: r56
+// 0x00000024: ldc1 f25, +8(r29)
+// 0x00000028: .cfi_restore: r57
+// 0x00000028: ld r16, +16(r29)
+// 0x0000002c: .cfi_restore: r16
+// 0x0000002c: ld r17, +24(r29)
+// 0x00000030: .cfi_restore: r17
+// 0x00000030: ld r31, +32(r29)
+// 0x00000034: .cfi_restore: r31
+// 0x00000034: daddiu r29, r29, 40
+// 0x00000038: .cfi_def_cfa_offset: 0
+// 0x00000038: jr r31
+// 0x0000003c: nop
+// 0x00000040: .cfi_restore_state
+// 0x00000040: .cfi_def_cfa_offset: 64
 
 static constexpr uint8_t expected_asm_kThumb2_adjust[] = {
-    0x60, 0xB5, 0x2D, 0xED, 0x02, 0x8A, 0x8B, 0xB0, 0x00, 0x90, 0x00, 0x28,
+    0x60, 0xB5, 0x2D, 0xED, 0x02, 0x8A, 0x8B, 0xB0, 0x00, 0x28,
     0x40, 0xD0, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68,
     0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68,
     0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68, 0x00, 0x68,
@@ -243,7 +239,7 @@
 };
 static constexpr uint8_t expected_cfi_kThumb2_adjust[] = {
     0x42, 0x0E, 0x0C, 0x85, 0x03, 0x86, 0x02, 0x8E, 0x01, 0x44, 0x0E, 0x14,
-    0x05, 0x50, 0x05, 0x05, 0x51, 0x04, 0x42, 0x0E, 0x40, 0x02, 0x88, 0x0A,
+    0x05, 0x50, 0x05, 0x05, 0x51, 0x04, 0x42, 0x0E, 0x40, 0x02, 0x86, 0x0A,
     0x42, 0x0E, 0x14, 0x44, 0x0E, 0x0C, 0x06, 0x50, 0x06, 0x51, 0x42, 0x0B,
     0x0E, 0x40,
 };
@@ -258,9 +254,9 @@
 // 0x00000006: .cfi_offset_extended: r81 at cfa-16
 // 0x00000006: sub sp, sp, #44
 // 0x00000008: .cfi_def_cfa_offset: 64
-// 0x00000008: str r0, [sp, #0]
-// 0x0000000a: cmp r0, #0
-// 0x0000000c: beq +128 (0x00000090)
+// 0x00000008: cmp r0, #0
+// 0x0000000a: beq +128 (0x00000090)
+// 0x0000000c: ldr r0, [r0, #0]
 // 0x0000000e: ldr r0, [r0, #0]
 // 0x00000010: ldr r0, [r0, #0]
 // 0x00000012: ldr r0, [r0, #0]
@@ -325,22 +321,21 @@
 // 0x00000088: ldr r0, [r0, #0]
 // 0x0000008a: ldr r0, [r0, #0]
 // 0x0000008c: ldr r0, [r0, #0]
-// 0x0000008e: ldr r0, [r0, #0]
-// 0x00000090: .cfi_remember_state
-// 0x00000090: add sp, sp, #44
-// 0x00000092: .cfi_def_cfa_offset: 20
-// 0x00000092: vpop.f32 {s16-s17}
-// 0x00000096: .cfi_def_cfa_offset: 12
-// 0x00000096: .cfi_restore_extended: r80
-// 0x00000096: .cfi_restore_extended: r81
-// 0x00000096: pop {r5, r6, pc}
-// 0x00000098: .cfi_restore_state
-// 0x00000098: .cfi_def_cfa_offset: 64
+// 0x0000008e: .cfi_remember_state
+// 0x0000008e: add sp, sp, #44
+// 0x00000090: .cfi_def_cfa_offset: 20
+// 0x00000090: vpop.f32 {s16-s17}
+// 0x00000094: .cfi_def_cfa_offset: 12
+// 0x00000094: .cfi_restore_extended: r80
+// 0x00000094: .cfi_restore_extended: r81
+// 0x00000094: pop {r5, r6, pc}
+// 0x00000096: .cfi_restore_state
+// 0x00000096: .cfi_def_cfa_offset: 64
 
 static constexpr uint8_t expected_asm_kMips_adjust_head[] = {
     0xC0, 0xFF, 0xBD, 0x27, 0x3C, 0x00, 0xBF, 0xAF, 0x38, 0x00, 0xB1, 0xAF,
     0x34, 0x00, 0xB0, 0xAF, 0x28, 0x00, 0xB6, 0xF7, 0x20, 0x00, 0xB4, 0xF7,
-    0x00, 0x00, 0xA4, 0xAF, 0x08, 0x00, 0x04, 0x14, 0xFC, 0xFF, 0xBD, 0x27,
+    0x08, 0x00, 0x04, 0x14, 0xFC, 0xFF, 0xBD, 0x27,
     0x00, 0x00, 0xBF, 0xAF, 0x00, 0x00, 0x10, 0x04, 0x02, 0x00, 0x01, 0x3C,
     0x18, 0x00, 0x21, 0x34, 0x21, 0x08, 0x3F, 0x00, 0x00, 0x00, 0xBF, 0x8F,
     0x09, 0x00, 0x20, 0x00, 0x04, 0x00, 0xBD, 0x27,
@@ -352,7 +347,7 @@
 };
 static constexpr uint8_t expected_cfi_kMips_adjust[] = {
     0x44, 0x0E, 0x40, 0x44, 0x9F, 0x01, 0x44, 0x91, 0x02, 0x44, 0x90, 0x03,
-    0x54, 0x0E, 0x44, 0x60, 0x0E, 0x40, 0x04, 0x04, 0x00, 0x02, 0x00, 0x0A,
+    0x50, 0x0E, 0x44, 0x60, 0x0E, 0x40, 0x04, 0x04, 0x00, 0x02, 0x00, 0x0A,
     0x44, 0xDF, 0x44, 0xD1, 0x44, 0xD0, 0x50, 0x0E, 0x00, 0x0B, 0x0E, 0x40,
 };
 // 0x00000000: addiu r29, r29, -64
@@ -365,41 +360,40 @@
 // 0x00000010: .cfi_offset: r16 at cfa-12
 // 0x00000010: sdc1 f22, +40(r29)
 // 0x00000014: sdc1 f20, +32(r29)
-// 0x00000018: sw r4, +0(r29)
-// 0x0000001c: bne r0, r4, 0x00000040 ; +36
-// 0x00000020: addiu r29, r29, -4
-// 0x00000024: .cfi_def_cfa_offset: 68
-// 0x00000024: sw r31, +0(r29)
-// 0x00000028: bltzal r0, 0x0000002c ; +4
-// 0x0000002c: lui r1, 0x20000
-// 0x00000030: ori r1, r1, 24
-// 0x00000034: addu r1, r1, r31
-// 0x00000038: lw r31, +0(r29)
-// 0x0000003c: jr r1
-// 0x00000040: addiu r29, r29, 4
-// 0x00000044: .cfi_def_cfa_offset: 64
-// 0x00000044: nop
+// 0x00000018: bne r0, r4, 0x00000040 ; +36
+// 0x0000001c: addiu r29, r29, -4
+// 0x00000020: .cfi_def_cfa_offset: 68
+// 0x00000020: sw r31, +0(r29)
+// 0x00000024: bltzal r0, 0x0000002c ; +4
+// 0x00000028: lui r1, 0x20000
+// 0x0000002c: ori r1, r1, 24
+// 0x00000030: addu r1, r1, r31
+// 0x00000034: lw r31, +0(r29)
+// 0x00000038: jr r1
+// 0x0000003c: addiu r29, r29, 4
+// 0x00000040: .cfi_def_cfa_offset: 64
+// 0x00000040: nop
 //             ...
-// 0x00020044: nop
-// 0x00020048: .cfi_remember_state
-// 0x00020048: lw r31, +60(r29)
-// 0x0002004c: .cfi_restore: r31
-// 0x0002004c: lw r17, +56(r29)
-// 0x00020050: .cfi_restore: r17
-// 0x00020050: lw r16, +52(r29)
-// 0x00020054: .cfi_restore: r16
-// 0x00020054: ldc1 f22, +40(r29)
-// 0x00020058: ldc1 f20, +32(r29)
-// 0x0002005c: jr r31
-// 0x00020060: addiu r29, r29, 64
-// 0x00020064: .cfi_def_cfa_offset: 0
-// 0x00020064: .cfi_restore_state
-// 0x00020064: .cfi_def_cfa_offset: 64
+// 0x00020040: nop
+// 0x00020044: .cfi_remember_state
+// 0x00020044: lw r31, +60(r29)
+// 0x00020048: .cfi_restore: r31
+// 0x00020048: lw r17, +56(r29)
+// 0x0002004c: .cfi_restore: r17
+// 0x0002004c: lw r16, +52(r29)
+// 0x00020050: .cfi_restore: r16
+// 0x00020050: ldc1 f22, +40(r29)
+// 0x00020054: ldc1 f20, +32(r29)
+// 0x00020058: jr r31
+// 0x0002005c: addiu r29, r29, 64
+// 0x00020060: .cfi_def_cfa_offset: 0
+// 0x00020060: .cfi_restore_state
+// 0x00020060: .cfi_def_cfa_offset: 64
 
 static constexpr uint8_t expected_asm_kMips64_adjust_head[] = {
     0xD8, 0xFF, 0xBD, 0x67, 0x20, 0x00, 0xBF, 0xFF, 0x18, 0x00, 0xB1, 0xFF,
     0x10, 0x00, 0xB0, 0xFF, 0x08, 0x00, 0xB9, 0xF7, 0x00, 0x00, 0xB8, 0xF7,
-    0xE8, 0xFF, 0xBD, 0x67, 0x00, 0x00, 0xA4, 0xFF, 0x02, 0x00, 0xA6, 0x60,
+    0xE8, 0xFF, 0xBD, 0x67, 0x02, 0x00, 0xA6, 0x60,
     0x02, 0x00, 0x3E, 0xEC, 0x0C, 0x00, 0x01, 0xD8,
 };
 static constexpr uint8_t expected_asm_kMips64_adjust_tail[] = {
@@ -409,7 +403,7 @@
 };
 static constexpr uint8_t expected_cfi_kMips64_adjust[] = {
     0x44, 0x0E, 0x28, 0x44, 0x9F, 0x02, 0x44, 0x91, 0x04, 0x44, 0x90, 0x06,
-    0x44, 0xB9, 0x08, 0x44, 0xB8, 0x0A, 0x44, 0x0E, 0x40, 0x04, 0x14, 0x00,
+    0x44, 0xB9, 0x08, 0x44, 0xB8, 0x0A, 0x44, 0x0E, 0x40, 0x04, 0x10, 0x00,
     0x02, 0x00, 0x0A, 0x44, 0x0E, 0x28, 0x44, 0xF8, 0x44, 0xF9, 0x44, 0xD0,
     0x44, 0xD1, 0x44, 0xDF, 0x44, 0x0E, 0x00, 0x48, 0x0B, 0x0E, 0x40,
 };
@@ -427,29 +421,28 @@
 // 0x00000018: .cfi_offset: r56 at cfa-40
 // 0x00000018: daddiu r29, r29, -24
 // 0x0000001c: .cfi_def_cfa_offset: 64
-// 0x0000001c: sd r4, +0(r29)
-// 0x00000020: bnec r5, r6, 0x0000002c ; +12
-// 0x00000024: auipc r1, 2
-// 0x00000028: jic r1, 12 ; b 0x00020030 ; +131080
-// 0x0000002c: nop
+// 0x0000001c: bnec r5, r6, 0x0000002c ; +12
+// 0x00000020: auipc r1, 2
+// 0x00000024: jic r1, 12 ; b 0x00020030 ; +131080
+// 0x00000028: nop
 //             ...
-// 0x0002002c: nop
-// 0x00020030: .cfi_remember_state
-// 0x00020030: daddiu r29, r29, 24
-// 0x00020034: .cfi_def_cfa_offset: 40
-// 0x00020034: ldc1 f24, +0(r29)
-// 0x00020038: .cfi_restore: r56
-// 0x00020038: ldc1 f25, +8(r29)
-// 0x0002003c: .cfi_restore: r57
-// 0x0002003c: ld r16, +16(r29)
-// 0x00020040: .cfi_restore: r16
-// 0x00020040: ld r17, +24(r29)
-// 0x00020044: .cfi_restore: r17
-// 0x00020044: ld r31, +32(r29)
-// 0x00020048: .cfi_restore: r31
-// 0x00020048: daddiu r29, r29, 40
-// 0x0002004c: .cfi_def_cfa_offset: 0
-// 0x0002004c: jr r31
-// 0x00020050: nop
-// 0x00020054: .cfi_restore_state
-// 0x00020054: .cfi_def_cfa_offset: 64
+// 0x00020028: nop
+// 0x0002002c: .cfi_remember_state
+// 0x0002002c: daddiu r29, r29, 24
+// 0x00020030: .cfi_def_cfa_offset: 40
+// 0x00020030: ldc1 f24, +0(r29)
+// 0x00020034: .cfi_restore: r56
+// 0x00020034: ldc1 f25, +8(r29)
+// 0x00020038: .cfi_restore: r57
+// 0x00020038: ld r16, +16(r29)
+// 0x0002003c: .cfi_restore: r16
+// 0x0002003c: ld r17, +24(r29)
+// 0x00020040: .cfi_restore: r17
+// 0x00020040: ld r31, +32(r29)
+// 0x00020044: .cfi_restore: r31
+// 0x00020044: daddiu r29, r29, 40
+// 0x00020047: .cfi_def_cfa_offset: 0
+// 0x00020048: jr r31
+// 0x0002004c: nop
+// 0x00020050: .cfi_restore_state
+// 0x00020050: .cfi_def_cfa_offset: 64
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index d6f8307..499514d 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -18,6 +18,8 @@
 
 #include <fstream>
 #include <memory>
+#include <sstream>
+
 #include <stdint.h>
 
 #ifdef ART_ENABLE_CODEGEN_arm
@@ -46,6 +48,7 @@
 #include "base/arena_containers.h"
 #include "base/dumpable.h"
 #include "base/macros.h"
+#include "base/mutex.h"
 #include "base/timing_logger.h"
 #include "bounds_check_elimination.h"
 #include "builder.h"
@@ -135,14 +138,18 @@
   PassObserver(HGraph* graph,
                CodeGenerator* codegen,
                std::ostream* visualizer_output,
-               CompilerDriver* compiler_driver)
+               CompilerDriver* compiler_driver,
+               Mutex& dump_mutex)
       : graph_(graph),
         cached_method_name_(),
         timing_logger_enabled_(compiler_driver->GetDumpPasses()),
         timing_logger_(timing_logger_enabled_ ? GetMethodName() : "", true, true),
         disasm_info_(graph->GetArena()),
+        visualizer_oss_(),
+        visualizer_output_(visualizer_output),
         visualizer_enabled_(!compiler_driver->GetCompilerOptions().GetDumpCfgFileName().empty()),
-        visualizer_(visualizer_output, graph, *codegen),
+        visualizer_(&visualizer_oss_, graph, *codegen),
+        visualizer_dump_mutex_(dump_mutex),
         graph_in_bad_state_(false) {
     if (timing_logger_enabled_ || visualizer_enabled_) {
       if (!IsVerboseMethod(compiler_driver, GetMethodName())) {
@@ -160,11 +167,13 @@
       LOG(INFO) << "TIMINGS " << GetMethodName();
       LOG(INFO) << Dumpable<TimingLogger>(timing_logger_);
     }
+    DCHECK(visualizer_oss_.str().empty());
   }
 
-  void DumpDisassembly() const {
+  void DumpDisassembly() REQUIRES(!visualizer_dump_mutex_) {
     if (visualizer_enabled_) {
       visualizer_.DumpGraphWithDisassembly();
+      FlushVisualizer();
     }
   }
 
@@ -173,30 +182,40 @@
   const char* GetMethodName() {
     // PrettyMethod() is expensive, so we delay calling it until we actually have to.
     if (cached_method_name_.empty()) {
-      cached_method_name_ = PrettyMethod(graph_->GetMethodIdx(), graph_->GetDexFile());
+      cached_method_name_ = graph_->GetDexFile().PrettyMethod(graph_->GetMethodIdx());
     }
     return cached_method_name_.c_str();
   }
 
  private:
-  void StartPass(const char* pass_name) {
+  void StartPass(const char* pass_name) REQUIRES(!visualizer_dump_mutex_) {
     VLOG(compiler) << "Starting pass: " << pass_name;
     // Dump graph first, then start timer.
     if (visualizer_enabled_) {
       visualizer_.DumpGraph(pass_name, /* is_after_pass */ false, graph_in_bad_state_);
+      FlushVisualizer();
     }
     if (timing_logger_enabled_) {
       timing_logger_.StartTiming(pass_name);
     }
   }
 
-  void EndPass(const char* pass_name) {
+  void FlushVisualizer() REQUIRES(!visualizer_dump_mutex_) {
+    MutexLock mu(Thread::Current(), visualizer_dump_mutex_);
+    *visualizer_output_ << visualizer_oss_.str();
+    visualizer_output_->flush();
+    visualizer_oss_.str("");
+    visualizer_oss_.clear();
+  }
+
+  void EndPass(const char* pass_name) REQUIRES(!visualizer_dump_mutex_) {
     // Pause timer first, then dump graph.
     if (timing_logger_enabled_) {
       timing_logger_.EndTiming();
     }
     if (visualizer_enabled_) {
       visualizer_.DumpGraph(pass_name, /* is_after_pass */ true, graph_in_bad_state_);
+      FlushVisualizer();
     }
 
     // Validate the HGraph if running in debug mode.
@@ -237,8 +256,11 @@
 
   DisassemblyInformation disasm_info_;
 
+  std::ostringstream visualizer_oss_;
+  std::ostream* visualizer_output_;
   bool visualizer_enabled_;
   HGraphVisualizer visualizer_;
+  Mutex& visualizer_dump_mutex_;
 
   // Flag to be set by the compiler if the pass failed and the graph is not
   // expected to validate.
@@ -319,7 +341,7 @@
                         CompilerDriver* driver,
                         const DexCompilationUnit& dex_compilation_unit,
                         PassObserver* pass_observer,
-                        StackHandleScopeCollection* handles) const;
+                        VariableSizedHandleScope* handles) const;
 
   void RunOptimizations(HOptimization* optimizations[],
                         size_t length,
@@ -358,7 +380,7 @@
                        CompilerDriver* driver,
                        const DexCompilationUnit& dex_compilation_unit,
                        PassObserver* pass_observer,
-                       StackHandleScopeCollection* handles) const;
+                       VariableSizedHandleScope* handles) const;
 
   void RunArchOptimizations(InstructionSet instruction_set,
                             HGraph* graph,
@@ -369,13 +391,16 @@
 
   std::unique_ptr<std::ostream> visualizer_output_;
 
+  mutable Mutex dump_mutex_;  // To synchronize visualizer writing.
+
   DISALLOW_COPY_AND_ASSIGN(OptimizingCompiler);
 };
 
 static const int kMaximumCompilationTimeBeforeWarning = 100; /* ms */
 
 OptimizingCompiler::OptimizingCompiler(CompilerDriver* driver)
-    : Compiler(driver, kMaximumCompilationTimeBeforeWarning) {}
+    : Compiler(driver, kMaximumCompilationTimeBeforeWarning),
+      dump_mutex_("Visualizer dump lock") {}
 
 void OptimizingCompiler::Init() {
   // Enable C1visualizer output. Must be done in Init() because the compiler
@@ -383,9 +408,6 @@
   CompilerDriver* driver = GetCompilerDriver();
   const std::string cfg_file_name = driver->GetCompilerOptions().GetDumpCfgFileName();
   if (!cfg_file_name.empty()) {
-    CHECK_EQ(driver->GetThreadCount(), 1U)
-      << "Graph visualizer requires the compiler to run single-threaded. "
-      << "Invoke the compiler with '-j1'.";
     std::ios_base::openmode cfg_file_mode =
         driver->GetCompilerOptions().GetDumpCfgAppend() ? std::ofstream::app : std::ofstream::out;
     visualizer_output_.reset(new std::ofstream(cfg_file_name, cfg_file_mode));
@@ -442,7 +464,7 @@
     CodeGenerator* codegen,
     CompilerDriver* driver,
     const DexCompilationUnit& dex_compilation_unit,
-    StackHandleScopeCollection* handles,
+    VariableSizedHandleScope* handles,
     SideEffectsAnalysis* most_recent_side_effects,
     HInductionVarAnalysis* most_recent_induction) {
   std::string opt_name = ConvertPassNameToOptimizationName(pass_name);
@@ -524,7 +546,7 @@
     CodeGenerator* codegen,
     CompilerDriver* driver,
     const DexCompilationUnit& dex_compilation_unit,
-    StackHandleScopeCollection* handles) {
+    VariableSizedHandleScope* handles) {
   // Few HOptimizations constructors require SideEffectsAnalysis or HInductionVarAnalysis
   // instances. This method assumes that each of them expects the nearest instance preceeding it
   // in the pass name list.
@@ -570,7 +592,7 @@
                                          CompilerDriver* driver,
                                          const DexCompilationUnit& dex_compilation_unit,
                                          PassObserver* pass_observer,
-                                         StackHandleScopeCollection* handles) const {
+                                         VariableSizedHandleScope* handles) const {
   OptimizingCompilerStats* stats = compilation_stats_.get();
   const CompilerOptions& compiler_options = driver->GetCompilerOptions();
   bool should_inline = (compiler_options.GetInlineDepthLimit() > 0)
@@ -603,11 +625,13 @@
   OptimizingCompilerStats* stats = compilation_stats_.get();
   ArenaAllocator* arena = graph->GetArena();
   switch (instruction_set) {
-#ifdef ART_ENABLE_CODEGEN_arm
+#if defined(ART_ENABLE_CODEGEN_arm)
     case kThumb2:
     case kArm: {
+#ifndef ART_USE_VIXL_ARM_BACKEND
       arm::DexCacheArrayFixups* fixups =
           new (arena) arm::DexCacheArrayFixups(graph, codegen, stats);
+#endif
       arm::InstructionSimplifierArm* simplifier =
           new (arena) arm::InstructionSimplifierArm(graph, stats);
       SideEffectsAnalysis* side_effects = new (arena) SideEffectsAnalysis(graph);
@@ -616,7 +640,9 @@
         simplifier,
         side_effects,
         gvn,
+#ifndef ART_USE_VIXL_ARM_BACKEND
         fixups
+#endif
       };
       RunOptimizations(arm_optimizations, arraysize(arm_optimizations), pass_observer);
       break;
@@ -707,7 +733,7 @@
                                           CompilerDriver* driver,
                                           const DexCompilationUnit& dex_compilation_unit,
                                           PassObserver* pass_observer,
-                                          StackHandleScopeCollection* handles) const {
+                                          VariableSizedHandleScope* handles) const {
   OptimizingCompilerStats* stats = compilation_stats_.get();
   ArenaAllocator* arena = graph->GetArena();
   if (driver->GetCompilerOptions().GetPassesToRun() != nullptr) {
@@ -727,8 +753,10 @@
   HDeadCodeElimination* dce1 = new (arena) HDeadCodeElimination(
       graph, stats, "dead_code_elimination$initial");
   HDeadCodeElimination* dce2 = new (arena) HDeadCodeElimination(
+      graph, stats, "dead_code_elimination$after_inlining");
+  HDeadCodeElimination* dce3 = new (arena) HDeadCodeElimination(
       graph, stats, "dead_code_elimination$final");
-  HConstantFolding* fold1 = new (arena) HConstantFolding(graph);
+  HConstantFolding* fold1 = new (arena) HConstantFolding(graph, "constant_folding");
   InstructionSimplifier* simplify1 = new (arena) InstructionSimplifier(graph, stats);
   HSelectGenerator* select_generator = new (arena) HSelectGenerator(graph, stats);
   HConstantFolding* fold2 = new (arena) HConstantFolding(
@@ -743,8 +771,10 @@
   HLoopOptimization* loop = new (arena) HLoopOptimization(graph, induction);
   HSharpening* sharpening = new (arena) HSharpening(graph, codegen, dex_compilation_unit, driver);
   InstructionSimplifier* simplify2 = new (arena) InstructionSimplifier(
-      graph, stats, "instruction_simplifier$after_bce");
+      graph, stats, "instruction_simplifier$after_inlining");
   InstructionSimplifier* simplify3 = new (arena) InstructionSimplifier(
+      graph, stats, "instruction_simplifier$after_bce");
+  InstructionSimplifier* simplify4 = new (arena) InstructionSimplifier(
       graph, stats, "instruction_simplifier$before_codegen");
   IntrinsicsRecognizer* intrinsics = new (arena) IntrinsicsRecognizer(graph, stats);
 
@@ -764,6 +794,8 @@
     // redundant suspend checks to recognize empty blocks.
     select_generator,
     fold2,  // TODO: if we don't inline we can also skip fold2.
+    simplify2,
+    dce2,
     side_effects,
     gvn,
     licm,
@@ -771,13 +803,13 @@
     bce,
     loop,
     fold3,  // evaluates code generated by dynamic bce
-    simplify2,
+    simplify3,
     lse,
-    dce2,
+    dce3,
     // The codegen has a few assumptions that only the instruction simplifier
     // can satisfy. For example, the code generator does not expect to see a
     // HTypeConversion from a type to the same type.
-    simplify3,
+    simplify4,
   };
   RunOptimizations(optimizations2, arraysize(optimizations2), pass_observer);
 
@@ -943,13 +975,14 @@
   PassObserver pass_observer(graph,
                              codegen.get(),
                              visualizer_output_.get(),
-                             compiler_driver);
+                             compiler_driver,
+                             dump_mutex_);
 
   VLOG(compiler) << "Building " << pass_observer.GetMethodName();
 
   {
     ScopedObjectAccess soa(Thread::Current());
-    StackHandleScopeCollection handles(soa.Self());
+    VariableSizedHandleScope handles(soa.Self());
     // Do not hold `mutator_lock_` between optimizations.
     ScopedThreadSuspension sts(soa.Self(), kNative);
 
@@ -1044,7 +1077,7 @@
       if (kArenaAllocatorCountAllocations) {
         if (arena.BytesAllocated() > kArenaAllocatorMemoryReportThreshold) {
           MemStats mem_stats(arena.GetMemStats());
-          LOG(INFO) << PrettyMethod(method_idx, dex_file) << " " << Dumpable<MemStats>(mem_stats);
+          LOG(INFO) << dex_file.PrettyMethod(method_idx) << " " << Dumpable<MemStats>(mem_stats);
         }
       }
     }
@@ -1066,7 +1099,7 @@
     // instruction set is supported -- and has support for read
     // barriers, if they are enabled). This makes sure we're not
     // regressing.
-    std::string method_name = PrettyMethod(method_idx, dex_file);
+    std::string method_name = dex_file.PrettyMethod(method_idx);
     bool shouldCompile = method_name.find("$opt$") != std::string::npos;
     DCHECK((method != nullptr) || !shouldCompile) << "Didn't compile " << method_name;
   }
@@ -1131,7 +1164,7 @@
     if (kArenaAllocatorCountAllocations) {
       if (arena.BytesAllocated() > kArenaAllocatorMemoryReportThreshold) {
         MemStats mem_stats(arena.GetMemStats());
-        LOG(INFO) << PrettyMethod(method_idx, *dex_file) << " " << Dumpable<MemStats>(mem_stats);
+        LOG(INFO) << dex_file->PrettyMethod(method_idx) << " " << Dumpable<MemStats>(mem_stats);
       }
     }
   }
diff --git a/compiler/optimizing/optimizing_unit_test.h b/compiler/optimizing/optimizing_unit_test.h
index 2a23c92..58d9017 100644
--- a/compiler/optimizing/optimizing_unit_test.h
+++ b/compiler/optimizing/optimizing_unit_test.h
@@ -90,7 +90,7 @@
 
   {
     ScopedObjectAccess soa(Thread::Current());
-    StackHandleScopeCollection handles(soa.Self());
+    VariableSizedHandleScope handles(soa.Self());
     HGraphBuilder builder(graph, *item, &handles, return_type);
     bool graph_built = (builder.BuildGraph() == kAnalysisSuccess);
     return graph_built ? graph : nullptr;
diff --git a/compiler/optimizing/prepare_for_register_allocation.cc b/compiler/optimizing/prepare_for_register_allocation.cc
index 8fb5396..0db6088 100644
--- a/compiler/optimizing/prepare_for_register_allocation.cc
+++ b/compiler/optimizing/prepare_for_register_allocation.cc
@@ -20,8 +20,7 @@
 
 void PrepareForRegisterAllocation::Run() {
   // Order does not matter.
-  for (HReversePostOrderIterator it(*GetGraph()); !it.Done(); it.Advance()) {
-    HBasicBlock* block = it.Current();
+  for (HBasicBlock* block : GetGraph()->GetReversePostOrder()) {
     // No need to visit the phis.
     for (HInstructionIterator inst_it(block->GetInstructions()); !inst_it.Done();
          inst_it.Advance()) {
@@ -44,7 +43,7 @@
     // Add a fake environment for String.charAt() inline info as we want
     // the exception to appear as being thrown from there.
     const DexFile& dex_file = check->GetEnvironment()->GetDexFile();
-    DCHECK_STREQ(PrettyMethod(check->GetStringCharAtMethodIndex(), dex_file).c_str(),
+    DCHECK_STREQ(dex_file.PrettyMethod(check->GetStringCharAtMethodIndex()).c_str(),
                  "char java.lang.String.charAt(int)");
     ArenaAllocator* arena = GetGraph()->GetArena();
     HEnvironment* environment = new (arena) HEnvironment(arena,
@@ -129,6 +128,7 @@
   } else if (can_merge_with_load_class && !load_class->NeedsAccessCheck()) {
     // Pass the initialization duty to the `HLoadClass` instruction,
     // and remove the instruction from the graph.
+    DCHECK(load_class->HasEnvironment());
     load_class->SetMustGenerateClinitCheck(true);
     check->GetBlock()->RemoveInstruction(check);
   }
@@ -136,7 +136,7 @@
 
 void PrepareForRegisterAllocation::VisitNewInstance(HNewInstance* instruction) {
   HLoadClass* load_class = instruction->InputAt(0)->AsLoadClass();
-  bool has_only_one_use = load_class->HasOnlyOneNonEnvironmentUse();
+  const bool has_only_one_use = load_class->HasOnlyOneNonEnvironmentUse();
   // Change the entrypoint to kQuickAllocObject if either:
   // - the class is finalizable (only kQuickAllocObject handles finalizable classes),
   // - the class needs access checks (we do not know if it's finalizable),
@@ -144,19 +144,25 @@
   if (instruction->IsFinalizable() || has_only_one_use || load_class->NeedsAccessCheck()) {
     instruction->SetEntrypoint(kQuickAllocObject);
     instruction->ReplaceInput(GetGraph()->GetIntConstant(load_class->GetTypeIndex()), 0);
-    // The allocation entry point that deals with access checks does not work with inlined
-    // methods, so we need to check whether this allocation comes from an inlined method.
-    // We also need to make the same check as for moving clinit check, whether the HLoadClass
-    // has the clinit check responsibility or not (HLoadClass can throw anyway).
-    if (has_only_one_use &&
-        !instruction->GetEnvironment()->IsFromInlinedInvoke() &&
-        CanMoveClinitCheck(load_class, instruction)) {
-      // We can remove the load class from the graph. If it needed access checks, we delegate
-      // the access check to the allocation.
-      if (load_class->NeedsAccessCheck()) {
-        instruction->SetEntrypoint(kQuickAllocObjectWithAccessCheck);
+    if (has_only_one_use) {
+      // We've just removed the only use of the HLoadClass. Since we don't run DCE after this pass,
+      // do it manually if possible.
+      if (!load_class->CanThrow()) {
+        // If the load class can not throw, it has no side effects and can be removed if there is
+        // only one use.
+        load_class->GetBlock()->RemoveInstruction(load_class);
+      } else if (!instruction->GetEnvironment()->IsFromInlinedInvoke() &&
+          CanMoveClinitCheck(load_class, instruction)) {
+        // The allocation entry point that deals with access checks does not work with inlined
+        // methods, so we need to check whether this allocation comes from an inlined method.
+        // We also need to make the same check as for moving clinit check, whether the HLoadClass
+        // has the clinit check responsibility or not (HLoadClass can throw anyway).
+        // If it needed access checks, we delegate the access check to the allocation.
+        if (load_class->NeedsAccessCheck()) {
+          instruction->SetEntrypoint(kQuickAllocObjectWithAccessCheck);
+        }
+        load_class->GetBlock()->RemoveInstruction(load_class);
       }
-      load_class->GetBlock()->RemoveInstruction(load_class);
     }
   }
 }
diff --git a/compiler/optimizing/reference_type_propagation.cc b/compiler/optimizing/reference_type_propagation.cc
index 45a3ce4..d588dea 100644
--- a/compiler/optimizing/reference_type_propagation.cc
+++ b/compiler/optimizing/reference_type_propagation.cc
@@ -35,7 +35,7 @@
   }
 }
 
-static inline ReferenceTypeInfo::TypeHandle GetRootHandle(StackHandleScopeCollection* handles,
+static inline ReferenceTypeInfo::TypeHandle GetRootHandle(VariableSizedHandleScope* handles,
                                                           ClassLinker::ClassRoot class_root,
                                                           ReferenceTypeInfo::TypeHandle* cache) {
   if (!ReferenceTypeInfo::IsValidHandle(*cache)) {
@@ -109,7 +109,7 @@
 
 ReferenceTypePropagation::ReferenceTypePropagation(HGraph* graph,
                                                    Handle<mirror::DexCache> hint_dex_cache,
-                                                   StackHandleScopeCollection* handles,
+                                                   VariableSizedHandleScope* handles,
                                                    bool is_first_run,
                                                    const char* name)
     : HOptimization(graph, name),
@@ -123,8 +123,7 @@
   // TODO: move this to the graph checker.
   if (kIsDebugBuild) {
     ScopedObjectAccess soa(Thread::Current());
-    for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
-      HBasicBlock* block = it.Current();
+    for (HBasicBlock* block : graph_->GetReversePostOrder()) {
       for (HInstructionIterator iti(block->GetInstructions()); !iti.Done(); iti.Advance()) {
         HInstruction* instr = iti.Current();
         if (instr->GetType() == Primitive::kPrimNot) {
@@ -158,8 +157,8 @@
   // To properly propagate type info we need to visit in the dominator-based order.
   // Reverse post order guarantees a node's dominators are visited first.
   // We take advantage of this order in `VisitBasicBlock`.
-  for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
-    VisitBasicBlock(it.Current());
+  for (HBasicBlock* block : graph_->GetReversePostOrder()) {
+    VisitBasicBlock(block);
   }
 
   ProcessWorklist();
@@ -448,9 +447,9 @@
       mirror::Class* declaring_class = method->GetDeclaringClass();
       DCHECK(declaring_class != nullptr);
       DCHECK(declaring_class->IsStringClass())
-          << "Expected String class: " << PrettyDescriptor(declaring_class);
+          << "Expected String class: " << declaring_class->PrettyDescriptor();
       DCHECK(method->IsConstructor())
-          << "Expected String.<init>: " << PrettyMethod(method);
+          << "Expected String.<init>: " << method->PrettyMethod();
     }
     instr->SetReferenceTypeInfo(
         ReferenceTypeInfo::Create(handle_cache_->GetStringClassHandle(), /* is_exact */ true));
@@ -517,7 +516,8 @@
   // The field index is unknown only during tests.
   if (info.GetFieldIndex() != kUnknownFieldIndex) {
     ClassLinker* cl = Runtime::Current()->GetClassLinker();
-    ArtField* field = cl->GetResolvedField(info.GetFieldIndex(), info.GetDexCache().Get());
+    ArtField* field = cl->GetResolvedField(info.GetFieldIndex(),
+                                           MakeObjPtr(info.GetDexCache().Get()));
     // TODO: There are certain cases where we can't resolve the field.
     // b/21914925 is open to keep track of a repro case for this issue.
     if (field != nullptr) {
diff --git a/compiler/optimizing/reference_type_propagation.h b/compiler/optimizing/reference_type_propagation.h
index 61428b2..4663471 100644
--- a/compiler/optimizing/reference_type_propagation.h
+++ b/compiler/optimizing/reference_type_propagation.h
@@ -34,7 +34,7 @@
  public:
   ReferenceTypePropagation(HGraph* graph,
                            Handle<mirror::DexCache> hint_dex_cache,
-                           StackHandleScopeCollection* handles,
+                           VariableSizedHandleScope* handles,
                            bool is_first_run,
                            const char* name = kReferenceTypePropagationPassName);
 
@@ -56,7 +56,7 @@
  private:
   class HandleCache {
    public:
-    explicit HandleCache(StackHandleScopeCollection* handles) : handles_(handles) { }
+    explicit HandleCache(VariableSizedHandleScope* handles) : handles_(handles) { }
 
     template <typename T>
     MutableHandle<T> NewHandle(T* object) REQUIRES_SHARED(Locks::mutator_lock_) {
@@ -74,7 +74,7 @@
     ReferenceTypeInfo::TypeHandle GetThrowableClassHandle();
 
    private:
-    StackHandleScopeCollection* handles_;
+    VariableSizedHandleScope* handles_;
 
     ReferenceTypeInfo::TypeHandle object_class_handle_;
     ReferenceTypeInfo::TypeHandle class_class_handle_;
diff --git a/compiler/optimizing/reference_type_propagation_test.cc b/compiler/optimizing/reference_type_propagation_test.cc
index 75a4eac..b061c87 100644
--- a/compiler/optimizing/reference_type_propagation_test.cc
+++ b/compiler/optimizing/reference_type_propagation_test.cc
@@ -35,7 +35,7 @@
 
   ~ReferenceTypePropagationTest() { }
 
-  void SetupPropagation(StackHandleScopeCollection* handles) {
+  void SetupPropagation(VariableSizedHandleScope* handles) {
     graph_->InitializeInexactObjectRTI(handles);
     propagation_ = new (&allocator_) ReferenceTypePropagation(graph_,
                                                               Handle<mirror::DexCache>(),
@@ -79,7 +79,7 @@
 
 TEST_F(ReferenceTypePropagationTest, ProperSetup) {
   ScopedObjectAccess soa(Thread::Current());
-  StackHandleScopeCollection handles(soa.Self());
+  VariableSizedHandleScope handles(soa.Self());
   SetupPropagation(&handles);
 
   EXPECT_TRUE(propagation_ != nullptr);
@@ -88,7 +88,7 @@
 
 TEST_F(ReferenceTypePropagationTest, MergeInvalidTypes) {
   ScopedObjectAccess soa(Thread::Current());
-  StackHandleScopeCollection handles(soa.Self());
+  VariableSizedHandleScope handles(soa.Self());
   SetupPropagation(&handles);
 
   // Two invalid types.
@@ -120,7 +120,7 @@
 
 TEST_F(ReferenceTypePropagationTest, MergeValidTypes) {
   ScopedObjectAccess soa(Thread::Current());
-  StackHandleScopeCollection handles(soa.Self());
+  VariableSizedHandleScope handles(soa.Self());
   SetupPropagation(&handles);
 
   // Same types.
diff --git a/compiler/optimizing/register_allocation_resolver.cc b/compiler/optimizing/register_allocation_resolver.cc
index caf6647..5991791 100644
--- a/compiler/optimizing/register_allocation_resolver.cc
+++ b/compiler/optimizing/register_allocation_resolver.cc
@@ -374,7 +374,9 @@
       if (current->GetType() == Primitive::kPrimNot) {
         DCHECK(interval->GetDefinedBy()->IsActualObject())
             << interval->GetDefinedBy()->DebugName()
-            << "@" << safepoint_position->GetInstruction()->DebugName();
+            << '(' << interval->GetDefinedBy()->GetId() << ')'
+            << "@" << safepoint_position->GetInstruction()->DebugName()
+            << '(' << safepoint_position->GetInstruction()->GetId() << ')';
         LocationSummary* locations = safepoint_position->GetLocations();
         if (current->GetParent()->HasSpillSlot()) {
           locations->SetStackBit(current->GetParent()->GetSpillSlot() / kVRegSize);
diff --git a/compiler/optimizing/register_allocator_graph_color.cc b/compiler/optimizing/register_allocator_graph_color.cc
index 9610774..aa0d371 100644
--- a/compiler/optimizing/register_allocator_graph_color.cc
+++ b/compiler/optimizing/register_allocator_graph_color.cc
@@ -758,7 +758,7 @@
 }
 
 void RegisterAllocatorGraphColor::ProcessInstructions() {
-  for (HBasicBlock* block : LinearPostOrder(codegen_->GetGraph()->GetLinearOrder())) {
+  for (HBasicBlock* block : codegen_->GetGraph()->GetLinearPostOrder()) {
     // Note that we currently depend on this ordering, since some helper
     // code is designed for linear scan register allocation.
     for (HBackwardInstructionIterator instr_it(block->GetInstructions());
diff --git a/compiler/optimizing/register_allocator_linear_scan.cc b/compiler/optimizing/register_allocator_linear_scan.cc
index 4e69bc8..1a391ce 100644
--- a/compiler/optimizing/register_allocator_linear_scan.cc
+++ b/compiler/optimizing/register_allocator_linear_scan.cc
@@ -163,7 +163,7 @@
 void RegisterAllocatorLinearScan::AllocateRegistersInternal() {
   // Iterate post-order, to ensure the list is sorted, and the last added interval
   // is the one with the lowest start position.
-  for (HBasicBlock* block : LinearPostOrder(codegen_->GetGraph()->GetLinearOrder())) {
+  for (HBasicBlock* block : codegen_->GetGraph()->GetLinearPostOrder()) {
     for (HBackwardInstructionIterator back_it(block->GetInstructions()); !back_it.Done();
          back_it.Advance()) {
       ProcessInstruction(back_it.Current());
diff --git a/compiler/optimizing/select_generator.cc b/compiler/optimizing/select_generator.cc
index e409035..46d0d0e 100644
--- a/compiler/optimizing/select_generator.cc
+++ b/compiler/optimizing/select_generator.cc
@@ -76,8 +76,7 @@
   // Iterate in post order in the unlikely case that removing one occurrence of
   // the selection pattern empties a branch block of another occurrence.
   // Otherwise the order does not matter.
-  for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
-    HBasicBlock* block = it.Current();
+  for (HBasicBlock* block : graph_->GetPostOrder()) {
     if (!block->EndsWithIf()) continue;
 
     // Find elements of the diamond pattern.
diff --git a/compiler/optimizing/sharpening.cc b/compiler/optimizing/sharpening.cc
index c1cfe8d..63e4ca6 100644
--- a/compiler/optimizing/sharpening.cc
+++ b/compiler/optimizing/sharpening.cc
@@ -162,7 +162,6 @@
         ? compilation_unit_.GetDexCache()
         : hs.NewHandle(class_linker->FindDexCache(soa.Self(), dex_file));
     mirror::Class* klass = dex_cache->GetResolvedType(type_index);
-
     if (codegen_->GetCompilerOptions().IsBootImage()) {
       // Compiling boot image. Check if the class is a boot image class.
       DCHECK(!runtime->UseJitCompilation());
@@ -326,7 +325,6 @@
       load_string->SetLoadKindWithStringReference(load_kind, dex_file, string_index);
       break;
     case HLoadString::LoadKind::kBootImageAddress:
-    case HLoadString::LoadKind::kDexCacheAddress:
       DCHECK_NE(address, 0u);
       load_string->SetLoadKindWithAddress(load_kind, address);
       break;
diff --git a/compiler/optimizing/side_effects_analysis.cc b/compiler/optimizing/side_effects_analysis.cc
index 1dc6986..6d82e8e 100644
--- a/compiler/optimizing/side_effects_analysis.cc
+++ b/compiler/optimizing/side_effects_analysis.cc
@@ -26,8 +26,7 @@
 
   // In DEBUG mode, ensure side effects are properly initialized to empty.
   if (kIsDebugBuild) {
-    for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
-      HBasicBlock* block = it.Current();
+    for (HBasicBlock* block : graph_->GetReversePostOrder()) {
       SideEffects effects = GetBlockEffects(block);
       DCHECK(effects.DoesNothing());
       if (block->IsLoopHeader()) {
@@ -38,9 +37,7 @@
   }
 
   // Do a post order visit to ensure we visit a loop header after its loop body.
-  for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
-    HBasicBlock* block = it.Current();
-
+  for (HBasicBlock* block : graph_->GetPostOrder()) {
     SideEffects effects = SideEffects::None();
     // Update `effects` with the side effects of all instructions in this block.
     for (HInstructionIterator inst_it(block->GetInstructions()); !inst_it.Done();
diff --git a/compiler/optimizing/ssa_builder.cc b/compiler/optimizing/ssa_builder.cc
index 03807ba..ae1e369 100644
--- a/compiler/optimizing/ssa_builder.cc
+++ b/compiler/optimizing/ssa_builder.cc
@@ -25,8 +25,8 @@
 
 void SsaBuilder::FixNullConstantType() {
   // The order doesn't matter here.
-  for (HReversePostOrderIterator itb(*graph_); !itb.Done(); itb.Advance()) {
-    for (HInstructionIterator it(itb.Current()->GetInstructions()); !it.Done(); it.Advance()) {
+  for (HBasicBlock* block : graph_->GetReversePostOrder()) {
+    for (HInstructionIterator it(block->GetInstructions()); !it.Done(); it.Advance()) {
       HInstruction* equality_instr = it.Current();
       if (!equality_instr->IsEqual() && !equality_instr->IsNotEqual()) {
         continue;
@@ -57,8 +57,8 @@
 
 void SsaBuilder::EquivalentPhisCleanup() {
   // The order doesn't matter here.
-  for (HReversePostOrderIterator itb(*graph_); !itb.Done(); itb.Advance()) {
-    for (HInstructionIterator it(itb.Current()->GetPhis()); !it.Done(); it.Advance()) {
+  for (HBasicBlock* block : graph_->GetReversePostOrder()) {
+    for (HInstructionIterator it(block->GetPhis()); !it.Done(); it.Advance()) {
       HPhi* phi = it.Current()->AsPhi();
       HPhi* next = phi->GetNextEquivalentPhiWithSameType();
       if (next != nullptr) {
@@ -79,8 +79,7 @@
 }
 
 void SsaBuilder::FixEnvironmentPhis() {
-  for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
-    HBasicBlock* block = it.Current();
+  for (HBasicBlock* block : graph_->GetReversePostOrder()) {
     for (HInstructionIterator it_phis(block->GetPhis()); !it_phis.Done(); it_phis.Advance()) {
       HPhi* phi = it_phis.Current()->AsPhi();
       // If the phi is not dead, or has no environment uses, there is nothing to do.
@@ -228,8 +227,7 @@
 void SsaBuilder::RunPrimitiveTypePropagation() {
   ArenaVector<HPhi*> worklist(graph_->GetArena()->Adapter(kArenaAllocGraphBuilder));
 
-  for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
-    HBasicBlock* block = it.Current();
+  for (HBasicBlock* block : graph_->GetReversePostOrder()) {
     if (block->IsLoopHeader()) {
       for (HInstructionIterator phi_it(block->GetPhis()); !phi_it.Done(); phi_it.Advance()) {
         HPhi* phi = phi_it.Current()->AsPhi();
diff --git a/compiler/optimizing/ssa_builder.h b/compiler/optimizing/ssa_builder.h
index d7360ad..45dac54 100644
--- a/compiler/optimizing/ssa_builder.h
+++ b/compiler/optimizing/ssa_builder.h
@@ -49,7 +49,7 @@
  public:
   SsaBuilder(HGraph* graph,
              Handle<mirror::DexCache> dex_cache,
-             StackHandleScopeCollection* handles)
+             VariableSizedHandleScope* handles)
       : graph_(graph),
         dex_cache_(dex_cache),
         handles_(handles),
@@ -116,7 +116,7 @@
 
   HGraph* graph_;
   Handle<mirror::DexCache> dex_cache_;
-  StackHandleScopeCollection* const handles_;
+  VariableSizedHandleScope* const handles_;
 
   // True if types of ambiguous ArrayGets have been resolved.
   bool agets_fixed_;
diff --git a/compiler/optimizing/ssa_liveness_analysis.cc b/compiler/optimizing/ssa_liveness_analysis.cc
index 76cf8fe..e8e12e1 100644
--- a/compiler/optimizing/ssa_liveness_analysis.cc
+++ b/compiler/optimizing/ssa_liveness_analysis.cc
@@ -139,7 +139,7 @@
 void SsaLivenessAnalysis::ComputeLiveRanges() {
   // Do a post order visit, adding inputs of instructions live in the block where
   // that instruction is defined, and killing instructions that are being visited.
-  for (HBasicBlock* block : LinearPostOrder(graph_->GetLinearOrder())) {
+  for (HBasicBlock* block : ReverseRange(graph_->GetLinearOrder())) {
     BitVector* kill = GetKillSet(*block);
     BitVector* live_in = GetLiveInSet(*block);
 
@@ -256,15 +256,13 @@
   do {
     changed = false;
 
-    for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
-      const HBasicBlock& block = *it.Current();
-
+    for (const HBasicBlock* block : graph_->GetPostOrder()) {
       // The live_in set depends on the kill set (which does not
       // change in this loop), and the live_out set.  If the live_out
       // set does not change, there is no need to update the live_in set.
-      if (UpdateLiveOut(block) && UpdateLiveIn(block)) {
+      if (UpdateLiveOut(*block) && UpdateLiveIn(*block)) {
         if (kIsDebugBuild) {
-          CheckNoLiveInIrreducibleLoop(block);
+          CheckNoLiveInIrreducibleLoop(*block);
         }
         changed = true;
       }
diff --git a/compiler/optimizing/ssa_phi_elimination.cc b/compiler/optimizing/ssa_phi_elimination.cc
index b1ec99a..aec7a3c 100644
--- a/compiler/optimizing/ssa_phi_elimination.cc
+++ b/compiler/optimizing/ssa_phi_elimination.cc
@@ -34,8 +34,7 @@
   ArenaSet<HPhi*> initially_live(graph_->GetArena()->Adapter(kArenaAllocSsaPhiElimination));
 
   // Add to the worklist phis referenced by non-phi instructions.
-  for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
-    HBasicBlock* block = it.Current();
+  for (HBasicBlock* block : graph_->GetReversePostOrder()) {
     for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
       HPhi* phi = inst_it.Current()->AsPhi();
       if (phi->IsDead()) {
@@ -84,8 +83,7 @@
   // Remove phis that are not live. Visit in post order so that phis
   // that are not inputs of loop phis can be removed when they have
   // no users left (dead phis might use dead phis).
-  for (HPostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
-    HBasicBlock* block = it.Current();
+  for (HBasicBlock* block : graph_->GetPostOrder()) {
     HInstruction* current = block->GetFirstPhi();
     HInstruction* next = nullptr;
     HPhi* phi;
@@ -119,8 +117,7 @@
 void SsaRedundantPhiElimination::Run() {
   // Add all phis in the worklist. Order does not matter for correctness, and
   // neither will necessarily converge faster.
-  for (HReversePostOrderIterator it(*graph_); !it.Done(); it.Advance()) {
-    HBasicBlock* block = it.Current();
+  for (HBasicBlock* block : graph_->GetReversePostOrder()) {
     for (HInstructionIterator inst_it(block->GetPhis()); !inst_it.Done(); inst_it.Advance()) {
       worklist_.push_back(inst_it.Current()->AsPhi());
     }
diff --git a/compiler/utils/arm/assembler_arm_vixl.cc b/compiler/utils/arm/assembler_arm_vixl.cc
index 3c5973e..e3b9fb6 100644
--- a/compiler/utils/arm/assembler_arm_vixl.cc
+++ b/compiler/utils/arm/assembler_arm_vixl.cc
@@ -62,6 +62,12 @@
   ___ Rsb(reg, reg, 0);
 }
 
+void ArmVIXLAssembler::MaybePoisonHeapReference(vixl32::Register reg) {
+  if (kPoisonHeapReferences) {
+    PoisonHeapReference(reg);
+  }
+}
+
 void ArmVIXLAssembler::MaybeUnpoisonHeapReference(vixl32::Register reg) {
   if (kPoisonHeapReferences) {
     UnpoisonHeapReference(reg);
@@ -346,6 +352,51 @@
   ___ Vldr(reg, MemOperand(base, offset));
 }
 
+// Prefer Str to Add/Stm in ArmVIXLAssembler::StoreRegisterList and
+// ArmVIXLAssembler::LoadRegisterList where this generates less code (size).
+static constexpr int kRegListThreshold = 4;
+
+void ArmVIXLAssembler::StoreRegisterList(RegList regs, size_t stack_offset) {
+  int number_of_regs = POPCOUNT(static_cast<uint32_t>(regs));
+  if (number_of_regs != 0) {
+    if (number_of_regs > kRegListThreshold) {
+      UseScratchRegisterScope temps(GetVIXLAssembler());
+      vixl32::Register base = sp;
+      if (stack_offset != 0) {
+        base = temps.Acquire();
+        DCHECK_EQ(regs & (1u << base.GetCode()), 0u);
+        ___ Add(base, sp, stack_offset);
+      }
+      ___ Stm(base, NO_WRITE_BACK, RegisterList(regs));
+    } else {
+      for (uint32_t i : LowToHighBits(static_cast<uint32_t>(regs))) {
+        ___ Str(vixl32::Register(i), MemOperand(sp, stack_offset));
+        stack_offset += kRegSizeInBytes;
+      }
+    }
+  }
+}
+
+void ArmVIXLAssembler::LoadRegisterList(RegList regs, size_t stack_offset) {
+  int number_of_regs = POPCOUNT(static_cast<uint32_t>(regs));
+  if (number_of_regs != 0) {
+    if (number_of_regs > kRegListThreshold) {
+      UseScratchRegisterScope temps(GetVIXLAssembler());
+      vixl32::Register base = sp;
+      if (stack_offset != 0) {
+        base = temps.Acquire();
+        ___ Add(base, sp, stack_offset);
+      }
+      ___ Ldm(base, NO_WRITE_BACK, RegisterList(regs));
+    } else {
+      for (uint32_t i : LowToHighBits(static_cast<uint32_t>(regs))) {
+        ___ Ldr(vixl32::Register(i), MemOperand(sp, stack_offset));
+        stack_offset += kRegSizeInBytes;
+      }
+    }
+  }
+}
+
 void ArmVIXLAssembler::AddConstant(vixl32::Register rd, int32_t value) {
   AddConstant(rd, rd, value);
 }
diff --git a/compiler/utils/arm/assembler_arm_vixl.h b/compiler/utils/arm/assembler_arm_vixl.h
index c8f3a9b..e020628 100644
--- a/compiler/utils/arm/assembler_arm_vixl.h
+++ b/compiler/utils/arm/assembler_arm_vixl.h
@@ -72,6 +72,8 @@
   void PoisonHeapReference(vixl32::Register reg);
   // Unpoison a heap reference contained in `reg`.
   void UnpoisonHeapReference(vixl32::Register reg);
+  // Poison a heap reference contained in `reg` if heap poisoning is enabled.
+  void MaybePoisonHeapReference(vixl32::Register reg);
   // Unpoison a heap reference contained in `reg` if heap poisoning is enabled.
   void MaybeUnpoisonHeapReference(vixl32::Register reg);
 
@@ -90,8 +92,11 @@
   void LoadSFromOffset(vixl32::SRegister reg, vixl32::Register base, int32_t offset);
   void LoadDFromOffset(vixl32::DRegister reg, vixl32::Register base, int32_t offset);
 
+  void LoadRegisterList(RegList regs, size_t stack_offset);
+  void StoreRegisterList(RegList regs, size_t stack_offset);
+
   bool ShifterOperandCanAlwaysHold(uint32_t immediate);
-  bool ShifterOperandCanHold(Opcode opcode, uint32_t immediate, SetCc set_cc);
+  bool ShifterOperandCanHold(Opcode opcode, uint32_t immediate, SetCc set_cc = kCcDontCare);
   bool CanSplitLoadStoreOffset(int32_t allowed_offset_bits,
                                int32_t offset,
                                /*out*/ int32_t* add_to_base,
diff --git a/compiler/utils/arm/assembler_thumb2.cc b/compiler/utils/arm/assembler_thumb2.cc
index 61b7f08..1e71d06 100644
--- a/compiler/utils/arm/assembler_thumb2.cc
+++ b/compiler/utils/arm/assembler_thumb2.cc
@@ -2830,7 +2830,7 @@
 
 void Thumb2Assembler::clrex(Condition cond) {
   CheckCondition(cond);
-  int32_t encoding = B31 | B30 | B29 | B27 | B28 | B25 | B24 | B23 |
+  int32_t encoding = B31 | B30 | B29 | B28 | B25 | B24 | B23 |
       B21 | B20 |
       0xf << 16 |
       B15 |
diff --git a/compiler/utils/arm/assembler_thumb2_test.cc b/compiler/utils/arm/assembler_thumb2_test.cc
index d0799d6..30e8f4e 100644
--- a/compiler/utils/arm/assembler_thumb2_test.cc
+++ b/compiler/utils/arm/assembler_thumb2_test.cc
@@ -207,6 +207,13 @@
   DriverStr(expected, "strexd");
 }
 
+TEST_F(AssemblerThumb2Test, clrex) {
+  __ clrex();
+
+  const char* expected = "clrex\n";
+  DriverStr(expected, "clrex");
+}
+
 TEST_F(AssemblerThumb2Test, LdrdStrd) {
   __ ldrd(arm::R0, arm::Address(arm::R2, 8));
   __ ldrd(arm::R0, arm::Address(arm::R12));
diff --git a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
index 14d29c4..23b2774 100644
--- a/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
+++ b/compiler/utils/arm/jni_macro_assembler_arm_vixl.cc
@@ -49,7 +49,7 @@
   return dwarf::Reg::ArmFp(static_cast<int>(reg.GetCode()));
 }
 
-static constexpr size_t kFramePointerSize = static_cast<size_t>(kArmPointerSize);;
+static constexpr size_t kFramePointerSize = static_cast<size_t>(kArmPointerSize);
 
 void ArmVIXLJNIMacroAssembler::BuildFrame(size_t frame_size,
                                           ManagedRegister method_reg,
@@ -254,10 +254,10 @@
   return Load(m_dst.AsArm(), sp, src.Int32Value(), size);
 }
 
-void ArmVIXLJNIMacroAssembler::LoadFromThread(ManagedRegister m_dst ATTRIBUTE_UNUSED,
-                                              ThreadOffset32 src ATTRIBUTE_UNUSED,
-                                              size_t size ATTRIBUTE_UNUSED) {
-  UNIMPLEMENTED(FATAL);
+void ArmVIXLJNIMacroAssembler::LoadFromThread(ManagedRegister m_dst,
+                                              ThreadOffset32 src,
+                                              size_t size) {
+  return Load(m_dst.AsArm(), tr, src.Int32Value(), size);
 }
 
 void ArmVIXLJNIMacroAssembler::LoadRawPtrFromThread(ManagedRegister m_dst, ThreadOffset32 offs) {
@@ -558,6 +558,38 @@
   // TODO: think about using CBNZ here.
 }
 
+std::unique_ptr<JNIMacroLabel> ArmVIXLJNIMacroAssembler::CreateLabel() {
+  return std::unique_ptr<JNIMacroLabel>(new ArmVIXLJNIMacroLabel());
+}
+
+void ArmVIXLJNIMacroAssembler::Jump(JNIMacroLabel* label) {
+  CHECK(label != nullptr);
+  ___ B(ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
+}
+
+void ArmVIXLJNIMacroAssembler::Jump(JNIMacroLabel* label,
+                                    JNIMacroUnaryCondition condition,
+                                    ManagedRegister test) {
+  CHECK(label != nullptr);
+
+  switch (condition) {
+    case JNIMacroUnaryCondition::kZero:
+      ___ Cbz(test.AsArm().AsVIXLRegister(), ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
+      break;
+    case JNIMacroUnaryCondition::kNotZero:
+      ___ Cbnz(test.AsArm().AsVIXLRegister(), ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
+      break;
+    default:
+      LOG(FATAL) << "Not implemented unary condition: " << static_cast<int>(condition);
+      UNREACHABLE();
+  }
+}
+
+void ArmVIXLJNIMacroAssembler::Bind(JNIMacroLabel* label) {
+  CHECK(label != nullptr);
+  ___ Bind(ArmVIXLJNIMacroLabel::Cast(label)->AsArm());
+}
+
 void ArmVIXLJNIMacroAssembler::EmitExceptionPoll(
     ArmVIXLJNIMacroAssembler::ArmException* exception) {
   ___ Bind(exception->Entry());
@@ -588,9 +620,14 @@
   if (dest.IsNoRegister()) {
     CHECK_EQ(0u, size) << dest;
   } else if (dest.IsCoreRegister()) {
-    CHECK_EQ(4u, size) << dest;
     CHECK(!dest.AsVIXLRegister().Is(sp)) << dest;
-    ___ Ldr(dest.AsVIXLRegister(), MemOperand(base, offset));
+
+    if (size == 1u) {
+      ___ Ldrb(dest.AsVIXLRegister(), MemOperand(base, offset));
+    } else {
+      CHECK_EQ(4u, size) << dest;
+      ___ Ldr(dest.AsVIXLRegister(), MemOperand(base, offset));
+    }
   } else if (dest.IsRegisterPair()) {
     CHECK_EQ(8u, size) << dest;
     ___ Ldr(dest.AsVIXLRegisterPairLow(),  MemOperand(base, offset));
diff --git a/compiler/utils/arm/jni_macro_assembler_arm_vixl.h b/compiler/utils/arm/jni_macro_assembler_arm_vixl.h
index 9fc683d..f3baf1f 100644
--- a/compiler/utils/arm/jni_macro_assembler_arm_vixl.h
+++ b/compiler/utils/arm/jni_macro_assembler_arm_vixl.h
@@ -187,6 +187,15 @@
   // and branch to a ExceptionSlowPath if it is.
   void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust);
 
+  // Create a new label that can be used with Jump/Bind calls.
+  std::unique_ptr<JNIMacroLabel> CreateLabel() OVERRIDE;
+  // Emit an unconditional jump to the label.
+  void Jump(JNIMacroLabel* label) OVERRIDE;
+  // Emit a conditional jump to the label by applying a unary condition test to the register.
+  void Jump(JNIMacroLabel* label, JNIMacroUnaryCondition cond, ManagedRegister test) OVERRIDE;
+  // Code at this offset will serve as the target for the Jump call.
+  void Bind(JNIMacroLabel* label) OVERRIDE;
+
   void MemoryBarrier(ManagedRegister scratch) OVERRIDE;
 
   void EmitExceptionPoll(ArmVIXLJNIMacroAssembler::ArmException *exception);
@@ -219,6 +228,16 @@
   friend class ArmVIXLAssemblerTest_VixlStoreToOffset_Test;
 };
 
+class ArmVIXLJNIMacroLabel FINAL
+    : public JNIMacroLabelCommon<ArmVIXLJNIMacroLabel,
+                                 vixl32::Label,
+                                 kArm> {
+ public:
+  vixl32::Label* AsArm() {
+    return AsPlatformLabel();
+  }
+};
+
 }  // namespace arm
 }  // namespace art
 
diff --git a/compiler/utils/arm64/jni_macro_assembler_arm64.cc b/compiler/utils/arm64/jni_macro_assembler_arm64.cc
index dfdcd11..9cd6884 100644
--- a/compiler/utils/arm64/jni_macro_assembler_arm64.cc
+++ b/compiler/utils/arm64/jni_macro_assembler_arm64.cc
@@ -262,9 +262,12 @@
     ___ Ldr(reg_w(dest.AsWRegister()), MEM_OP(reg_x(base), offset));
   } else if (dest.IsXRegister()) {
     CHECK_NE(dest.AsXRegister(), SP) << dest;
-    if (size == 4u) {
+
+    if (size == 1u) {
+      ___ Ldrb(reg_w(dest.AsOverlappingWRegister()), MEM_OP(reg_x(base), offset));
+    } else if (size == 4u) {
       ___ Ldr(reg_w(dest.AsOverlappingWRegister()), MEM_OP(reg_x(base), offset));
-    } else {
+    }  else {
       CHECK_EQ(8u, size) << dest;
       ___ Ldr(reg_x(dest.AsXRegister()), MEM_OP(reg_x(base), offset));
     }
@@ -627,6 +630,38 @@
   ___ Cbnz(reg_x(scratch.AsXRegister()), exception_blocks_.back()->Entry());
 }
 
+std::unique_ptr<JNIMacroLabel> Arm64JNIMacroAssembler::CreateLabel() {
+  return std::unique_ptr<JNIMacroLabel>(new Arm64JNIMacroLabel());
+}
+
+void Arm64JNIMacroAssembler::Jump(JNIMacroLabel* label) {
+  CHECK(label != nullptr);
+  ___ B(Arm64JNIMacroLabel::Cast(label)->AsArm64());
+}
+
+void Arm64JNIMacroAssembler::Jump(JNIMacroLabel* label,
+                                  JNIMacroUnaryCondition condition,
+                                  ManagedRegister test) {
+  CHECK(label != nullptr);
+
+  switch (condition) {
+    case JNIMacroUnaryCondition::kZero:
+      ___ Cbz(reg_x(test.AsArm64().AsXRegister()), Arm64JNIMacroLabel::Cast(label)->AsArm64());
+      break;
+    case JNIMacroUnaryCondition::kNotZero:
+      ___ Cbnz(reg_x(test.AsArm64().AsXRegister()), Arm64JNIMacroLabel::Cast(label)->AsArm64());
+      break;
+    default:
+      LOG(FATAL) << "Not implemented unary condition: " << static_cast<int>(condition);
+      UNREACHABLE();
+  }
+}
+
+void Arm64JNIMacroAssembler::Bind(JNIMacroLabel* label) {
+  CHECK(label != nullptr);
+  ___ Bind(Arm64JNIMacroLabel::Cast(label)->AsArm64());
+}
+
 void Arm64JNIMacroAssembler::EmitExceptionPoll(Arm64Exception *exception) {
   UseScratchRegisterScope temps(asm_.GetVIXLAssembler());
   temps.Exclude(reg_x(exception->scratch_.AsXRegister()));
diff --git a/compiler/utils/arm64/jni_macro_assembler_arm64.h b/compiler/utils/arm64/jni_macro_assembler_arm64.h
index b9f6854..264e99a 100644
--- a/compiler/utils/arm64/jni_macro_assembler_arm64.h
+++ b/compiler/utils/arm64/jni_macro_assembler_arm64.h
@@ -168,6 +168,15 @@
   // and branch to a ExceptionSlowPath if it is.
   void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE;
 
+  // Create a new label that can be used with Jump/Bind calls.
+  std::unique_ptr<JNIMacroLabel> CreateLabel() OVERRIDE;
+  // Emit an unconditional jump to the label.
+  void Jump(JNIMacroLabel* label) OVERRIDE;
+  // Emit a conditional jump to the label by applying a unary condition test to the register.
+  void Jump(JNIMacroLabel* label, JNIMacroUnaryCondition cond, ManagedRegister test) OVERRIDE;
+  // Code at this offset will serve as the target for the Jump call.
+  void Bind(JNIMacroLabel* label) OVERRIDE;
+
  private:
   class Arm64Exception {
    public:
@@ -222,6 +231,16 @@
   ArenaVector<std::unique_ptr<Arm64Exception>> exception_blocks_;
 };
 
+class Arm64JNIMacroLabel FINAL
+    : public JNIMacroLabelCommon<Arm64JNIMacroLabel,
+                                 vixl::aarch64::Label,
+                                 kArm64> {
+ public:
+  vixl::aarch64::Label* AsArm64() {
+    return AsPlatformLabel();
+  }
+};
+
 }  // namespace arm64
 }  // namespace art
 
diff --git a/compiler/utils/assembler_test.h b/compiler/utils/assembler_test.h
index 92b4c8e..b34e125 100644
--- a/compiler/utils/assembler_test.h
+++ b/compiler/utils/assembler_test.h
@@ -51,30 +51,30 @@
 
   typedef std::string (*TestFn)(AssemblerTest* assembler_test, Ass* assembler);
 
-  void DriverFn(TestFn f, std::string test_name) {
+  void DriverFn(TestFn f, const std::string& test_name) {
     DriverWrapper(f(this, assembler_.get()), test_name);
   }
 
   // This driver assumes the assembler has already been called.
-  void DriverStr(std::string assembly_string, std::string test_name) {
+  void DriverStr(const std::string& assembly_string, const std::string& test_name) {
     DriverWrapper(assembly_string, test_name);
   }
 
-  std::string RepeatR(void (Ass::*f)(Reg), std::string fmt) {
+  std::string RepeatR(void (Ass::*f)(Reg), const std::string& fmt) {
     return RepeatTemplatedRegister<Reg>(f,
         GetRegisters(),
         &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>,
         fmt);
   }
 
-  std::string Repeatr(void (Ass::*f)(Reg), std::string fmt) {
+  std::string Repeatr(void (Ass::*f)(Reg), const std::string& fmt) {
     return RepeatTemplatedRegister<Reg>(f,
         GetRegisters(),
         &AssemblerTest::GetRegName<RegisterView::kUseSecondaryName>,
         fmt);
   }
 
-  std::string RepeatRR(void (Ass::*f)(Reg, Reg), std::string fmt) {
+  std::string RepeatRR(void (Ass::*f)(Reg, Reg), const std::string& fmt) {
     return RepeatTemplatedRegisters<Reg, Reg>(f,
         GetRegisters(),
         GetRegisters(),
@@ -83,7 +83,7 @@
         fmt);
   }
 
-  std::string RepeatRRNoDupes(void (Ass::*f)(Reg, Reg), std::string fmt) {
+  std::string RepeatRRNoDupes(void (Ass::*f)(Reg, Reg), const std::string& fmt) {
     return RepeatTemplatedRegistersNoDupes<Reg, Reg>(f,
         GetRegisters(),
         GetRegisters(),
@@ -92,7 +92,7 @@
         fmt);
   }
 
-  std::string Repeatrr(void (Ass::*f)(Reg, Reg), std::string fmt) {
+  std::string Repeatrr(void (Ass::*f)(Reg, Reg), const std::string& fmt) {
     return RepeatTemplatedRegisters<Reg, Reg>(f,
         GetRegisters(),
         GetRegisters(),
@@ -101,7 +101,7 @@
         fmt);
   }
 
-  std::string RepeatRRR(void (Ass::*f)(Reg, Reg, Reg), std::string fmt) {
+  std::string RepeatRRR(void (Ass::*f)(Reg, Reg, Reg), const std::string& fmt) {
     return RepeatTemplatedRegisters<Reg, Reg, Reg>(f,
         GetRegisters(),
         GetRegisters(),
@@ -112,7 +112,7 @@
         fmt);
   }
 
-  std::string Repeatrb(void (Ass::*f)(Reg, Reg), std::string fmt) {
+  std::string Repeatrb(void (Ass::*f)(Reg, Reg), const std::string& fmt) {
     return RepeatTemplatedRegisters<Reg, Reg>(f,
         GetRegisters(),
         GetRegisters(),
@@ -121,7 +121,7 @@
         fmt);
   }
 
-  std::string RepeatRr(void (Ass::*f)(Reg, Reg), std::string fmt) {
+  std::string RepeatRr(void (Ass::*f)(Reg, Reg), const std::string& fmt) {
     return RepeatTemplatedRegisters<Reg, Reg>(f,
         GetRegisters(),
         GetRegisters(),
@@ -130,11 +130,11 @@
         fmt);
   }
 
-  std::string RepeatRI(void (Ass::*f)(Reg, const Imm&), size_t imm_bytes, std::string fmt) {
+  std::string RepeatRI(void (Ass::*f)(Reg, const Imm&), size_t imm_bytes, const std::string& fmt) {
     return RepeatRegisterImm<RegisterView::kUsePrimaryName>(f, imm_bytes, fmt);
   }
 
-  std::string Repeatri(void (Ass::*f)(Reg, const Imm&), size_t imm_bytes, std::string fmt) {
+  std::string Repeatri(void (Ass::*f)(Reg, const Imm&), size_t imm_bytes, const std::string& fmt) {
     return RepeatRegisterImm<RegisterView::kUseSecondaryName>(f, imm_bytes, fmt);
   }
 
@@ -145,7 +145,7 @@
                                               const std::vector<Reg2*> reg2_registers,
                                               std::string (AssemblerTest::*GetName1)(const Reg1&),
                                               std::string (AssemblerTest::*GetName2)(const Reg2&),
-                                              std::string fmt) {
+                                              const std::string& fmt) {
     std::string str;
     std::vector<int64_t> imms = CreateImmediateValuesBits(abs(imm_bits), (imm_bits > 0));
 
@@ -195,7 +195,7 @@
                                               std::string (AssemblerTest::*GetName1)(const Reg1&),
                                               std::string (AssemblerTest::*GetName2)(const Reg2&),
                                               int imm_bits,
-                                              std::string fmt) {
+                                              const std::string& fmt) {
     std::vector<int64_t> imms = CreateImmediateValuesBits(abs(imm_bits), (imm_bits > 0));
 
     WarnOnCombinations(reg1_registers.size() * reg2_registers.size() * imms.size());
@@ -245,7 +245,7 @@
                                              int imm_bits,
                                              const std::vector<Reg*> registers,
                                              std::string (AssemblerTest::*GetName)(const RegType&),
-                                             std::string fmt) {
+                                             const std::string& fmt) {
     std::string str;
     std::vector<int64_t> imms = CreateImmediateValuesBits(abs(imm_bits), (imm_bits > 0));
 
@@ -281,7 +281,7 @@
   }
 
   template <typename ImmType>
-  std::string RepeatRRIb(void (Ass::*f)(Reg, Reg, ImmType), int imm_bits, std::string fmt) {
+  std::string RepeatRRIb(void (Ass::*f)(Reg, Reg, ImmType), int imm_bits, const std::string& fmt) {
     return RepeatTemplatedRegistersImmBits<Reg, Reg, ImmType>(f,
         imm_bits,
         GetRegisters(),
@@ -292,7 +292,7 @@
   }
 
   template <typename ImmType>
-  std::string RepeatRIb(void (Ass::*f)(Reg, ImmType), int imm_bits, std::string fmt) {
+  std::string RepeatRIb(void (Ass::*f)(Reg, ImmType), int imm_bits, const std::string& fmt) {
     return RepeatTemplatedRegisterImmBits<Reg, ImmType>(f,
         imm_bits,
         GetRegisters(),
@@ -301,7 +301,9 @@
   }
 
   template <typename ImmType>
-  std::string RepeatFRIb(void (Ass::*f)(FPReg, Reg, ImmType), int imm_bits, std::string fmt) {
+  std::string RepeatFRIb(void (Ass::*f)(FPReg, Reg, ImmType),
+                         int imm_bits,
+                         const std::string& fmt) {
     return RepeatTemplatedRegistersImmBits<FPReg, Reg, ImmType>(f,
         imm_bits,
         GetFPRegisters(),
@@ -311,7 +313,7 @@
         fmt);
   }
 
-  std::string RepeatFF(void (Ass::*f)(FPReg, FPReg), std::string fmt) {
+  std::string RepeatFF(void (Ass::*f)(FPReg, FPReg), const std::string& fmt) {
     return RepeatTemplatedRegisters<FPReg, FPReg>(f,
                                                   GetFPRegisters(),
                                                   GetFPRegisters(),
@@ -320,7 +322,7 @@
                                                   fmt);
   }
 
-  std::string RepeatFFF(void (Ass::*f)(FPReg, FPReg, FPReg), std::string fmt) {
+  std::string RepeatFFF(void (Ass::*f)(FPReg, FPReg, FPReg), const std::string& fmt) {
     return RepeatTemplatedRegisters<FPReg, FPReg, FPReg>(f,
                                                          GetFPRegisters(),
                                                          GetFPRegisters(),
@@ -331,9 +333,21 @@
                                                          fmt);
   }
 
+  std::string RepeatFFR(void (Ass::*f)(FPReg, FPReg, Reg), const std::string& fmt) {
+    return RepeatTemplatedRegisters<FPReg, FPReg, Reg>(
+        f,
+        GetFPRegisters(),
+        GetFPRegisters(),
+        GetRegisters(),
+        &AssemblerTest::GetFPRegName,
+        &AssemblerTest::GetFPRegName,
+        &AssemblerTest::GetRegName<RegisterView::kUsePrimaryName>,
+        fmt);
+  }
+
   std::string RepeatFFI(void (Ass::*f)(FPReg, FPReg, const Imm&),
                         size_t imm_bytes,
-                        std::string fmt) {
+                        const std::string& fmt) {
     return RepeatTemplatedRegistersImm<FPReg, FPReg>(f,
                                                      GetFPRegisters(),
                                                      GetFPRegisters(),
@@ -344,7 +358,9 @@
   }
 
   template <typename ImmType>
-  std::string RepeatFFIb(void (Ass::*f)(FPReg, FPReg, ImmType), int imm_bits, std::string fmt) {
+  std::string RepeatFFIb(void (Ass::*f)(FPReg, FPReg, ImmType),
+                         int imm_bits,
+                         const std::string& fmt) {
     return RepeatTemplatedRegistersImmBits<FPReg, FPReg, ImmType>(f,
                                                                   imm_bits,
                                                                   GetFPRegisters(),
@@ -355,7 +371,9 @@
   }
 
   template <typename ImmType>
-  std::string RepeatIbFF(void (Ass::*f)(ImmType, FPReg, FPReg), int imm_bits, std::string fmt) {
+  std::string RepeatIbFF(void (Ass::*f)(ImmType, FPReg, FPReg),
+                         int imm_bits,
+                         const std::string& fmt) {
     return RepeatTemplatedImmBitsRegisters<ImmType, FPReg, FPReg>(f,
                                                                   GetFPRegisters(),
                                                                   GetFPRegisters(),
@@ -365,7 +383,7 @@
                                                                   fmt);
   }
 
-  std::string RepeatFR(void (Ass::*f)(FPReg, Reg), std::string fmt) {
+  std::string RepeatFR(void (Ass::*f)(FPReg, Reg), const std::string& fmt) {
     return RepeatTemplatedRegisters<FPReg, Reg>(f,
         GetFPRegisters(),
         GetRegisters(),
@@ -374,7 +392,7 @@
         fmt);
   }
 
-  std::string RepeatFr(void (Ass::*f)(FPReg, Reg), std::string fmt) {
+  std::string RepeatFr(void (Ass::*f)(FPReg, Reg), const std::string& fmt) {
     return RepeatTemplatedRegisters<FPReg, Reg>(f,
         GetFPRegisters(),
         GetRegisters(),
@@ -383,7 +401,7 @@
         fmt);
   }
 
-  std::string RepeatRF(void (Ass::*f)(Reg, FPReg), std::string fmt) {
+  std::string RepeatRF(void (Ass::*f)(Reg, FPReg), const std::string& fmt) {
     return RepeatTemplatedRegisters<Reg, FPReg>(f,
         GetRegisters(),
         GetFPRegisters(),
@@ -392,7 +410,7 @@
         fmt);
   }
 
-  std::string RepeatrF(void (Ass::*f)(Reg, FPReg), std::string fmt) {
+  std::string RepeatrF(void (Ass::*f)(Reg, FPReg), const std::string& fmt) {
     return RepeatTemplatedRegisters<Reg, FPReg>(f,
         GetRegisters(),
         GetFPRegisters(),
@@ -401,7 +419,9 @@
         fmt);
   }
 
-  std::string RepeatI(void (Ass::*f)(const Imm&), size_t imm_bytes, std::string fmt,
+  std::string RepeatI(void (Ass::*f)(const Imm&),
+                      size_t imm_bytes,
+                      const std::string& fmt,
                       bool as_uint = false) {
     std::string str;
     std::vector<int64_t> imms = CreateImmediateValues(imm_bytes, as_uint);
@@ -639,7 +659,7 @@
   std::string RepeatTemplatedRegister(void (Ass::*f)(RegType),
                                       const std::vector<RegType*> registers,
                                       std::string (AssemblerTest::*GetName)(const RegType&),
-                                      std::string fmt) {
+                                      const std::string& fmt) {
     std::string str;
     for (auto reg : registers) {
       (assembler_.get()->*f)(*reg);
@@ -667,7 +687,7 @@
                                        const std::vector<Reg2*> reg2_registers,
                                        std::string (AssemblerTest::*GetName1)(const Reg1&),
                                        std::string (AssemblerTest::*GetName2)(const Reg2&),
-                                       std::string fmt) {
+                                       const std::string& fmt) {
     WarnOnCombinations(reg1_registers.size() * reg2_registers.size());
 
     std::string str;
@@ -705,7 +725,7 @@
                                               const std::vector<Reg2*> reg2_registers,
                                               std::string (AssemblerTest::*GetName1)(const Reg1&),
                                               std::string (AssemblerTest::*GetName2)(const Reg2&),
-                                              std::string fmt) {
+                                              const std::string& fmt) {
     WarnOnCombinations(reg1_registers.size() * reg2_registers.size());
 
     std::string str;
@@ -746,7 +766,7 @@
                                        std::string (AssemblerTest::*GetName1)(const Reg1&),
                                        std::string (AssemblerTest::*GetName2)(const Reg2&),
                                        std::string (AssemblerTest::*GetName3)(const Reg3&),
-                                       std::string fmt) {
+                                       const std::string& fmt) {
     std::string str;
     for (auto reg1 : reg1_registers) {
       for (auto reg2 : reg2_registers) {
@@ -791,7 +811,7 @@
                                           std::string (AssemblerTest::*GetName1)(const Reg1&),
                                           std::string (AssemblerTest::*GetName2)(const Reg2&),
                                           size_t imm_bytes,
-                                          std::string fmt) {
+                                          const std::string& fmt) {
     std::vector<int64_t> imms = CreateImmediateValues(imm_bytes);
     WarnOnCombinations(reg1_registers.size() * reg2_registers.size() * imms.size());
 
@@ -883,8 +903,9 @@
 
  private:
   template <RegisterView kRegView>
-  std::string RepeatRegisterImm(void (Ass::*f)(Reg, const Imm&), size_t imm_bytes,
-                                  std::string fmt) {
+  std::string RepeatRegisterImm(void (Ass::*f)(Reg, const Imm&),
+                                size_t imm_bytes,
+                                const std::string& fmt) {
     const std::vector<Reg*> registers = GetRegisters();
     std::string str;
     std::vector<int64_t> imms = CreateImmediateValues(imm_bytes);
@@ -926,7 +947,7 @@
   virtual void Pad(std::vector<uint8_t>& data ATTRIBUTE_UNUSED) {
   }
 
-  void DriverWrapper(std::string assembly_text, std::string test_name) {
+  void DriverWrapper(const std::string& assembly_text, const std::string& test_name) {
     assembler_->FinalizeCode();
     size_t cs = assembler_->CodeSize();
     std::unique_ptr<std::vector<uint8_t>> data(new std::vector<uint8_t>(cs));
diff --git a/compiler/utils/assembler_test_base.h b/compiler/utils/assembler_test_base.h
index 8c71292..ac24ee9 100644
--- a/compiler/utils/assembler_test_base.h
+++ b/compiler/utils/assembler_test_base.h
@@ -106,7 +106,9 @@
   // Driver() assembles and compares the results. If the results are not equal and we have a
   // disassembler, disassemble both and check whether they have the same mnemonics (in which case
   // we just warn).
-  void Driver(const std::vector<uint8_t>& data, std::string assembly_text, std::string test_name) {
+  void Driver(const std::vector<uint8_t>& data,
+              const std::string& assembly_text,
+              const std::string& test_name) {
     EXPECT_NE(assembly_text.length(), 0U) << "Empty assembly";
 
     NativeAssemblerResult res;
@@ -229,7 +231,7 @@
     bool success = Exec(args, error_msg);
     if (!success) {
       LOG(ERROR) << "Assembler command line:";
-      for (std::string arg : args) {
+      for (const std::string& arg : args) {
         LOG(ERROR) << arg;
       }
     }
@@ -238,7 +240,7 @@
 
   // Runs objdump -h on the binary file and extracts the first line with .text.
   // Returns "" on failure.
-  std::string Objdump(std::string file) {
+  std::string Objdump(const std::string& file) {
     bool have_objdump = FileExists(FindTool(objdump_cmd_name_));
     EXPECT_TRUE(have_objdump) << "Cannot find objdump: " << GetObjdumpCommand();
     if (!have_objdump) {
@@ -287,8 +289,9 @@
   }
 
   // Disassemble both binaries and compare the text.
-  bool DisassembleBinaries(const std::vector<uint8_t>& data, const std::vector<uint8_t>& as,
-                           std::string test_name) {
+  bool DisassembleBinaries(const std::vector<uint8_t>& data,
+                           const std::vector<uint8_t>& as,
+                           const std::string& test_name) {
     std::string disassembler = GetDisassembleCommand();
     if (disassembler.length() == 0) {
       LOG(WARNING) << "No dissassembler command.";
@@ -324,7 +327,7 @@
     return result;
   }
 
-  bool DisassembleBinary(std::string file, std::string* error_msg) {
+  bool DisassembleBinary(const std::string& file, std::string* error_msg) {
     std::vector<std::string> args;
 
     // Encaspulate the whole command line in a single string passed to
@@ -345,7 +348,7 @@
     return Exec(args, error_msg);
   }
 
-  std::string WriteToFile(const std::vector<uint8_t>& buffer, std::string test_name) {
+  std::string WriteToFile(const std::vector<uint8_t>& buffer, const std::string& test_name) {
     std::string file_name = GetTmpnam() + std::string("---") + test_name;
     const char* data = reinterpret_cast<const char*>(buffer.data());
     std::ofstream s_out(file_name + ".o");
@@ -354,7 +357,7 @@
     return file_name + ".o";
   }
 
-  bool CompareFiles(std::string f1, std::string f2) {
+  bool CompareFiles(const std::string& f1, const std::string& f2) {
     std::ifstream f1_in(f1);
     std::ifstream f2_in(f2);
 
@@ -369,7 +372,9 @@
   }
 
   // Compile the given assembly code and extract the binary, if possible. Put result into res.
-  bool Compile(std::string assembly_code, NativeAssemblerResult* res, std::string test_name) {
+  bool Compile(const std::string& assembly_code,
+               NativeAssemblerResult* res,
+               const std::string& test_name) {
     res->ok = false;
     res->code.reset(nullptr);
 
@@ -438,7 +443,7 @@
   // Check whether file exists. Is used for commands, so strips off any parameters: anything after
   // the first space. We skip to the last slash for this, so it should work with directories with
   // spaces.
-  static bool FileExists(std::string file) {
+  static bool FileExists(const std::string& file) {
     if (file.length() == 0) {
       return false;
     }
@@ -478,7 +483,7 @@
     return getcwd(temp, 1024) ? std::string(temp) + "/" : std::string("");
   }
 
-  std::string FindTool(std::string tool_name) {
+  std::string FindTool(const std::string& tool_name) {
     // Find the current tool. Wild-card pattern is "arch-string*tool-name".
     std::string gcc_path = GetRootPath() + GetGCCRootPath();
     std::vector<std::string> args;
@@ -522,7 +527,8 @@
 
   // Helper for below. If name_predicate is empty, search for all files, otherwise use it for the
   // "-name" option.
-  static void FindToolDumpPrintout(std::string name_predicate, std::string tmp_file) {
+  static void FindToolDumpPrintout(const std::string& name_predicate,
+                                   const std::string& tmp_file) {
     std::string gcc_path = GetRootPath() + GetGCCRootPath();
     std::vector<std::string> args;
     args.push_back("find");
@@ -562,7 +568,7 @@
   }
 
   // For debug purposes.
-  void FindToolDump(std::string tool_name) {
+  void FindToolDump(const std::string& tool_name) {
     // Check with the tool name.
     FindToolDumpPrintout(architecture_string_ + "*" + tool_name, GetTmpnam());
     FindToolDumpPrintout("", GetTmpnam());
diff --git a/compiler/utils/assembler_thumb_test.cc b/compiler/utils/assembler_thumb_test.cc
index 86a4aa2..10bed13 100644
--- a/compiler/utils/assembler_thumb_test.cc
+++ b/compiler/utils/assembler_thumb_test.cc
@@ -158,7 +158,7 @@
       }
       if (CompareIgnoringSpace(results[lineindex], testline) != 0) {
         LOG(FATAL) << "Output is not as expected at line: " << lineindex
-          << results[lineindex] << "/" << testline;
+          << results[lineindex] << "/" << testline << ", test name: " << testname;
       }
       ++lineindex;
     }
diff --git a/compiler/utils/assembler_thumb_test_expected.cc.inc b/compiler/utils/assembler_thumb_test_expected.cc.inc
index 91f3970..69e1d8f 100644
--- a/compiler/utils/assembler_thumb_test_expected.cc.inc
+++ b/compiler/utils/assembler_thumb_test_expected.cc.inc
@@ -5544,7 +5544,7 @@
   " 10c:  ecbd 8a10   vpop  {s16-s31}\n",
   " 110:  e8bd 8de0   ldmia.w sp!, {r5, r6, r7, r8, sl, fp, pc}\n",
   " 114:  4660        mov r0, ip\n",
-  " 116:  f8d9 c2ac   ldr.w ip, [r9, #684]  ; 0x2ac\n",
+  " 116:  f8d9 c2b0   ldr.w ip, [r9, #688]  ; 0x2b0\n",
   " 11a:  47e0        blx ip\n",
   nullptr
 };
diff --git a/compiler/utils/jni_macro_assembler.h b/compiler/utils/jni_macro_assembler.h
index 0119ae9..59a1a48 100644
--- a/compiler/utils/jni_macro_assembler.h
+++ b/compiler/utils/jni_macro_assembler.h
@@ -35,6 +35,12 @@
 class DebugFrameOpCodeWriterForAssembler;
 class InstructionSetFeatures;
 class MemoryRegion;
+class JNIMacroLabel;
+
+enum class JNIMacroUnaryCondition {
+  kZero,
+  kNotZero
+};
 
 template <PointerSize kPointerSize>
 class JNIMacroAssembler : public DeletableArenaObject<kArenaAllocAssembler> {
@@ -193,6 +199,15 @@
   // and branch to a ExceptionSlowPath if it is.
   virtual void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) = 0;
 
+  // Create a new label that can be used with Jump/Bind calls.
+  virtual std::unique_ptr<JNIMacroLabel> CreateLabel() = 0;
+  // Emit an unconditional jump to the label.
+  virtual void Jump(JNIMacroLabel* label) = 0;
+  // Emit a conditional jump to the label by applying a unary condition test to the register.
+  virtual void Jump(JNIMacroLabel* label, JNIMacroUnaryCondition cond, ManagedRegister test) = 0;
+  // Code at this offset will serve as the target for the Jump call.
+  virtual void Bind(JNIMacroLabel* label) = 0;
+
   virtual ~JNIMacroAssembler() {}
 
   /**
@@ -205,6 +220,28 @@
   explicit JNIMacroAssembler() {}
 };
 
+// A "Label" class used with the JNIMacroAssembler
+// allowing one to use branches (jumping from one place to another).
+//
+// This is just an interface, so every platform must provide
+// its own implementation of it.
+//
+// It is only safe to use a label created
+// via JNIMacroAssembler::CreateLabel with that same macro assembler.
+class JNIMacroLabel {
+ public:
+  virtual ~JNIMacroLabel() = 0;
+
+  const InstructionSet isa_;
+ protected:
+  explicit JNIMacroLabel(InstructionSet isa) : isa_(isa) {}
+};
+
+inline JNIMacroLabel::~JNIMacroLabel() {
+  // Compulsory definition for a pure virtual destructor
+  // to avoid linking errors.
+}
+
 template <typename T, PointerSize kPointerSize>
 class JNIMacroAssemblerFwd : public JNIMacroAssembler<kPointerSize> {
  public:
@@ -230,6 +267,30 @@
   T asm_;
 };
 
+template <typename Self, typename PlatformLabel, InstructionSet kIsa>
+class JNIMacroLabelCommon : public JNIMacroLabel {
+ public:
+  static Self* Cast(JNIMacroLabel* label) {
+    CHECK(label != nullptr);
+    CHECK_EQ(kIsa, label->isa_);
+
+    return reinterpret_cast<Self*>(label);
+  }
+
+ protected:
+  PlatformLabel* AsPlatformLabel() {
+    return &label_;
+  }
+
+  JNIMacroLabelCommon() : JNIMacroLabel(kIsa) {
+  }
+
+  virtual ~JNIMacroLabelCommon() OVERRIDE {}
+
+ private:
+  PlatformLabel label_;
+};
+
 }  // namespace art
 
 #endif  // ART_COMPILER_UTILS_JNI_MACRO_ASSEMBLER_H_
diff --git a/compiler/utils/jni_macro_assembler_test.h b/compiler/utils/jni_macro_assembler_test.h
index 829f34b..293f4cd 100644
--- a/compiler/utils/jni_macro_assembler_test.h
+++ b/compiler/utils/jni_macro_assembler_test.h
@@ -39,12 +39,12 @@
 
   typedef std::string (*TestFn)(JNIMacroAssemblerTest* assembler_test, Ass* assembler);
 
-  void DriverFn(TestFn f, std::string test_name) {
+  void DriverFn(TestFn f, const std::string& test_name) {
     DriverWrapper(f(this, assembler_.get()), test_name);
   }
 
   // This driver assumes the assembler has already been called.
-  void DriverStr(std::string assembly_string, std::string test_name) {
+  void DriverStr(const std::string& assembly_string, const std::string& test_name) {
     DriverWrapper(assembly_string, test_name);
   }
 
@@ -128,7 +128,7 @@
   virtual void Pad(std::vector<uint8_t>& data ATTRIBUTE_UNUSED) {
   }
 
-  void DriverWrapper(std::string assembly_text, std::string test_name) {
+  void DriverWrapper(const std::string& assembly_text, const std::string& test_name) {
     assembler_->FinalizeCode();
     size_t cs = assembler_->CodeSize();
     std::unique_ptr<std::vector<uint8_t>> data(new std::vector<uint8_t>(cs));
diff --git a/compiler/utils/managed_register.h b/compiler/utils/managed_register.h
index 46adb3f..184cdf5 100644
--- a/compiler/utils/managed_register.h
+++ b/compiler/utils/managed_register.h
@@ -17,8 +17,11 @@
 #ifndef ART_COMPILER_UTILS_MANAGED_REGISTER_H_
 #define ART_COMPILER_UTILS_MANAGED_REGISTER_H_
 
+#include <type_traits>
 #include <vector>
 
+#include "base/value_object.h"
+
 namespace art {
 
 namespace arm {
@@ -42,17 +45,14 @@
 class X86_64ManagedRegister;
 }
 
-class ManagedRegister {
+class ManagedRegister : public ValueObject {
  public:
   // ManagedRegister is a value class. There exists no method to change the
   // internal state. We therefore allow a copy constructor and an
   // assignment-operator.
-  constexpr ManagedRegister(const ManagedRegister& other) : id_(other.id_) { }
+  constexpr ManagedRegister(const ManagedRegister& other) = default;
 
-  ManagedRegister& operator=(const ManagedRegister& other) {
-    id_ = other.id_;
-    return *this;
-  }
+  ManagedRegister& operator=(const ManagedRegister& other) = default;
 
   constexpr arm::ArmManagedRegister AsArm() const;
   constexpr arm64::Arm64ManagedRegister AsArm64() const;
@@ -85,6 +85,9 @@
   int id_;
 };
 
+static_assert(std::is_trivially_copyable<ManagedRegister>::value,
+              "ManagedRegister should be trivially copyable");
+
 class ManagedRegisterSpill : public ManagedRegister {
  public:
   // ManagedRegisterSpill contains information about data type size and location in caller frame
@@ -115,18 +118,18 @@
  public:
   // The ManagedRegister does not have information about size and offset.
   // In this case it's size and offset determined by BuildFrame (assembler)
-  void push_back(ManagedRegister __x) {
-    ManagedRegisterSpill spill(__x);
+  void push_back(ManagedRegister x) {
+    ManagedRegisterSpill spill(x);
     std::vector<ManagedRegisterSpill>::push_back(spill);
   }
 
-  void push_back(ManagedRegister __x, int32_t __size) {
-    ManagedRegisterSpill spill(__x, __size);
+  void push_back(ManagedRegister x, int32_t size) {
+    ManagedRegisterSpill spill(x, size);
     std::vector<ManagedRegisterSpill>::push_back(spill);
   }
 
-  void push_back(ManagedRegisterSpill __x) {
-    std::vector<ManagedRegisterSpill>::push_back(__x);
+  void push_back(ManagedRegisterSpill x) {
+    std::vector<ManagedRegisterSpill>::push_back(x);
   }
  private:
 };
diff --git a/compiler/utils/mips/assembler_mips.cc b/compiler/utils/mips/assembler_mips.cc
index b972c70..b29974c 100644
--- a/compiler/utils/mips/assembler_mips.cc
+++ b/compiler/utils/mips/assembler_mips.cc
@@ -192,6 +192,13 @@
   DsFsmInstr(instruction, 0, 0, (1u << in1_out), (1u << in1_out) | (1u << in2) | (1u << in3), 0, 0);
 }
 
+void MipsAssembler::DsFsmInstrFffr(uint32_t instruction,
+                                   FRegister in1_out,
+                                   FRegister in2,
+                                   Register in3) {
+  DsFsmInstr(instruction, 0, (1u << in3), (1u << in1_out), (1u << in1_out) | (1u << in2), 0, 0);
+}
+
 void MipsAssembler::DsFsmInstrRf(uint32_t instruction, Register out, FRegister in) {
   DsFsmInstr(instruction, (1u << out), 0, 0, (1u << in), 0, 0);
 }
@@ -1446,6 +1453,26 @@
                  cc);
 }
 
+void MipsAssembler::MovzS(FRegister fd, FRegister fs, Register rt) {
+  CHECK(!IsR6());
+  DsFsmInstrFffr(EmitFR(0x11, 0x10, static_cast<FRegister>(rt), fs, fd, 0x12), fd, fs, rt);
+}
+
+void MipsAssembler::MovzD(FRegister fd, FRegister fs, Register rt) {
+  CHECK(!IsR6());
+  DsFsmInstrFffr(EmitFR(0x11, 0x11, static_cast<FRegister>(rt), fs, fd, 0x12), fd, fs, rt);
+}
+
+void MipsAssembler::MovnS(FRegister fd, FRegister fs, Register rt) {
+  CHECK(!IsR6());
+  DsFsmInstrFffr(EmitFR(0x11, 0x10, static_cast<FRegister>(rt), fs, fd, 0x13), fd, fs, rt);
+}
+
+void MipsAssembler::MovnD(FRegister fd, FRegister fs, Register rt) {
+  CHECK(!IsR6());
+  DsFsmInstrFffr(EmitFR(0x11, 0x11, static_cast<FRegister>(rt), fs, fd, 0x13), fd, fs, rt);
+}
+
 void MipsAssembler::SelS(FRegister fd, FRegister fs, FRegister ft) {
   CHECK(IsR6());
   DsFsmInstrFfff(EmitFR(0x11, 0x10, ft, fs, fd, 0x10), fd, fs, ft);
@@ -1456,6 +1483,26 @@
   DsFsmInstrFfff(EmitFR(0x11, 0x11, ft, fs, fd, 0x10), fd, fs, ft);
 }
 
+void MipsAssembler::SeleqzS(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  DsFsmInstrFff(EmitFR(0x11, 0x10, ft, fs, fd, 0x14), fd, fs, ft);
+}
+
+void MipsAssembler::SeleqzD(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  DsFsmInstrFff(EmitFR(0x11, 0x11, ft, fs, fd, 0x14), fd, fs, ft);
+}
+
+void MipsAssembler::SelnezS(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  DsFsmInstrFff(EmitFR(0x11, 0x10, ft, fs, fd, 0x17), fd, fs, ft);
+}
+
+void MipsAssembler::SelnezD(FRegister fd, FRegister fs, FRegister ft) {
+  CHECK(IsR6());
+  DsFsmInstrFff(EmitFR(0x11, 0x11, ft, fs, fd, 0x17), fd, fs, ft);
+}
+
 void MipsAssembler::ClassS(FRegister fd, FRegister fs) {
   CHECK(IsR6());
   DsFsmInstrFff(EmitFR(0x11, 0x10, static_cast<FRegister>(0), fs, fd, 0x1b), fd, fs, fs);
diff --git a/compiler/utils/mips/assembler_mips.h b/compiler/utils/mips/assembler_mips.h
index e1255f7..800dc5f 100644
--- a/compiler/utils/mips/assembler_mips.h
+++ b/compiler/utils/mips/assembler_mips.h
@@ -179,6 +179,8 @@
 
 class MipsAssembler FINAL : public Assembler, public JNIMacroAssembler<PointerSize::k32> {
  public:
+  using JNIBase = JNIMacroAssembler<PointerSize::k32>;
+
   explicit MipsAssembler(ArenaAllocator* arena,
                          const MipsInstructionSetFeatures* instruction_set_features = nullptr)
       : Assembler(arena),
@@ -412,8 +414,16 @@
   void MovfD(FRegister fd, FRegister fs, int cc = 0);  // R2
   void MovtS(FRegister fd, FRegister fs, int cc = 0);  // R2
   void MovtD(FRegister fd, FRegister fs, int cc = 0);  // R2
+  void MovzS(FRegister fd, FRegister fs, Register rt);  // R2
+  void MovzD(FRegister fd, FRegister fs, Register rt);  // R2
+  void MovnS(FRegister fd, FRegister fs, Register rt);  // R2
+  void MovnD(FRegister fd, FRegister fs, Register rt);  // R2
   void SelS(FRegister fd, FRegister fs, FRegister ft);  // R6
   void SelD(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void SeleqzS(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void SeleqzD(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void SelnezS(FRegister fd, FRegister fs, FRegister ft);  // R6
+  void SelnezD(FRegister fd, FRegister fs, FRegister ft);  // R6
   void ClassS(FRegister fd, FRegister fs);  // R6
   void ClassD(FRegister fd, FRegister fs);  // R6
   void MinS(FRegister fd, FRegister fs, FRegister ft);  // R6
@@ -723,6 +733,34 @@
     UNIMPLEMENTED(FATAL) << "Do not use Jump for MIPS";
   }
 
+  // Don't warn about a different virtual Bind/Jump in the base class.
+  using JNIBase::Bind;
+  using JNIBase::Jump;
+
+  // Create a new label that can be used with Jump/Bind calls.
+  std::unique_ptr<JNIMacroLabel> CreateLabel() OVERRIDE {
+    LOG(FATAL) << "Not implemented on MIPS32";
+    UNREACHABLE();
+  }
+  // Emit an unconditional jump to the label.
+  void Jump(JNIMacroLabel* label ATTRIBUTE_UNUSED) OVERRIDE {
+    LOG(FATAL) << "Not implemented on MIPS32";
+    UNREACHABLE();
+  }
+  // Emit a conditional jump to the label by applying a unary condition test to the register.
+  void Jump(JNIMacroLabel* label ATTRIBUTE_UNUSED,
+            JNIMacroUnaryCondition cond ATTRIBUTE_UNUSED,
+            ManagedRegister test ATTRIBUTE_UNUSED) OVERRIDE {
+    LOG(FATAL) << "Not implemented on MIPS32";
+    UNREACHABLE();
+  }
+
+  // Code at this offset will serve as the target for the Jump call.
+  void Bind(JNIMacroLabel* label ATTRIBUTE_UNUSED) OVERRIDE {
+    LOG(FATAL) << "Not implemented on MIPS32";
+    UNREACHABLE();
+  }
+
   // Create a new literal with a given value.
   // NOTE: Force the template parameter to be explicitly specified.
   template <typename T>
@@ -1227,6 +1265,7 @@
   void DsFsmInstrRrrr(uint32_t instruction, Register in1_out, Register in2, Register in3);
   void DsFsmInstrFff(uint32_t instruction, FRegister out, FRegister in1, FRegister in2);
   void DsFsmInstrFfff(uint32_t instruction, FRegister in1_out, FRegister in2, FRegister in3);
+  void DsFsmInstrFffr(uint32_t instruction, FRegister in1_out, FRegister in2, Register in3);
   void DsFsmInstrRf(uint32_t instruction, Register out, FRegister in);
   void DsFsmInstrFr(uint32_t instruction, FRegister out, Register in);
   void DsFsmInstrFR(uint32_t instruction, FRegister in1, Register in2);
diff --git a/compiler/utils/mips/assembler_mips32r6_test.cc b/compiler/utils/mips/assembler_mips32r6_test.cc
index 750a94d..a52f519 100644
--- a/compiler/utils/mips/assembler_mips32r6_test.cc
+++ b/compiler/utils/mips/assembler_mips32r6_test.cc
@@ -219,7 +219,7 @@
   void BranchCondTwoRegsHelper(void (mips::MipsAssembler::*f)(mips::Register,
                                                               mips::Register,
                                                               mips::MipsLabel*),
-                               std::string instr_name) {
+                               const std::string& instr_name) {
     mips::MipsLabel label;
     (Base::GetAssembler()->*f)(mips::A0, mips::A1, &label);
     constexpr size_t kAdduCount1 = 63;
@@ -349,6 +349,26 @@
   DriverStr(RepeatFFF(&mips::MipsAssembler::SelD, "sel.d ${reg1}, ${reg2}, ${reg3}"), "sel.d");
 }
 
+TEST_F(AssemblerMIPS32r6Test, SeleqzS) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::SeleqzS, "seleqz.s ${reg1}, ${reg2}, ${reg3}"),
+            "seleqz.s");
+}
+
+TEST_F(AssemblerMIPS32r6Test, SeleqzD) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::SeleqzD, "seleqz.d ${reg1}, ${reg2}, ${reg3}"),
+            "seleqz.d");
+}
+
+TEST_F(AssemblerMIPS32r6Test, SelnezS) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::SelnezS, "selnez.s ${reg1}, ${reg2}, ${reg3}"),
+            "selnez.s");
+}
+
+TEST_F(AssemblerMIPS32r6Test, SelnezD) {
+  DriverStr(RepeatFFF(&mips::MipsAssembler::SelnezD, "selnez.d ${reg1}, ${reg2}, ${reg3}"),
+            "selnez.d");
+}
+
 TEST_F(AssemblerMIPS32r6Test, ClassS) {
   DriverStr(RepeatFF(&mips::MipsAssembler::ClassS, "class.s ${reg1}, ${reg2}"), "class.s");
 }
diff --git a/compiler/utils/mips/assembler_mips_test.cc b/compiler/utils/mips/assembler_mips_test.cc
index a9abf2f..c24e1b1 100644
--- a/compiler/utils/mips/assembler_mips_test.cc
+++ b/compiler/utils/mips/assembler_mips_test.cc
@@ -188,7 +188,7 @@
 
   void BranchCondOneRegHelper(void (mips::MipsAssembler::*f)(mips::Register,
                                                              mips::MipsLabel*),
-                              std::string instr_name) {
+                              const std::string& instr_name) {
     mips::MipsLabel label;
     (Base::GetAssembler()->*f)(mips::A0, &label);
     constexpr size_t kAdduCount1 = 63;
@@ -217,7 +217,7 @@
   void BranchCondTwoRegsHelper(void (mips::MipsAssembler::*f)(mips::Register,
                                                               mips::Register,
                                                               mips::MipsLabel*),
-                               std::string instr_name) {
+                               const std::string& instr_name) {
     mips::MipsLabel label;
     (Base::GetAssembler()->*f)(mips::A0, mips::A1, &label);
     constexpr size_t kAdduCount1 = 63;
@@ -667,6 +667,22 @@
             "MovtD");
 }
 
+TEST_F(AssemblerMIPSTest, MovzS) {
+  DriverStr(RepeatFFR(&mips::MipsAssembler::MovzS, "movz.s ${reg1}, ${reg2}, ${reg3}"), "MovzS");
+}
+
+TEST_F(AssemblerMIPSTest, MovzD) {
+  DriverStr(RepeatFFR(&mips::MipsAssembler::MovzD, "movz.d ${reg1}, ${reg2}, ${reg3}"), "MovzD");
+}
+
+TEST_F(AssemblerMIPSTest, MovnS) {
+  DriverStr(RepeatFFR(&mips::MipsAssembler::MovnS, "movn.s ${reg1}, ${reg2}, ${reg3}"), "MovnS");
+}
+
+TEST_F(AssemblerMIPSTest, MovnD) {
+  DriverStr(RepeatFFR(&mips::MipsAssembler::MovnD, "movn.d ${reg1}, ${reg2}, ${reg3}"), "MovnD");
+}
+
 TEST_F(AssemblerMIPSTest, CvtSW) {
   DriverStr(RepeatFF(&mips::MipsAssembler::Cvtsw, "cvt.s.w ${reg1}, ${reg2}"), "CvtSW");
 }
diff --git a/compiler/utils/mips64/assembler_mips64.h b/compiler/utils/mips64/assembler_mips64.h
index 6277b5d..238cb9d 100644
--- a/compiler/utils/mips64/assembler_mips64.h
+++ b/compiler/utils/mips64/assembler_mips64.h
@@ -335,6 +335,8 @@
 
 class Mips64Assembler FINAL : public Assembler, public JNIMacroAssembler<PointerSize::k64> {
  public:
+  using JNIBase = JNIMacroAssembler<PointerSize::k64>;
+
   explicit Mips64Assembler(ArenaAllocator* arena)
       : Assembler(arena),
         overwriting_(false),
@@ -574,6 +576,35 @@
   }
 
   void Bind(Mips64Label* label);
+
+  // Don't warn about a different virtual Bind/Jump in the base class.
+  using JNIBase::Bind;
+  using JNIBase::Jump;
+
+  // Create a new label that can be used with Jump/Bind calls.
+  std::unique_ptr<JNIMacroLabel> CreateLabel() OVERRIDE {
+    LOG(FATAL) << "Not implemented on MIPS64";
+    UNREACHABLE();
+  }
+  // Emit an unconditional jump to the label.
+  void Jump(JNIMacroLabel* label ATTRIBUTE_UNUSED) OVERRIDE {
+    LOG(FATAL) << "Not implemented on MIPS64";
+    UNREACHABLE();
+  }
+  // Emit a conditional jump to the label by applying a unary condition test to the register.
+  void Jump(JNIMacroLabel* label ATTRIBUTE_UNUSED,
+            JNIMacroUnaryCondition cond ATTRIBUTE_UNUSED,
+            ManagedRegister test ATTRIBUTE_UNUSED) OVERRIDE {
+    LOG(FATAL) << "Not implemented on MIPS64";
+    UNREACHABLE();
+  }
+
+  // Code at this offset will serve as the target for the Jump call.
+  void Bind(JNIMacroLabel* label ATTRIBUTE_UNUSED) OVERRIDE {
+    LOG(FATAL) << "Not implemented on MIPS64";
+    UNREACHABLE();
+  }
+
   void Bc(Mips64Label* label);
   void Jialc(Mips64Label* label, GpuRegister indirect_reg);
   void Bltc(GpuRegister rs, GpuRegister rt, Mips64Label* label);
diff --git a/compiler/utils/mips64/assembler_mips64_test.cc b/compiler/utils/mips64/assembler_mips64_test.cc
index 1fdef96..ba8f25e 100644
--- a/compiler/utils/mips64/assembler_mips64_test.cc
+++ b/compiler/utils/mips64/assembler_mips64_test.cc
@@ -212,7 +212,7 @@
 
   void BranchCondOneRegHelper(void (mips64::Mips64Assembler::*f)(mips64::GpuRegister,
                                                                  mips64::Mips64Label*),
-                              std::string instr_name) {
+                              const std::string& instr_name) {
     mips64::Mips64Label label;
     (Base::GetAssembler()->*f)(mips64::A0, &label);
     constexpr size_t kAdduCount1 = 63;
@@ -241,7 +241,7 @@
   void BranchCondTwoRegsHelper(void (mips64::Mips64Assembler::*f)(mips64::GpuRegister,
                                                                   mips64::GpuRegister,
                                                                   mips64::Mips64Label*),
-                               std::string instr_name) {
+                               const std::string& instr_name) {
     mips64::Mips64Label label;
     (Base::GetAssembler()->*f)(mips64::A0, mips64::A1, &label);
     constexpr size_t kAdduCount1 = 63;
diff --git a/compiler/utils/test_dex_file_builder_test.cc b/compiler/utils/test_dex_file_builder_test.cc
index 7a424a2..da4ac79 100644
--- a/compiler/utils/test_dex_file_builder_test.cc
+++ b/compiler/utils/test_dex_file_builder_test.cc
@@ -66,14 +66,14 @@
   }
 
   ASSERT_EQ(1u, dex_file->NumFieldIds());
-  EXPECT_STREQ("[I TestClass.intField", PrettyField(0u, *dex_file).c_str());
+  EXPECT_STREQ("[I TestClass.intField", dex_file->PrettyField(0u).c_str());
 
   ASSERT_EQ(2u, dex_file->NumProtoIds());
   ASSERT_EQ(2u, dex_file->NumMethodIds());
   EXPECT_STREQ("TestClass TestClass.bar(java.lang.Object, java.lang.Object[])",
-               PrettyMethod(0u, *dex_file).c_str());
+               dex_file->PrettyMethod(0u).c_str());
   EXPECT_STREQ("int TestClass.foo()",
-               PrettyMethod(1u, *dex_file).c_str());
+               dex_file->PrettyMethod(1u).c_str());
 
   EXPECT_EQ(0u, builder.GetStringIdx("Arbitrary string"));
   EXPECT_EQ(2u, builder.GetTypeIdx("Ljava/lang/Class;"));
diff --git a/compiler/utils/x86/jni_macro_assembler_x86.cc b/compiler/utils/x86/jni_macro_assembler_x86.cc
index 77af885..cfdf80b 100644
--- a/compiler/utils/x86/jni_macro_assembler_x86.cc
+++ b/compiler/utils/x86/jni_macro_assembler_x86.cc
@@ -215,8 +215,12 @@
   if (dest.IsNoRegister()) {
     CHECK_EQ(0u, size);
   } else if (dest.IsCpuRegister()) {
-    CHECK_EQ(4u, size);
-    __ fs()->movl(dest.AsCpuRegister(), Address::Absolute(src));
+    if (size == 1u) {
+      __ fs()->movzxb(dest.AsCpuRegister(), Address::Absolute(src));
+    } else {
+      CHECK_EQ(4u, size);
+      __ fs()->movl(dest.AsCpuRegister(), Address::Absolute(src));
+    }
   } else if (dest.IsRegisterPair()) {
     CHECK_EQ(8u, size);
     __ fs()->movl(dest.AsRegisterPairLow(), Address::Absolute(src));
@@ -519,6 +523,48 @@
   __ j(kNotEqual, slow->Entry());
 }
 
+std::unique_ptr<JNIMacroLabel> X86JNIMacroAssembler::CreateLabel() {
+  return std::unique_ptr<JNIMacroLabel>(new X86JNIMacroLabel());
+}
+
+void X86JNIMacroAssembler::Jump(JNIMacroLabel* label) {
+  CHECK(label != nullptr);
+  __ jmp(X86JNIMacroLabel::Cast(label)->AsX86());
+}
+
+void X86JNIMacroAssembler::Jump(JNIMacroLabel* label,
+                                JNIMacroUnaryCondition condition,
+                                ManagedRegister test) {
+  CHECK(label != nullptr);
+
+  art::x86::Condition x86_cond;
+  switch (condition) {
+    case JNIMacroUnaryCondition::kZero:
+      x86_cond = art::x86::kZero;
+      break;
+    case JNIMacroUnaryCondition::kNotZero:
+      x86_cond = art::x86::kNotZero;
+      break;
+    default:
+      LOG(FATAL) << "Not implemented condition: " << static_cast<int>(condition);
+      UNREACHABLE();
+  }
+
+  // TEST reg, reg
+  // Jcc <Offset>
+  __ testl(test.AsX86().AsCpuRegister(), test.AsX86().AsCpuRegister());
+  __ j(x86_cond, X86JNIMacroLabel::Cast(label)->AsX86());
+
+
+  // X86 also has JCZX, JECZX, however it's not worth it to implement
+  // because we aren't likely to codegen with ECX+kZero check.
+}
+
+void X86JNIMacroAssembler::Bind(JNIMacroLabel* label) {
+  CHECK(label != nullptr);
+  __ Bind(X86JNIMacroLabel::Cast(label)->AsX86());
+}
+
 #undef __
 
 void X86ExceptionSlowPath::Emit(Assembler *sasm) {
diff --git a/compiler/utils/x86/jni_macro_assembler_x86.h b/compiler/utils/x86/jni_macro_assembler_x86.h
index 015584c..8ffda64 100644
--- a/compiler/utils/x86/jni_macro_assembler_x86.h
+++ b/compiler/utils/x86/jni_macro_assembler_x86.h
@@ -30,6 +30,8 @@
 namespace art {
 namespace x86 {
 
+class X86JNIMacroLabel;
+
 class X86JNIMacroAssembler FINAL : public JNIMacroAssemblerFwd<X86Assembler, PointerSize::k32> {
  public:
   explicit X86JNIMacroAssembler(ArenaAllocator* arena) : JNIMacroAssemblerFwd(arena) {}
@@ -152,10 +154,29 @@
   // and branch to a ExceptionSlowPath if it is.
   void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE;
 
+  // Create a new label that can be used with Jump/Bind calls.
+  std::unique_ptr<JNIMacroLabel> CreateLabel() OVERRIDE;
+  // Emit an unconditional jump to the label.
+  void Jump(JNIMacroLabel* label) OVERRIDE;
+  // Emit a conditional jump to the label by applying a unary condition test to the register.
+  void Jump(JNIMacroLabel* label, JNIMacroUnaryCondition cond, ManagedRegister test) OVERRIDE;
+  // Code at this offset will serve as the target for the Jump call.
+  void Bind(JNIMacroLabel* label) OVERRIDE;
+
  private:
   DISALLOW_COPY_AND_ASSIGN(X86JNIMacroAssembler);
 };
 
+class X86JNIMacroLabel FINAL
+    : public JNIMacroLabelCommon<X86JNIMacroLabel,
+                                 art::Label,
+                                 kX86> {
+ public:
+  art::Label* AsX86() {
+    return AsPlatformLabel();
+  }
+};
+
 }  // namespace x86
 }  // namespace art
 
diff --git a/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc
index 3e687a7..ec86254 100644
--- a/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc
+++ b/compiler/utils/x86_64/jni_macro_assembler_x86_64.cc
@@ -260,8 +260,12 @@
   if (dest.IsNoRegister()) {
     CHECK_EQ(0u, size);
   } else if (dest.IsCpuRegister()) {
-    CHECK_EQ(4u, size);
-    __ gs()->movl(dest.AsCpuRegister(), Address::Absolute(src, true));
+    if (size == 1u) {
+      __ gs()->movzxb(dest.AsCpuRegister(), Address::Absolute(src, true));
+    } else {
+      CHECK_EQ(4u, size);
+      __ gs()->movl(dest.AsCpuRegister(), Address::Absolute(src, true));
+    }
   } else if (dest.IsRegisterPair()) {
     CHECK_EQ(8u, size);
     __ gs()->movq(dest.AsRegisterPairLow(), Address::Absolute(src, true));
@@ -585,6 +589,44 @@
   __ j(kNotEqual, slow->Entry());
 }
 
+std::unique_ptr<JNIMacroLabel> X86_64JNIMacroAssembler::CreateLabel() {
+  return std::unique_ptr<JNIMacroLabel>(new X86_64JNIMacroLabel());
+}
+
+void X86_64JNIMacroAssembler::Jump(JNIMacroLabel* label) {
+  CHECK(label != nullptr);
+  __ jmp(X86_64JNIMacroLabel::Cast(label)->AsX86_64());
+}
+
+void X86_64JNIMacroAssembler::Jump(JNIMacroLabel* label,
+                                   JNIMacroUnaryCondition condition,
+                                   ManagedRegister test) {
+  CHECK(label != nullptr);
+
+  art::x86_64::Condition x86_64_cond;
+  switch (condition) {
+    case JNIMacroUnaryCondition::kZero:
+      x86_64_cond = art::x86_64::kZero;
+      break;
+    case JNIMacroUnaryCondition::kNotZero:
+      x86_64_cond = art::x86_64::kNotZero;
+      break;
+    default:
+      LOG(FATAL) << "Not implemented condition: " << static_cast<int>(condition);
+      UNREACHABLE();
+  }
+
+  // TEST reg, reg
+  // Jcc <Offset>
+  __ testq(test.AsX86_64().AsCpuRegister(), test.AsX86_64().AsCpuRegister());
+  __ j(x86_64_cond, X86_64JNIMacroLabel::Cast(label)->AsX86_64());
+}
+
+void X86_64JNIMacroAssembler::Bind(JNIMacroLabel* label) {
+  CHECK(label != nullptr);
+  __ Bind(X86_64JNIMacroLabel::Cast(label)->AsX86_64());
+}
+
 #undef __
 
 void X86_64ExceptionSlowPath::Emit(Assembler *sasm) {
diff --git a/compiler/utils/x86_64/jni_macro_assembler_x86_64.h b/compiler/utils/x86_64/jni_macro_assembler_x86_64.h
index 9107f3c..aa058f7 100644
--- a/compiler/utils/x86_64/jni_macro_assembler_x86_64.h
+++ b/compiler/utils/x86_64/jni_macro_assembler_x86_64.h
@@ -180,10 +180,29 @@
   // and branch to a ExceptionSlowPath if it is.
   void ExceptionPoll(ManagedRegister scratch, size_t stack_adjust) OVERRIDE;
 
+  // Create a new label that can be used with Jump/Bind calls.
+  std::unique_ptr<JNIMacroLabel> CreateLabel() OVERRIDE;
+  // Emit an unconditional jump to the label.
+  void Jump(JNIMacroLabel* label) OVERRIDE;
+  // Emit a conditional jump to the label by applying a unary condition test to the register.
+  void Jump(JNIMacroLabel* label, JNIMacroUnaryCondition cond, ManagedRegister test) OVERRIDE;
+  // Code at this offset will serve as the target for the Jump call.
+  void Bind(JNIMacroLabel* label) OVERRIDE;
+
  private:
   DISALLOW_COPY_AND_ASSIGN(X86_64JNIMacroAssembler);
 };
 
+class X86_64JNIMacroLabel FINAL
+    : public JNIMacroLabelCommon<X86_64JNIMacroLabel,
+                                 art::Label,
+                                 kX86_64> {
+ public:
+  art::Label* AsX86_64() {
+    return AsPlatformLabel();
+  }
+};
+
 }  // namespace x86_64
 }  // namespace art
 
diff --git a/runtime/verifier/verifier_deps_test.cc b/compiler/verifier_deps_test.cc
similarity index 64%
rename from runtime/verifier/verifier_deps_test.cc
rename to compiler/verifier_deps_test.cc
index 71203e6..6b690aa 100644
--- a/runtime/verifier/verifier_deps_test.cc
+++ b/compiler/verifier_deps_test.cc
@@ -14,14 +14,17 @@
  * limitations under the License.
  */
 
-#include "verifier_deps.h"
+// Test is in compiler, as it uses compiler related code.
+#include "verifier/verifier_deps.h"
 
 #include "class_linker.h"
-#include "common_runtime_test.h"
+#include "compiler/common_compiler_test.h"
+#include "compiler/driver/compiler_options.h"
+#include "compiler/driver/compiler_driver.h"
 #include "compiler_callbacks.h"
 #include "dex_file.h"
 #include "handle_scope-inl.h"
-#include "method_verifier-inl.h"
+#include "verifier/method_verifier-inl.h"
 #include "mirror/class_loader.h"
 #include "runtime.h"
 #include "thread.h"
@@ -47,10 +50,10 @@
   verifier::VerifierDeps* deps_;
 };
 
-class VerifierDepsTest : public CommonRuntimeTest {
+class VerifierDepsTest : public CommonCompilerTest {
  public:
   void SetUpRuntimeOptions(RuntimeOptions* options) {
-    CommonRuntimeTest::SetUpRuntimeOptions(options);
+    CommonCompilerTest::SetUpRuntimeOptions(options);
     callbacks_.reset(new VerifierDepsCompilerCallbacks());
   }
 
@@ -69,6 +72,22 @@
     return klass;
   }
 
+  void SetupCompilerDriver() {
+    compiler_options_->boot_image_ = false;
+    compiler_driver_->InitializeThreadPools();
+  }
+
+  void VerifyWithCompilerDriver(verifier::VerifierDeps* deps) {
+    TimingLogger timings("Verify", false, false);
+    // The compiler driver handles the verifier deps in the callbacks, so
+    // remove what this class did for unit testing.
+    verifier_deps_.reset(nullptr);
+    callbacks_->SetVerifierDeps(nullptr);
+    compiler_driver_->Verify(class_loader_, dex_files_, deps, &timings);
+    // The compiler driver may have updated the VerifierDeps in the callback object.
+    verifier_deps_.reset(callbacks_->GetVerifierDeps());
+  }
+
   void SetVerifierDeps(const std::vector<const DexFile*>& dex_files) {
     verifier_deps_.reset(new verifier::VerifierDeps(dex_files));
     VerifierDepsCompilerCallbacks* callbacks =
@@ -76,17 +95,24 @@
     callbacks->SetVerifierDeps(verifier_deps_.get());
   }
 
+  void LoadDexFile(ScopedObjectAccess* soa, const char* name1, const char* name2 = nullptr)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    class_loader_ = (name2 == nullptr) ? LoadDex(name1) : LoadMultiDex(name1, name2);
+    dex_files_ = GetDexFiles(class_loader_);
+    primary_dex_file_ = dex_files_.front();
+
+    SetVerifierDeps(dex_files_);
+    StackHandleScope<1> hs(soa->Self());
+    Handle<mirror::ClassLoader> loader =
+        hs.NewHandle(soa->Decode<mirror::ClassLoader>(class_loader_));
+    for (const DexFile* dex_file : dex_files_) {
+      class_linker_->RegisterDexFile(*dex_file, loader.Get());
+    }
+  }
+
   void LoadDexFile(ScopedObjectAccess* soa) REQUIRES_SHARED(Locks::mutator_lock_) {
-    class_loader_ = LoadDex("VerifierDeps");
-    std::vector<const DexFile*> dex_files = GetDexFiles(class_loader_);
-    CHECK_EQ(dex_files.size(), 1u);
-    dex_file_ = dex_files.front();
-
-    SetVerifierDeps(dex_files);
-
-    ObjPtr<mirror::ClassLoader> loader = soa->Decode<mirror::ClassLoader>(class_loader_);
-    class_linker_->RegisterDexFile(*dex_file_, loader.Ptr());
-
+    LoadDexFile(soa, "VerifierDeps");
+    CHECK_EQ(dex_files_.size(), 1u);
     klass_Main_ = FindClassByName("LMain;", soa);
     CHECK(klass_Main_ != nullptr);
   }
@@ -95,16 +121,16 @@
     ScopedObjectAccess soa(Thread::Current());
     LoadDexFile(&soa);
 
-    StackHandleScope<2> hs(Thread::Current());
+    StackHandleScope<2> hs(soa.Self());
     Handle<mirror::ClassLoader> class_loader_handle(
         hs.NewHandle(soa.Decode<mirror::ClassLoader>(class_loader_)));
     Handle<mirror::DexCache> dex_cache_handle(hs.NewHandle(klass_Main_->GetDexCache()));
 
     const DexFile::ClassDef* class_def = klass_Main_->GetClassDef();
-    const uint8_t* class_data = dex_file_->GetClassData(*class_def);
+    const uint8_t* class_data = primary_dex_file_->GetClassData(*class_def);
     CHECK(class_data != nullptr);
 
-    ClassDataItemIterator it(*dex_file_, class_data);
+    ClassDataItemIterator it(*primary_dex_file_, class_data);
     while (it.HasNextStaticField() || it.HasNextInstanceField()) {
       it.Next();
     }
@@ -112,7 +138,7 @@
     ArtMethod* method = nullptr;
     while (it.HasNextDirectMethod()) {
       ArtMethod* resolved_method = class_linker_->ResolveMethod<ClassLinker::kNoICCECheckForCache>(
-          *dex_file_,
+          *primary_dex_file_,
           it.GetMemberIndex(),
           dex_cache_handle,
           class_loader_handle,
@@ -128,7 +154,7 @@
     CHECK(method != nullptr);
 
     MethodVerifier verifier(Thread::Current(),
-                            dex_file_,
+                            primary_dex_file_,
                             dex_cache_handle,
                             class_loader_handle,
                             *class_def,
@@ -145,25 +171,13 @@
     return !verifier.HasFailures();
   }
 
-  void VerifyDexFile() {
-    std::string error_msg;
-    ScopedObjectAccess soa(Thread::Current());
-
-    LoadDexFile(&soa);
-    SetVerifierDeps({ dex_file_ });
-
-    for (size_t i = 0; i < dex_file_->NumClassDefs(); i++) {
-      const char* descriptor = dex_file_->GetClassDescriptor(dex_file_->GetClassDef(i));
-      mirror::Class* klass = FindClassByName(descriptor, &soa);
-      if (klass != nullptr) {
-        MethodVerifier::VerifyClass(Thread::Current(),
-                                    klass,
-                                    nullptr,
-                                    true,
-                                    HardFailLogMode::kLogWarning,
-                                    &error_msg);
-      }
+  void VerifyDexFile(const char* multidex = nullptr) {
+    {
+      ScopedObjectAccess soa(Thread::Current());
+      LoadDexFile(&soa, "VerifierDeps", multidex);
     }
+    SetupCompilerDriver();
+    VerifyWithCompilerDriver(/* verifier_deps */ nullptr);
   }
 
   bool TestAssignabilityRecording(const std::string& dst,
@@ -176,7 +190,7 @@
     DCHECK(klass_dst != nullptr);
     mirror::Class* klass_src = FindClassByName(src, &soa);
     DCHECK(klass_src != nullptr);
-    verifier_deps_->AddAssignability(*dex_file_,
+    verifier_deps_->AddAssignability(*primary_dex_file_,
                                      klass_dst,
                                      klass_src,
                                      is_strict,
@@ -184,6 +198,48 @@
     return true;
   }
 
+  // Check that the status of classes in `class_loader_` match the
+  // expected status in `deps`.
+  void VerifyClassStatus(const verifier::VerifierDeps& deps) {
+    ScopedObjectAccess soa(Thread::Current());
+    StackHandleScope<2> hs(soa.Self());
+    Handle<mirror::ClassLoader> class_loader_handle(
+        hs.NewHandle(soa.Decode<mirror::ClassLoader>(class_loader_)));
+    MutableHandle<mirror::Class> cls(hs.NewHandle<mirror::Class>(nullptr));
+    for (const DexFile* dex_file : dex_files_) {
+      const std::vector<uint16_t>& unverified_classes = deps.GetUnverifiedClasses(*dex_file);
+      std::set<uint16_t> set(unverified_classes.begin(), unverified_classes.end());
+      for (uint32_t i = 0; i < dex_file->NumClassDefs(); ++i) {
+        const DexFile::ClassDef& class_def = dex_file->GetClassDef(i);
+        const char* descriptor = dex_file->GetClassDescriptor(class_def);
+        cls.Assign(class_linker_->FindClass(soa.Self(), descriptor, class_loader_handle));
+        if (cls.Get() == nullptr) {
+          CHECK(soa.Self()->IsExceptionPending());
+          soa.Self()->ClearException();
+        } else if (set.find(class_def.class_idx_) == set.end()) {
+          ASSERT_EQ(cls->GetStatus(), mirror::Class::kStatusVerified);
+        } else {
+          ASSERT_LT(cls->GetStatus(), mirror::Class::kStatusVerified);
+        }
+      }
+    }
+  }
+
+  bool HasUnverifiedClass(const std::string& cls) {
+    const DexFile::TypeId* type_id = primary_dex_file_->FindTypeId(cls.c_str());
+    DCHECK(type_id != nullptr);
+    uint16_t index = primary_dex_file_->GetIndexForTypeId(*type_id);
+    MutexLock mu(Thread::Current(), *Locks::verifier_deps_lock_);
+    for (const auto& dex_dep : verifier_deps_->dex_deps_) {
+      for (uint16_t entry : dex_dep.second->unverified_classes_) {
+        if (index == entry) {
+          return true;
+        }
+      }
+    }
+    return false;
+  }
+
   // Iterates over all assignability records and tries to find an entry which
   // matches the expected destination/source pair.
   bool HasAssignable(const std::string& expected_destination,
@@ -361,6 +417,7 @@
     bool has_classes = false;
     bool has_fields = false;
     bool has_methods = false;
+    bool has_unverified_classes = false;
 
     for (auto& entry : verifier_deps_->dex_deps_) {
       has_strings |= !entry.second->strings_.empty();
@@ -371,13 +428,32 @@
       has_methods |= !entry.second->direct_methods_.empty();
       has_methods |= !entry.second->virtual_methods_.empty();
       has_methods |= !entry.second->interface_methods_.empty();
+      has_unverified_classes |= !entry.second->unverified_classes_.empty();
     }
 
-    return has_strings && has_assignability && has_classes && has_fields && has_methods;
+    return has_strings &&
+           has_assignability &&
+           has_classes &&
+           has_fields &&
+           has_methods &&
+           has_unverified_classes;
+  }
+
+  static std::set<VerifierDeps::MethodResolution>* GetMethods(
+      VerifierDeps::DexFileDeps* deps, MethodResolutionKind resolution_kind) {
+    if (resolution_kind == kDirectMethodResolution) {
+      return &deps->direct_methods_;
+    } else if (resolution_kind == kVirtualMethodResolution) {
+      return &deps->virtual_methods_;
+    } else {
+      DCHECK_EQ(resolution_kind, kInterfaceMethodResolution);
+      return &deps->interface_methods_;
+    }
   }
 
   std::unique_ptr<verifier::VerifierDeps> verifier_deps_;
-  const DexFile* dex_file_;
+  std::vector<const DexFile*> dex_files_;
+  const DexFile* primary_dex_file_;
   jobject class_loader_;
   mirror::Class* klass_Main_;
 };
@@ -388,21 +464,21 @@
 
   MutexLock mu(Thread::Current(), *Locks::verifier_deps_lock_);
 
-  uint32_t id_Main1 = verifier_deps_->GetIdFromString(*dex_file_, "LMain;");
-  ASSERT_LT(id_Main1, dex_file_->NumStringIds());
-  ASSERT_EQ("LMain;", verifier_deps_->GetStringFromId(*dex_file_, id_Main1));
+  uint32_t id_Main1 = verifier_deps_->GetIdFromString(*primary_dex_file_, "LMain;");
+  ASSERT_LT(id_Main1, primary_dex_file_->NumStringIds());
+  ASSERT_EQ("LMain;", verifier_deps_->GetStringFromId(*primary_dex_file_, id_Main1));
 
-  uint32_t id_Main2 = verifier_deps_->GetIdFromString(*dex_file_, "LMain;");
-  ASSERT_LT(id_Main2, dex_file_->NumStringIds());
-  ASSERT_EQ("LMain;", verifier_deps_->GetStringFromId(*dex_file_, id_Main2));
+  uint32_t id_Main2 = verifier_deps_->GetIdFromString(*primary_dex_file_, "LMain;");
+  ASSERT_LT(id_Main2, primary_dex_file_->NumStringIds());
+  ASSERT_EQ("LMain;", verifier_deps_->GetStringFromId(*primary_dex_file_, id_Main2));
 
-  uint32_t id_Lorem1 = verifier_deps_->GetIdFromString(*dex_file_, "Lorem ipsum");
-  ASSERT_GE(id_Lorem1, dex_file_->NumStringIds());
-  ASSERT_EQ("Lorem ipsum", verifier_deps_->GetStringFromId(*dex_file_, id_Lorem1));
+  uint32_t id_Lorem1 = verifier_deps_->GetIdFromString(*primary_dex_file_, "Lorem ipsum");
+  ASSERT_GE(id_Lorem1, primary_dex_file_->NumStringIds());
+  ASSERT_EQ("Lorem ipsum", verifier_deps_->GetStringFromId(*primary_dex_file_, id_Lorem1));
 
-  uint32_t id_Lorem2 = verifier_deps_->GetIdFromString(*dex_file_, "Lorem ipsum");
-  ASSERT_GE(id_Lorem2, dex_file_->NumStringIds());
-  ASSERT_EQ("Lorem ipsum", verifier_deps_->GetStringFromId(*dex_file_, id_Lorem2));
+  uint32_t id_Lorem2 = verifier_deps_->GetIdFromString(*primary_dex_file_, "Lorem ipsum");
+  ASSERT_GE(id_Lorem2, primary_dex_file_->NumStringIds());
+  ASSERT_EQ("Lorem ipsum", verifier_deps_->GetStringFromId(*primary_dex_file_, id_Lorem2));
 
   ASSERT_EQ(id_Main1, id_Main2);
   ASSERT_EQ(id_Lorem1, id_Lorem2);
@@ -1049,12 +1125,408 @@
   ASSERT_TRUE(HasEachKindOfRecord());
 
   std::vector<uint8_t> buffer;
-  verifier_deps_->Encode(&buffer);
+  verifier_deps_->Encode(dex_files_, &buffer);
   ASSERT_FALSE(buffer.empty());
 
-  VerifierDeps decoded_deps({ dex_file_ }, ArrayRef<uint8_t>(buffer));
+  VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer));
   ASSERT_TRUE(verifier_deps_->Equals(decoded_deps));
 }
 
+TEST_F(VerifierDepsTest, EncodeDecodeMulti) {
+  VerifyDexFile("MultiDex");
+
+  ASSERT_GT(NumberOfCompiledDexFiles(), 1u);
+  std::vector<uint8_t> buffer;
+  verifier_deps_->Encode(dex_files_, &buffer);
+  ASSERT_FALSE(buffer.empty());
+
+  // Create new DexFile, to mess with std::map order: the verifier deps used
+  // to iterate over the map, which doesn't guarantee insertion order. We fixed
+  // this by passing the expected order when encoding/decoding.
+  std::vector<std::unique_ptr<const DexFile>> first_dex_files = OpenTestDexFiles("VerifierDeps");
+  std::vector<std::unique_ptr<const DexFile>> second_dex_files = OpenTestDexFiles("MultiDex");
+  std::vector<const DexFile*> dex_files;
+  for (auto& dex_file : first_dex_files) {
+    dex_files.push_back(dex_file.get());
+  }
+  for (auto& dex_file : second_dex_files) {
+    dex_files.push_back(dex_file.get());
+  }
+
+  // Dump the new verifier deps to ensure it can properly read the data.
+  VerifierDeps decoded_deps(dex_files, ArrayRef<const uint8_t>(buffer));
+  std::ostringstream stream;
+  VariableIndentationOutputStream os(&stream);
+  decoded_deps.Dump(&os);
+}
+
+TEST_F(VerifierDepsTest, UnverifiedClasses) {
+  VerifyDexFile();
+  ASSERT_FALSE(HasUnverifiedClass("LMyThread;"));
+  // Test that a class with a soft failure is recorded.
+  ASSERT_TRUE(HasUnverifiedClass("LMain;"));
+  // Test that a class with hard failure is recorded.
+  ASSERT_TRUE(HasUnverifiedClass("LMyVerificationFailure;"));
+  // Test that a class with unresolved super is recorded.
+  ASSERT_FALSE(HasUnverifiedClass("LMyClassWithNoSuper;"));
+  // Test that a class with unresolved super and hard failure is recorded.
+  ASSERT_TRUE(HasUnverifiedClass("LMyClassWithNoSuperButFailures;"));
+}
+
+// Returns the next resolution kind in the enum.
+static MethodResolutionKind GetNextResolutionKind(MethodResolutionKind resolution_kind) {
+  if (resolution_kind == kDirectMethodResolution) {
+    return kVirtualMethodResolution;
+  } else if (resolution_kind == kVirtualMethodResolution) {
+    return kInterfaceMethodResolution;
+  } else {
+    DCHECK_EQ(resolution_kind, kInterfaceMethodResolution);
+    return kDirectMethodResolution;
+  }
+}
+
+TEST_F(VerifierDepsTest, VerifyDeps) {
+  VerifyDexFile();
+
+  ASSERT_EQ(1u, NumberOfCompiledDexFiles());
+  ASSERT_TRUE(HasEachKindOfRecord());
+
+  // When validating, we create a new class loader, as
+  // the existing `class_loader_` may contain erroneous classes,
+  // that ClassLinker::FindClass won't return.
+
+  ScopedObjectAccess soa(Thread::Current());
+  StackHandleScope<1> hs(soa.Self());
+  MutableHandle<mirror::ClassLoader> new_class_loader(hs.NewHandle<mirror::ClassLoader>(nullptr));
+  {
+    new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps")));
+    ASSERT_TRUE(verifier_deps_->ValidateDependencies(new_class_loader, soa.Self()));
+  }
+
+  std::vector<uint8_t> buffer;
+  verifier_deps_->Encode(dex_files_, &buffer);
+  ASSERT_FALSE(buffer.empty());
+
+  {
+    VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer));
+    new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps")));
+    ASSERT_TRUE(decoded_deps.ValidateDependencies(new_class_loader, soa.Self()));
+  }
+
+  // Fiddle with the dependencies to make sure we catch any change and fail to verify.
+
+  {
+    // Mess up with the assignable_types.
+    VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer));
+    VerifierDeps::DexFileDeps* deps = decoded_deps.GetDexFileDeps(*primary_dex_file_);
+    deps->assignable_types_.insert(*deps->unassignable_types_.begin());
+    new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps")));
+    ASSERT_FALSE(decoded_deps.ValidateDependencies(new_class_loader, soa.Self()));
+  }
+
+  {
+    // Mess up with the unassignable_types.
+    VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer));
+    VerifierDeps::DexFileDeps* deps = decoded_deps.GetDexFileDeps(*primary_dex_file_);
+    deps->unassignable_types_.insert(*deps->assignable_types_.begin());
+    new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps")));
+    ASSERT_FALSE(decoded_deps.ValidateDependencies(new_class_loader, soa.Self()));
+  }
+
+  // Mess up with classes.
+  {
+    VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer));
+    VerifierDeps::DexFileDeps* deps = decoded_deps.GetDexFileDeps(*primary_dex_file_);
+    bool found = false;
+    for (const auto& entry : deps->classes_) {
+      if (entry.IsResolved()) {
+        deps->classes_.insert(VerifierDeps::ClassResolution(
+            entry.GetDexTypeIndex(), VerifierDeps::kUnresolvedMarker));
+        found = true;
+        break;
+      }
+    }
+    ASSERT_TRUE(found);
+    new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps")));
+    ASSERT_FALSE(decoded_deps.ValidateDependencies(new_class_loader, soa.Self()));
+  }
+
+  {
+    VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer));
+    VerifierDeps::DexFileDeps* deps = decoded_deps.GetDexFileDeps(*primary_dex_file_);
+    bool found = false;
+    for (const auto& entry : deps->classes_) {
+      if (!entry.IsResolved()) {
+        deps->classes_.insert(VerifierDeps::ClassResolution(
+            entry.GetDexTypeIndex(), VerifierDeps::kUnresolvedMarker - 1));
+        found = true;
+        break;
+      }
+    }
+    ASSERT_TRUE(found);
+    new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps")));
+    ASSERT_FALSE(decoded_deps.ValidateDependencies(new_class_loader, soa.Self()));
+  }
+
+  {
+    VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer));
+    VerifierDeps::DexFileDeps* deps = decoded_deps.GetDexFileDeps(*primary_dex_file_);
+    bool found = false;
+    for (const auto& entry : deps->classes_) {
+      if (entry.IsResolved()) {
+        deps->classes_.insert(VerifierDeps::ClassResolution(
+            entry.GetDexTypeIndex(), entry.GetAccessFlags() - 1));
+        found = true;
+        break;
+      }
+    }
+    ASSERT_TRUE(found);
+    new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps")));
+    ASSERT_FALSE(decoded_deps.ValidateDependencies(new_class_loader, soa.Self()));
+  }
+
+  // Mess up with fields.
+  {
+    VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer));
+    VerifierDeps::DexFileDeps* deps = decoded_deps.GetDexFileDeps(*primary_dex_file_);
+    bool found = false;
+    for (const auto& entry : deps->fields_) {
+      if (entry.IsResolved()) {
+        deps->fields_.insert(VerifierDeps::FieldResolution(entry.GetDexFieldIndex(),
+                                                           VerifierDeps::kUnresolvedMarker,
+                                                           entry.GetDeclaringClassIndex()));
+        found = true;
+        break;
+      }
+    }
+    ASSERT_TRUE(found);
+    new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps")));
+    ASSERT_FALSE(decoded_deps.ValidateDependencies(new_class_loader, soa.Self()));
+  }
+
+  {
+    VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer));
+    VerifierDeps::DexFileDeps* deps = decoded_deps.GetDexFileDeps(*primary_dex_file_);
+    bool found = false;
+    for (const auto& entry : deps->fields_) {
+      if (!entry.IsResolved()) {
+        deps->fields_.insert(VerifierDeps::FieldResolution(0 /* we know there is a field there */,
+                                                           VerifierDeps::kUnresolvedMarker - 1,
+                                                           0  /* we know there is a class there */));
+        found = true;
+        break;
+      }
+    }
+    ASSERT_TRUE(found);
+    new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps")));
+    ASSERT_FALSE(decoded_deps.ValidateDependencies(new_class_loader, soa.Self()));
+  }
+
+  {
+    VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer));
+    VerifierDeps::DexFileDeps* deps = decoded_deps.GetDexFileDeps(*primary_dex_file_);
+    bool found = false;
+    for (const auto& entry : deps->fields_) {
+      if (entry.IsResolved()) {
+        deps->fields_.insert(VerifierDeps::FieldResolution(entry.GetDexFieldIndex(),
+                                                           entry.GetAccessFlags() - 1,
+                                                           entry.GetDeclaringClassIndex()));
+        found = true;
+        break;
+      }
+    }
+    ASSERT_TRUE(found);
+    new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps")));
+    ASSERT_FALSE(decoded_deps.ValidateDependencies(new_class_loader, soa.Self()));
+  }
+
+  {
+    VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer));
+    VerifierDeps::DexFileDeps* deps = decoded_deps.GetDexFileDeps(*primary_dex_file_);
+    bool found = false;
+    for (const auto& entry : deps->fields_) {
+      static constexpr uint32_t kNewTypeIndex = 0;
+      if (entry.GetDeclaringClassIndex() != kNewTypeIndex) {
+        deps->fields_.insert(VerifierDeps::FieldResolution(entry.GetDexFieldIndex(),
+                                                           entry.GetAccessFlags(),
+                                                           kNewTypeIndex));
+        found = true;
+        break;
+      }
+    }
+    ASSERT_TRUE(found);
+    new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps")));
+    ASSERT_FALSE(decoded_deps.ValidateDependencies(new_class_loader, soa.Self()));
+  }
+
+  // Mess up with methods.
+  for (MethodResolutionKind resolution_kind :
+            { kDirectMethodResolution, kVirtualMethodResolution, kInterfaceMethodResolution }) {
+    {
+      VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer));
+      VerifierDeps::DexFileDeps* deps = decoded_deps.GetDexFileDeps(*primary_dex_file_);
+      bool found = false;
+      std::set<VerifierDeps::MethodResolution>* methods = GetMethods(deps, resolution_kind);
+      for (const auto& entry : *methods) {
+        if (entry.IsResolved()) {
+          methods->insert(VerifierDeps::MethodResolution(entry.GetDexMethodIndex(),
+                                                         VerifierDeps::kUnresolvedMarker,
+                                                         entry.GetDeclaringClassIndex()));
+          found = true;
+          break;
+        }
+      }
+      ASSERT_TRUE(found);
+      new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps")));
+      ASSERT_FALSE(decoded_deps.ValidateDependencies(new_class_loader, soa.Self()));
+    }
+
+    {
+      VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer));
+      VerifierDeps::DexFileDeps* deps = decoded_deps.GetDexFileDeps(*primary_dex_file_);
+      bool found = false;
+      std::set<VerifierDeps::MethodResolution>* methods = GetMethods(deps, resolution_kind);
+      for (const auto& entry : *methods) {
+        if (!entry.IsResolved()) {
+          methods->insert(VerifierDeps::MethodResolution(0 /* we know there is a method there */,
+                                                         VerifierDeps::kUnresolvedMarker - 1,
+                                                         0  /* we know there is a class there */));
+          found = true;
+          break;
+        }
+      }
+      ASSERT_TRUE(found);
+      new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps")));
+      ASSERT_FALSE(decoded_deps.ValidateDependencies(new_class_loader, soa.Self()));
+    }
+
+    {
+      VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer));
+      VerifierDeps::DexFileDeps* deps = decoded_deps.GetDexFileDeps(*primary_dex_file_);
+      bool found = false;
+      std::set<VerifierDeps::MethodResolution>* methods = GetMethods(deps, resolution_kind);
+      for (const auto& entry : *methods) {
+        if (entry.IsResolved()) {
+          methods->insert(VerifierDeps::MethodResolution(entry.GetDexMethodIndex(),
+                                                         entry.GetAccessFlags() - 1,
+                                                         entry.GetDeclaringClassIndex()));
+          found = true;
+          break;
+        }
+      }
+      ASSERT_TRUE(found);
+      new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps")));
+      ASSERT_FALSE(decoded_deps.ValidateDependencies(new_class_loader, soa.Self()));
+    }
+
+    {
+      VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer));
+      VerifierDeps::DexFileDeps* deps = decoded_deps.GetDexFileDeps(*primary_dex_file_);
+      bool found = false;
+      std::set<VerifierDeps::MethodResolution>* methods = GetMethods(deps, resolution_kind);
+      for (const auto& entry : *methods) {
+        static constexpr uint32_t kNewTypeIndex = 0;
+        if (entry.IsResolved() && entry.GetDeclaringClassIndex() != kNewTypeIndex) {
+          methods->insert(VerifierDeps::MethodResolution(entry.GetDexMethodIndex(),
+                                                         entry.GetAccessFlags(),
+                                                         kNewTypeIndex));
+          found = true;
+          break;
+        }
+      }
+      ASSERT_TRUE(found);
+      new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps")));
+      ASSERT_FALSE(decoded_deps.ValidateDependencies(new_class_loader, soa.Self()));
+    }
+
+    {
+      VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer));
+      VerifierDeps::DexFileDeps* deps = decoded_deps.GetDexFileDeps(*primary_dex_file_);
+      bool found = false;
+      std::set<VerifierDeps::MethodResolution>* methods = GetMethods(deps, resolution_kind);
+      for (const auto& entry : *methods) {
+        if (entry.IsResolved()) {
+          GetMethods(deps, GetNextResolutionKind(resolution_kind))->insert(
+              VerifierDeps::MethodResolution(entry.GetDexMethodIndex(),
+                                             entry.GetAccessFlags(),
+                                             entry.GetDeclaringClassIndex()));
+          found = true;
+        }
+      }
+      ASSERT_TRUE(found);
+      new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps")));
+      ASSERT_FALSE(decoded_deps.ValidateDependencies(new_class_loader, soa.Self()));
+    }
+
+    {
+      VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer));
+      VerifierDeps::DexFileDeps* deps = decoded_deps.GetDexFileDeps(*primary_dex_file_);
+      bool found = false;
+      std::set<VerifierDeps::MethodResolution>* methods = GetMethods(deps, resolution_kind);
+      for (const auto& entry : *methods) {
+        if (entry.IsResolved()) {
+          GetMethods(deps, GetNextResolutionKind(GetNextResolutionKind(resolution_kind)))->insert(
+              VerifierDeps::MethodResolution(entry.GetDexMethodIndex(),
+                                             entry.GetAccessFlags(),
+                                             entry.GetDeclaringClassIndex()));
+          found = true;
+        }
+      }
+      ASSERT_TRUE(found);
+      new_class_loader.Assign(soa.Decode<mirror::ClassLoader>(LoadDex("VerifierDeps")));
+      ASSERT_FALSE(decoded_deps.ValidateDependencies(new_class_loader, soa.Self()));
+    }
+  }
+}
+
+TEST_F(VerifierDepsTest, CompilerDriver) {
+  SetupCompilerDriver();
+
+  // Test both multi-dex and single-dex configuration.
+  for (const char* multi : { "MultiDex", static_cast<const char*>(nullptr) }) {
+    // Test that the compiler driver behaves as expected when the dependencies
+    // verify and when they don't verify.
+    for (bool verify_failure : { false, true }) {
+      {
+        ScopedObjectAccess soa(Thread::Current());
+        LoadDexFile(&soa, "VerifierDeps", multi);
+      }
+      VerifyWithCompilerDriver(/* verifier_deps */ nullptr);
+
+      std::vector<uint8_t> buffer;
+      verifier_deps_->Encode(dex_files_, &buffer);
+
+      {
+        ScopedObjectAccess soa(Thread::Current());
+        LoadDexFile(&soa, "VerifierDeps", multi);
+      }
+      verifier::VerifierDeps decoded_deps(dex_files_, ArrayRef<const uint8_t>(buffer));
+      if (verify_failure) {
+        // Just taint the decoded VerifierDeps with one invalid entry.
+        VerifierDeps::DexFileDeps* deps = decoded_deps.GetDexFileDeps(*primary_dex_file_);
+        bool found = false;
+        for (const auto& entry : deps->classes_) {
+          if (entry.IsResolved()) {
+            deps->classes_.insert(VerifierDeps::ClassResolution(
+                entry.GetDexTypeIndex(), VerifierDeps::kUnresolvedMarker));
+            found = true;
+            break;
+          }
+        }
+        ASSERT_TRUE(found);
+      }
+      VerifyWithCompilerDriver(&decoded_deps);
+
+      if (verify_failure) {
+        ASSERT_FALSE(verifier_deps_ == nullptr);
+        ASSERT_FALSE(verifier_deps_->Equals(decoded_deps));
+      } else {
+        ASSERT_TRUE(verifier_deps_ == nullptr);
+        VerifyClassStatus(decoded_deps);
+      }
+    }
+  }
+}
+
 }  // namespace verifier
 }  // namespace art
diff --git a/dex2oat/dex2oat.cc b/dex2oat/dex2oat.cc
index 292aff4..65703a2 100644
--- a/dex2oat/dex2oat.cc
+++ b/dex2oat/dex2oat.cc
@@ -339,6 +339,11 @@
   UsageError("");
   UsageError("  --no-generate-mini-debug-info: Do not generate backtrace info.");
   UsageError("");
+  UsageError("  --generate-build-id: Generate GNU-compatible linker build ID ELF section with");
+  UsageError("      SHA-1 of the file content (and thus stable across identical builds)");
+  UsageError("");
+  UsageError("  --no-generate-build-id: Do not generate the build ID ELF section.");
+  UsageError("");
   UsageError("  --debuggable: Produce code debuggable with Java debugger.");
   UsageError("");
   UsageError("  --runtime-arg <argument>: used to specify various arguments for the runtime,");
@@ -512,7 +517,8 @@
       thread_count_(sysconf(_SC_NPROCESSORS_CONF)),
       start_ns_(NanoTime()),
       oat_fd_(-1),
-      vdex_fd_(-1),
+      input_vdex_fd_(-1),
+      output_vdex_fd_(-1),
       zip_fd_(-1),
       image_base_(0U),
       image_classes_zip_filename_(nullptr),
@@ -585,8 +591,13 @@
     ParseUintOption(option, "--zip-fd", &zip_fd_, Usage);
   }
 
-  void ParseVdexFd(const StringPiece& option) {
-    ParseUintOption(option, "--vdex-fd", &vdex_fd_, Usage);
+  void ParseInputVdexFd(const StringPiece& option) {
+    // Note that the input vdex fd might be -1.
+    ParseIntOption(option, "--input-vdex-fd", &input_vdex_fd_, Usage);
+  }
+
+  void ParseOutputVdexFd(const StringPiece& option) {
+    ParseUintOption(option, "--output-vdex-fd", &output_vdex_fd_, Usage);
   }
 
   void ParseOatFd(const StringPiece& option) {
@@ -632,9 +643,8 @@
   void ParseInstructionSetVariant(const StringPiece& option, ParserOptions* parser_options) {
     DCHECK(option.starts_with("--instruction-set-variant="));
     StringPiece str = option.substr(strlen("--instruction-set-variant=")).data();
-    instruction_set_features_.reset(
-        InstructionSetFeatures::FromVariant(
-            instruction_set_, str.as_string(), &parser_options->error_msg));
+    instruction_set_features_ = InstructionSetFeatures::FromVariant(
+        instruction_set_, str.as_string(), &parser_options->error_msg);
     if (instruction_set_features_.get() == nullptr) {
       Usage("%s", parser_options->error_msg.c_str());
     }
@@ -643,19 +653,18 @@
   void ParseInstructionSetFeatures(const StringPiece& option, ParserOptions* parser_options) {
     DCHECK(option.starts_with("--instruction-set-features="));
     StringPiece str = option.substr(strlen("--instruction-set-features=")).data();
-    if (instruction_set_features_.get() == nullptr) {
-      instruction_set_features_.reset(
-          InstructionSetFeatures::FromVariant(
-              instruction_set_, "default", &parser_options->error_msg));
+    if (instruction_set_features_ == nullptr) {
+      instruction_set_features_ = InstructionSetFeatures::FromVariant(
+          instruction_set_, "default", &parser_options->error_msg);
       if (instruction_set_features_.get() == nullptr) {
         Usage("Problem initializing default instruction set features variant: %s",
               parser_options->error_msg.c_str());
       }
     }
-    instruction_set_features_.reset(
+    instruction_set_features_ =
         instruction_set_features_->AddFeaturesFromString(str.as_string(),
-                                                         &parser_options->error_msg));
-    if (instruction_set_features_.get() == nullptr) {
+                                                         &parser_options->error_msg);
+    if (instruction_set_features_ == nullptr) {
       Usage("Error parsing '%s': %s", option.data(), parser_options->error_msg.c_str());
     }
   }
@@ -704,9 +713,9 @@
       Usage("--oat-file should not be used with --oat-fd");
     }
 
-    if ((vdex_fd_ == -1) != (oat_fd_ == -1)) {
+    if ((output_vdex_fd_ == -1) != (oat_fd_ == -1)) {
       Usage("VDEX and OAT output must be specified either with one --oat-filename "
-            "or with --oat-fd and --vdex-fd file descriptors");
+            "or with --oat-fd and --output-vdex-fd file descriptors");
     }
 
     if (!parser_options->oat_symbols.empty() && oat_fd_ != -1) {
@@ -717,8 +726,8 @@
       Usage("--oat-symbols should not be used with --host");
     }
 
-    if (vdex_fd_ != -1 && !image_filenames_.empty()) {
-      Usage("--vdex-fd should not be used with --image");
+    if (output_vdex_fd_ != -1 && !image_filenames_.empty()) {
+      Usage("--output-vdex-fd should not be used with --image");
     }
 
     if (oat_fd_ != -1 && !image_filenames_.empty()) {
@@ -823,9 +832,8 @@
     // If no instruction set feature was given, use the default one for the target
     // instruction set.
     if (instruction_set_features_.get() == nullptr) {
-      instruction_set_features_.reset(
-          InstructionSetFeatures::FromVariant(
-              instruction_set_, "default", &parser_options->error_msg));
+      instruction_set_features_ = InstructionSetFeatures::FromVariant(
+         instruction_set_, "default", &parser_options->error_msg);
       if (instruction_set_features_.get() == nullptr) {
         Usage("Problem initializing default instruction set features variant: %s",
               parser_options->error_msg.c_str());
@@ -988,7 +996,18 @@
       }
     }
 
-    char_backing_storage_.reserve((dex_locations_.size() - 1) * 2);
+    std::string base_symbol_oat;
+    if (!oat_unstripped_.empty()) {
+      base_symbol_oat = oat_unstripped_[0];
+      size_t last_symbol_oat_slash = base_symbol_oat.rfind('/');
+      if (last_symbol_oat_slash == std::string::npos) {
+        Usage("--multi-image used with unusable symbol filename %s", base_symbol_oat.c_str());
+      }
+      base_symbol_oat = base_symbol_oat.substr(0, last_symbol_oat_slash + 1);
+    }
+
+    const size_t num_expanded_files = 2 + (base_symbol_oat.empty() ? 0 : 1);
+    char_backing_storage_.reserve((dex_locations_.size() - 1) * num_expanded_files);
 
     // Now create the other names. Use a counted loop to skip the first one.
     for (size_t i = 1; i < dex_locations_.size(); ++i) {
@@ -1000,6 +1019,11 @@
       std::string oat_name = CreateMultiImageName(dex_locations_[i], prefix, infix, ".oat");
       char_backing_storage_.push_back(base_oat + oat_name);
       oat_filenames_.push_back((char_backing_storage_.end() - 1)->c_str());
+
+      if (!base_symbol_oat.empty()) {
+        char_backing_storage_.push_back(base_symbol_oat + oat_name);
+        oat_unstripped_.push_back((char_backing_storage_.end() - 1)->c_str());
+      }
     }
   }
 
@@ -1069,7 +1093,7 @@
     original_argc = argc;
     original_argv = argv;
 
-    InitLogging(argv);
+    InitLogging(argv, Runtime::Aborter);
 
     // Skip over argv[0].
     argv++;
@@ -1096,8 +1120,10 @@
         ParseZipFd(option);
       } else if (option.starts_with("--zip-location=")) {
         zip_location_ = option.substr(strlen("--zip-location=")).data();
-      } else if (option.starts_with("--vdex-fd=")) {
-        ParseVdexFd(option);
+      } else if (option.starts_with("--input-vdex-fd=")) {
+        ParseInputVdexFd(option);
+      } else if (option.starts_with("--output-vdex-fd=")) {
+        ParseOutputVdexFd(option);
       } else if (option.starts_with("--oat-file=")) {
         oat_filenames_.push_back(option.substr(strlen("--oat-file=")).data());
       } else if (option.starts_with("--oat-symbols=")) {
@@ -1240,7 +1266,7 @@
         }
         oat_files_.push_back(std::move(oat_file));
 
-        DCHECK_EQ(vdex_fd_, -1);
+        DCHECK_EQ(output_vdex_fd_, -1);
         std::string vdex_filename = ReplaceFileExtension(oat_filename, "vdex");
         std::unique_ptr<File> vdex_file(OS::CreateEmptyFile(vdex_filename.c_str()));
         if (vdex_file.get() == nullptr) {
@@ -1266,9 +1292,9 @@
       }
       oat_files_.push_back(std::move(oat_file));
 
-      DCHECK_NE(vdex_fd_, -1);
+      DCHECK_NE(output_vdex_fd_, -1);
       std::string vdex_location = ReplaceFileExtension(oat_location_, "vdex");
-      std::unique_ptr<File> vdex_file(new File(vdex_fd_, vdex_location, /* check_usage */ true));
+      std::unique_ptr<File> vdex_file(new File(output_vdex_fd_, vdex_location, /* check_usage */ true));
       if (vdex_file.get() == nullptr) {
         PLOG(ERROR) << "Failed to create vdex file: " << vdex_location;
         return false;
@@ -1479,12 +1505,6 @@
 
     dex_files_ = MakeNonOwningPointerVector(opened_dex_files_);
 
-    if (!IsBootImage()) {
-      // Collect verification dependencies when compiling an app.
-      verifier_deps_.reset(new verifier::VerifierDeps(dex_files_));
-      callbacks_->SetVerifierDeps(verifier_deps_.get());
-    }
-
     // We had to postpone the swap decision till now, as this is the point when we actually
     // know about the dex files we're going to use.
 
@@ -1642,7 +1662,7 @@
                                      swap_fd_,
                                      profile_compilation_info_.get()));
     driver_->SetDexFilesForOatFile(dex_files_);
-    driver_->CompileAll(class_loader_, dex_files_, timings_);
+    driver_->CompileAll(class_loader_, dex_files_, /* verifier_deps */ nullptr, timings_);
   }
 
   // Notes on the interleaving of creating the images and oat files to
@@ -1756,20 +1776,33 @@
       }
     }
 
+    // Initialize the writers with the compiler driver, image writer, and their
+    // dex files. The writers were created without those being there yet.
+    for (size_t i = 0, size = oat_files_.size(); i != size; ++i) {
+      std::unique_ptr<OatWriter>& oat_writer = oat_writers_[i];
+      std::vector<const DexFile*>& dex_files = dex_files_per_oat_file_[i];
+      oat_writer->Initialize(driver_.get(), image_writer_.get(), dex_files);
+    }
+
     {
       TimingLogger::ScopedTiming t2("dex2oat Write VDEX", timings_);
       DCHECK(IsBootImage() || oat_files_.size() == 1u);
-      DCHECK_EQ(IsBootImage(), verifier_deps_ == nullptr);
+      verifier::VerifierDeps* verifier_deps = callbacks_->GetVerifierDeps();
       for (size_t i = 0, size = oat_files_.size(); i != size; ++i) {
         File* vdex_file = vdex_files_[i].get();
         std::unique_ptr<BufferedOutputStream> vdex_out(
             MakeUnique<BufferedOutputStream>(MakeUnique<FileOutputStream>(vdex_file)));
 
-        if (!oat_writers_[i]->WriteVerifierDeps(vdex_out.get(), verifier_deps_.get())) {
+        if (!oat_writers_[i]->WriteVerifierDeps(vdex_out.get(), verifier_deps)) {
           LOG(ERROR) << "Failed to write verifier dependencies into VDEX " << vdex_file->GetPath();
           return false;
         }
 
+        if (!oat_writers_[i]->WriteQuickeningInfo(vdex_out.get())) {
+          LOG(ERROR) << "Failed to write quickening info into VDEX " << vdex_file->GetPath();
+          return false;
+        }
+
         // VDEX finalized, seek back to the beginning and write the header.
         if (!oat_writers_[i]->WriteVdexHeader(vdex_out.get())) {
           LOG(ERROR) << "Failed to write vdex header into VDEX " << vdex_file->GetPath();
@@ -1778,15 +1811,14 @@
       }
     }
 
-    linker::MultiOatRelativePatcher patcher(instruction_set_, instruction_set_features_.get());
     {
       TimingLogger::ScopedTiming t2("dex2oat Write ELF", timings_);
+      linker::MultiOatRelativePatcher patcher(instruction_set_, instruction_set_features_.get());
       for (size_t i = 0, size = oat_files_.size(); i != size; ++i) {
         std::unique_ptr<ElfWriter>& elf_writer = elf_writers_[i];
         std::unique_ptr<OatWriter>& oat_writer = oat_writers_[i];
 
-        std::vector<const DexFile*>& dex_files = dex_files_per_oat_file_[i];
-        oat_writer->PrepareLayout(driver_.get(), image_writer_.get(), dex_files, &patcher);
+        oat_writer->PrepareLayout(&patcher);
 
         size_t rodata_size = oat_writer->GetOatHeader().GetExecutableOffset();
         size_t text_size = oat_writer->GetOatSize() - rodata_size;
@@ -1899,15 +1931,14 @@
         TimingLogger::ScopedTiming t("dex2oat OatFile copy", timings_);
         std::unique_ptr<File> in(OS::OpenFileForReading(oat_filenames_[i]));
         std::unique_ptr<File> out(OS::CreateEmptyFile(oat_unstripped_[i]));
-        size_t buffer_size = 8192;
-        std::unique_ptr<uint8_t[]> buffer(new uint8_t[buffer_size]);
-        while (true) {
-          int bytes_read = TEMP_FAILURE_RETRY(read(in->Fd(), buffer.get(), buffer_size));
-          if (bytes_read <= 0) {
-            break;
-          }
-          bool write_ok = out->WriteFully(buffer.get(), bytes_read);
-          CHECK(write_ok);
+        int64_t in_length = in->GetLength();
+        if (in_length < 0) {
+          PLOG(ERROR) << "Failed to get the length of oat file: " << in->GetPath();
+          return false;
+        }
+        if (!out->Copy(in.get(), 0, in_length)) {
+          PLOG(ERROR) << "Failed to copy oat file to file: " << out->GetPath();
+          return false;
         }
         if (out->FlushCloseOrErase() != 0) {
           PLOG(ERROR) << "Failed to flush and close copied oat file: " << oat_unstripped_[i];
@@ -2330,6 +2361,11 @@
       LOG(ERROR) << "Failed to create runtime";
       return false;
     }
+
+    // Runtime::Init will rename this thread to be "main". Prefer "dex2oat" so that "top" and
+    // "ps -a" don't change to non-descript "main."
+    SetThreadName(kIsDebugBuild ? "dex2oatd" : "dex2oat");
+
     runtime_.reset(Runtime::Current());
     runtime_->SetInstructionSet(instruction_set_);
     for (int i = 0; i < Runtime::kLastCalleeSaveType; i++) {
@@ -2553,7 +2589,8 @@
   std::vector<const char*> oat_filenames_;
   std::vector<const char*> oat_unstripped_;
   int oat_fd_;
-  int vdex_fd_;
+  int input_vdex_fd_;
+  int output_vdex_fd_;
   std::vector<const char*> dex_filenames_;
   std::vector<const char*> dex_locations_;
   int zip_fd_;
@@ -2616,9 +2653,6 @@
   std::vector<std::vector<const DexFile*>> dex_files_per_oat_file_;
   std::unordered_map<const DexFile*, size_t> dex_file_oat_index_map_;
 
-  // Collector of verifier dependencies.
-  std::unique_ptr<verifier::VerifierDeps> verifier_deps_;
-
   // Backing storage.
   std::vector<std::string> char_backing_storage_;
 
diff --git a/dex2oat/dex2oat_test.cc b/dex2oat/dex2oat_test.cc
index 58dd047..fa32178 100644
--- a/dex2oat/dex2oat_test.cc
+++ b/dex2oat/dex2oat_test.cc
@@ -438,9 +438,7 @@
 
     Copy(GetDexSrc1(), dex_location);
 
-    std::vector<std::string> copy(extra_args);
-
-    GenerateOdexForTest(dex_location, odex_location, filter, copy);
+    GenerateOdexForTest(dex_location, odex_location, filter, extra_args);
 
     CheckValidity();
     ASSERT_TRUE(success_);
diff --git a/dexdump/Android.bp b/dexdump/Android.bp
index 3e589f7..60ce363 100644
--- a/dexdump/Android.bp
+++ b/dexdump/Android.bp
@@ -18,6 +18,7 @@
     name: "dexdump2",
     host_supported: true,
     srcs: [
+        "dexdump_cfg.cc",
         "dexdump_main.cc",
         "dexdump.cc",
     ],
diff --git a/dexdump/dexdump.cc b/dexdump/dexdump.cc
index 2042934..30de28e 100644
--- a/dexdump/dexdump.cc
+++ b/dexdump/dexdump.cc
@@ -42,9 +42,10 @@
 #include <sstream>
 #include <vector>
 
+#include "base/stringprintf.h"
+#include "dexdump_cfg.h"
 #include "dex_file-inl.h"
 #include "dex_instruction-inl.h"
-#include "utils.h"
 
 namespace art {
 
@@ -780,9 +781,11 @@
 static std::unique_ptr<char[]> indexString(const DexFile* pDexFile,
                                            const Instruction* pDecInsn,
                                            size_t bufSize) {
+  static const u4 kInvalidIndex = std::numeric_limits<u4>::max();
   std::unique_ptr<char[]> buf(new char[bufSize]);
   // Determine index and width of the string.
   u4 index = 0;
+  u4 secondary_index = kInvalidIndex;
   u4 width = 4;
   switch (Instruction::FormatOf(pDecInsn->Opcode())) {
     // SOME NOT SUPPORTED:
@@ -806,6 +809,12 @@
       index = pDecInsn->VRegC();
       width = 4;
       break;
+    case Instruction::k45cc:
+    case Instruction::k4rcc:
+      index = pDecInsn->VRegB();
+      secondary_index = pDecInsn->VRegH();
+      width = 4;
+      break;
     default:
       break;
   }  // switch
@@ -870,6 +879,26 @@
     case Instruction::kIndexFieldOffset:
       outSize = snprintf(buf.get(), bufSize, "[obj+%0*x]", width, index);
       break;
+    case Instruction::kIndexMethodAndProtoRef: {
+        std::string method("<method?>");
+        std::string proto("<proto?>");
+        if (index < pDexFile->GetHeader().method_ids_size_) {
+          const DexFile::MethodId& pMethodId = pDexFile->GetMethodId(index);
+          const char* name = pDexFile->StringDataByIdx(pMethodId.name_idx_);
+          const Signature signature = pDexFile->GetMethodSignature(pMethodId);
+          const char* backDescriptor = pDexFile->StringByTypeIdx(pMethodId.class_idx_);
+          method = StringPrintf("%s.%s:%s",
+                                backDescriptor, name, signature.ToString().c_str());
+        }
+        if (secondary_index < pDexFile->GetHeader().proto_ids_size_) {
+          const DexFile::ProtoId& protoId = pDexFile->GetProtoId(secondary_index);
+          const Signature signature = pDexFile->GetProtoSignature(protoId);
+          proto = signature.ToString();
+        }
+        outSize = snprintf(buf.get(), bufSize, "%s, %s // method@%0*x, proto@%0*x",
+                           method.c_str(), proto.c_str(), width, index, width, secondary_index);
+      }
+      break;
     // SOME NOT SUPPORTED:
     // case Instruction::kIndexVaries:
     // case Instruction::kIndexInlineMethod:
@@ -1043,7 +1072,8 @@
     case Instruction::k32x:        // op vAAAA, vBBBB
       fprintf(gOutFile, " v%d, v%d", pDecInsn->VRegA(), pDecInsn->VRegB());
       break;
-    case Instruction::k35c: {      // op {vC, vD, vE, vF, vG}, thing@BBBB
+    case Instruction::k35c:       // op {vC, vD, vE, vF, vG}, thing@BBBB
+    case Instruction::k45cc: {    // op {vC, vD, vE, vF, vG}, method@BBBB, proto@HHHH
     // NOT SUPPORTED:
     // case Instruction::k35ms:       // [opt] invoke-virtual+super
     // case Instruction::k35mi:       // [opt] inline invoke
@@ -1061,10 +1091,10 @@
       break;
     }
     case Instruction::k3rc:        // op {vCCCC .. v(CCCC+AA-1)}, thing@BBBB
+    case Instruction::k4rcc: {     // op {vCCCC .. v(CCCC+AA-1)}, method@BBBB, proto@HHHH
     // NOT SUPPORTED:
     // case Instruction::k3rms:       // [opt] invoke-virtual+super/range
     // case Instruction::k3rmi:       // [opt] execute-inline/range
-      {
         // This doesn't match the "dx" output when some of the args are
         // 64-bit values -- dx only shows the first register.
         fputs(" {", gOutFile);
@@ -1328,7 +1358,7 @@
   if (code_item != nullptr) {
     std::ostringstream oss;
     DumpMethodCFG(dex_file, dex_method_idx, oss);
-    fprintf(gOutFile, "%s", oss.str().c_str());
+    fputs(oss.str().c_str(), gOutFile);
   }
 }
 
@@ -1551,10 +1581,15 @@
 /*
  * Dumps the requested sections of the file.
  */
-static void processDexFile(const char* fileName, const DexFile* pDexFile) {
+static void processDexFile(const char* fileName,
+                           const DexFile* pDexFile, size_t i, size_t n) {
   if (gOptions.verbose) {
-    fprintf(gOutFile, "Opened '%s', DEX version '%.3s'\n",
-            fileName, pDexFile->GetHeader().magic_ + 4);
+    fputs("Opened '", gOutFile);
+    fputs(fileName, gOutFile);
+    if (n > 1) {
+      fprintf(gOutFile, ":%s", DexFile::GetMultiDexClassesDexName(i).c_str());
+    }
+    fprintf(gOutFile, "', DEX version '%.3s'\n", pDexFile->GetHeader().magic_ + 4);
   }
 
   // Headers.
@@ -1612,8 +1647,8 @@
   if (gOptions.checksumOnly) {
     fprintf(gOutFile, "Checksum verified\n");
   } else {
-    for (size_t i = 0; i < dex_files.size(); i++) {
-      processDexFile(fileName, dex_files[i].get());
+    for (size_t i = 0, n = dex_files.size(); i < n; i++) {
+      processDexFile(fileName, dex_files[i].get(), i, n);
     }
   }
   return 0;
diff --git a/dexdump/dexdump_cfg.cc b/dexdump/dexdump_cfg.cc
new file mode 100644
index 0000000..9e58128
--- /dev/null
+++ b/dexdump/dexdump_cfg.cc
@@ -0,0 +1,395 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * Implementation file for control flow graph dumping for the dexdump utility.
+ */
+
+#include "dexdump_cfg.h"
+
+#include <inttypes.h>
+#include <ostream>
+#include <map>
+#include <set>
+
+#include "dex_file-inl.h"
+#include "dex_instruction-inl.h"
+
+namespace art {
+
+static void dumpMethodCFGImpl(const DexFile* dex_file,
+                              uint32_t dex_method_idx,
+                              const DexFile::CodeItem* code_item,
+                              std::ostream& os) {
+  os << "digraph {\n";
+  os << "  # /* " << dex_file->PrettyMethod(dex_method_idx, true) << " */\n";
+
+  std::set<uint32_t> dex_pc_is_branch_target;
+  {
+    // Go and populate.
+    const Instruction* inst = Instruction::At(code_item->insns_);
+    for (uint32_t dex_pc = 0;
+         dex_pc < code_item->insns_size_in_code_units_;
+         dex_pc += inst->SizeInCodeUnits(), inst = inst->Next()) {
+      if (inst->IsBranch()) {
+        dex_pc_is_branch_target.insert(dex_pc + inst->GetTargetOffset());
+      } else if (inst->IsSwitch()) {
+        const uint16_t* insns = code_item->insns_ + dex_pc;
+        int32_t switch_offset = insns[1] | (static_cast<int32_t>(insns[2]) << 16);
+        const uint16_t* switch_insns = insns + switch_offset;
+        uint32_t switch_count = switch_insns[1];
+        int32_t targets_offset;
+        if ((*insns & 0xff) == Instruction::PACKED_SWITCH) {
+          /* 0=sig, 1=count, 2/3=firstKey */
+          targets_offset = 4;
+        } else {
+          /* 0=sig, 1=count, 2..count*2 = keys */
+          targets_offset = 2 + 2 * switch_count;
+        }
+        for (uint32_t targ = 0; targ < switch_count; targ++) {
+          int32_t offset =
+              static_cast<int32_t>(switch_insns[targets_offset + targ * 2]) |
+              static_cast<int32_t>(switch_insns[targets_offset + targ * 2 + 1] << 16);
+          dex_pc_is_branch_target.insert(dex_pc + offset);
+        }
+      }
+    }
+  }
+
+  // Create nodes for "basic blocks."
+  std::map<uint32_t, uint32_t> dex_pc_to_node_id;  // This only has entries for block starts.
+  std::map<uint32_t, uint32_t> dex_pc_to_incl_id;  // This has entries for all dex pcs.
+
+  {
+    const Instruction* inst = Instruction::At(code_item->insns_);
+    bool first_in_block = true;
+    bool force_new_block = false;
+    for (uint32_t dex_pc = 0;
+         dex_pc < code_item->insns_size_in_code_units_;
+         dex_pc += inst->SizeInCodeUnits(), inst = inst->Next()) {
+      if (dex_pc == 0 ||
+          (dex_pc_is_branch_target.find(dex_pc) != dex_pc_is_branch_target.end()) ||
+          force_new_block) {
+        uint32_t id = dex_pc_to_node_id.size();
+        if (id > 0) {
+          // End last node.
+          os << "}\"];\n";
+        }
+        // Start next node.
+        os << "  node" << id << " [shape=record,label=\"{";
+        dex_pc_to_node_id.insert(std::make_pair(dex_pc, id));
+        first_in_block = true;
+        force_new_block = false;
+      }
+
+      // Register instruction.
+      dex_pc_to_incl_id.insert(std::make_pair(dex_pc, dex_pc_to_node_id.size() - 1));
+
+      // Print instruction.
+      if (!first_in_block) {
+        os << " | ";
+      } else {
+        first_in_block = false;
+      }
+
+      // Dump the instruction. Need to escape '"', '<', '>', '{' and '}'.
+      os << "<" << "p" << dex_pc << ">";
+      os << " 0x" << std::hex << dex_pc << std::dec << ": ";
+      std::string inst_str = inst->DumpString(dex_file);
+      size_t cur_start = 0;  // It's OK to start at zero, instruction dumps don't start with chars
+                             // we need to escape.
+      while (cur_start != std::string::npos) {
+        size_t next_escape = inst_str.find_first_of("\"{}<>", cur_start + 1);
+        if (next_escape == std::string::npos) {
+          os << inst_str.substr(cur_start, inst_str.size() - cur_start);
+          break;
+        } else {
+          os << inst_str.substr(cur_start, next_escape - cur_start);
+          // Escape all necessary characters.
+          while (next_escape < inst_str.size()) {
+            char c = inst_str.at(next_escape);
+            if (c == '"' || c == '{' || c == '}' || c == '<' || c == '>') {
+              os << '\\' << c;
+            } else {
+              break;
+            }
+            next_escape++;
+          }
+          if (next_escape >= inst_str.size()) {
+            next_escape = std::string::npos;
+          }
+          cur_start = next_escape;
+        }
+      }
+
+      // Force a new block for some fall-throughs and some instructions that terminate the "local"
+      // control flow.
+      force_new_block = inst->IsSwitch() || inst->IsBasicBlockEnd();
+    }
+    // Close last node.
+    if (dex_pc_to_node_id.size() > 0) {
+      os << "}\"];\n";
+    }
+  }
+
+  // Create edges between them.
+  {
+    std::ostringstream regular_edges;
+    std::ostringstream taken_edges;
+    std::ostringstream exception_edges;
+
+    // Common set of exception edges.
+    std::set<uint32_t> exception_targets;
+
+    // These blocks (given by the first dex pc) need exception per dex-pc handling in a second
+    // pass. In the first pass we try and see whether we can use a common set of edges.
+    std::set<uint32_t> blocks_with_detailed_exceptions;
+
+    {
+      uint32_t last_node_id = std::numeric_limits<uint32_t>::max();
+      uint32_t old_dex_pc = 0;
+      uint32_t block_start_dex_pc = std::numeric_limits<uint32_t>::max();
+      const Instruction* inst = Instruction::At(code_item->insns_);
+      for (uint32_t dex_pc = 0;
+          dex_pc < code_item->insns_size_in_code_units_;
+          old_dex_pc = dex_pc, dex_pc += inst->SizeInCodeUnits(), inst = inst->Next()) {
+        {
+          auto it = dex_pc_to_node_id.find(dex_pc);
+          if (it != dex_pc_to_node_id.end()) {
+            if (!exception_targets.empty()) {
+              // It seems the last block had common exception handlers. Add the exception edges now.
+              uint32_t node_id = dex_pc_to_node_id.find(block_start_dex_pc)->second;
+              for (uint32_t handler_pc : exception_targets) {
+                auto node_id_it = dex_pc_to_incl_id.find(handler_pc);
+                if (node_id_it != dex_pc_to_incl_id.end()) {
+                  exception_edges << "  node" << node_id
+                      << " -> node" << node_id_it->second << ":p" << handler_pc
+                      << ";\n";
+                }
+              }
+              exception_targets.clear();
+            }
+
+            block_start_dex_pc = dex_pc;
+
+            // Seems to be a fall-through, connect to last_node_id. May be spurious edges for things
+            // like switch data.
+            uint32_t old_last = last_node_id;
+            last_node_id = it->second;
+            if (old_last != std::numeric_limits<uint32_t>::max()) {
+              regular_edges << "  node" << old_last << ":p" << old_dex_pc
+                  << " -> node" << last_node_id << ":p" << dex_pc
+                  << ";\n";
+            }
+          }
+
+          // Look at the exceptions of the first entry.
+          CatchHandlerIterator catch_it(*code_item, dex_pc);
+          for (; catch_it.HasNext(); catch_it.Next()) {
+            exception_targets.insert(catch_it.GetHandlerAddress());
+          }
+        }
+
+        // Handle instruction.
+
+        // Branch: something with at most two targets.
+        if (inst->IsBranch()) {
+          const int32_t offset = inst->GetTargetOffset();
+          const bool conditional = !inst->IsUnconditional();
+
+          auto target_it = dex_pc_to_node_id.find(dex_pc + offset);
+          if (target_it != dex_pc_to_node_id.end()) {
+            taken_edges << "  node" << last_node_id << ":p" << dex_pc
+                << " -> node" << target_it->second << ":p" << (dex_pc + offset)
+                << ";\n";
+          }
+          if (!conditional) {
+            // No fall-through.
+            last_node_id = std::numeric_limits<uint32_t>::max();
+          }
+        } else if (inst->IsSwitch()) {
+          // TODO: Iterate through all switch targets.
+          const uint16_t* insns = code_item->insns_ + dex_pc;
+          /* make sure the start of the switch is in range */
+          int32_t switch_offset = insns[1] | (static_cast<int32_t>(insns[2]) << 16);
+          /* offset to switch table is a relative branch-style offset */
+          const uint16_t* switch_insns = insns + switch_offset;
+          uint32_t switch_count = switch_insns[1];
+          int32_t targets_offset;
+          if ((*insns & 0xff) == Instruction::PACKED_SWITCH) {
+            /* 0=sig, 1=count, 2/3=firstKey */
+            targets_offset = 4;
+          } else {
+            /* 0=sig, 1=count, 2..count*2 = keys */
+            targets_offset = 2 + 2 * switch_count;
+          }
+          /* make sure the end of the switch is in range */
+          /* verify each switch target */
+          for (uint32_t targ = 0; targ < switch_count; targ++) {
+            int32_t offset =
+                static_cast<int32_t>(switch_insns[targets_offset + targ * 2]) |
+                static_cast<int32_t>(switch_insns[targets_offset + targ * 2 + 1] << 16);
+            int32_t abs_offset = dex_pc + offset;
+            auto target_it = dex_pc_to_node_id.find(abs_offset);
+            if (target_it != dex_pc_to_node_id.end()) {
+              // TODO: value label.
+              taken_edges << "  node" << last_node_id << ":p" << dex_pc
+                  << " -> node" << target_it->second << ":p" << (abs_offset)
+                  << ";\n";
+            }
+          }
+        }
+
+        // Exception edges. If this is not the first instruction in the block
+        if (block_start_dex_pc != dex_pc) {
+          std::set<uint32_t> current_handler_pcs;
+          CatchHandlerIterator catch_it(*code_item, dex_pc);
+          for (; catch_it.HasNext(); catch_it.Next()) {
+            current_handler_pcs.insert(catch_it.GetHandlerAddress());
+          }
+          if (current_handler_pcs != exception_targets) {
+            exception_targets.clear();  // Clear so we don't do something at the end.
+            blocks_with_detailed_exceptions.insert(block_start_dex_pc);
+          }
+        }
+
+        if (inst->IsReturn() ||
+            (inst->Opcode() == Instruction::THROW) ||
+            (inst->IsBranch() && inst->IsUnconditional())) {
+          // No fall-through.
+          last_node_id = std::numeric_limits<uint32_t>::max();
+        }
+      }
+      // Finish up the last block, if it had common exceptions.
+      if (!exception_targets.empty()) {
+        // It seems the last block had common exception handlers. Add the exception edges now.
+        uint32_t node_id = dex_pc_to_node_id.find(block_start_dex_pc)->second;
+        for (uint32_t handler_pc : exception_targets) {
+          auto node_id_it = dex_pc_to_incl_id.find(handler_pc);
+          if (node_id_it != dex_pc_to_incl_id.end()) {
+            exception_edges << "  node" << node_id
+                << " -> node" << node_id_it->second << ":p" << handler_pc
+                << ";\n";
+          }
+        }
+        exception_targets.clear();
+      }
+    }
+
+    // Second pass for detailed exception blocks.
+    // TODO
+    // Exception edges. If this is not the first instruction in the block
+    for (uint32_t dex_pc : blocks_with_detailed_exceptions) {
+      const Instruction* inst = Instruction::At(&code_item->insns_[dex_pc]);
+      uint32_t this_node_id = dex_pc_to_incl_id.find(dex_pc)->second;
+      while (true) {
+        CatchHandlerIterator catch_it(*code_item, dex_pc);
+        if (catch_it.HasNext()) {
+          std::set<uint32_t> handled_targets;
+          for (; catch_it.HasNext(); catch_it.Next()) {
+            uint32_t handler_pc = catch_it.GetHandlerAddress();
+            auto it = handled_targets.find(handler_pc);
+            if (it == handled_targets.end()) {
+              auto node_id_it = dex_pc_to_incl_id.find(handler_pc);
+              if (node_id_it != dex_pc_to_incl_id.end()) {
+                exception_edges << "  node" << this_node_id << ":p" << dex_pc
+                    << " -> node" << node_id_it->second << ":p" << handler_pc
+                    << ";\n";
+              }
+
+              // Mark as done.
+              handled_targets.insert(handler_pc);
+            }
+          }
+        }
+        if (inst->IsBasicBlockEnd()) {
+          break;
+        }
+
+        // Loop update. Have a break-out if the next instruction is a branch target and thus in
+        // another block.
+        dex_pc += inst->SizeInCodeUnits();
+        if (dex_pc >= code_item->insns_size_in_code_units_) {
+          break;
+        }
+        if (dex_pc_to_node_id.find(dex_pc) != dex_pc_to_node_id.end()) {
+          break;
+        }
+        inst = inst->Next();
+      }
+    }
+
+    // Write out the sub-graphs to make edges styled.
+    os << "\n";
+    os << "  subgraph regular_edges {\n";
+    os << "    edge [color=\"#000000\",weight=.3,len=3];\n\n";
+    os << "    " << regular_edges.str() << "\n";
+    os << "  }\n\n";
+
+    os << "  subgraph taken_edges {\n";
+    os << "    edge [color=\"#00FF00\",weight=.3,len=3];\n\n";
+    os << "    " << taken_edges.str() << "\n";
+    os << "  }\n\n";
+
+    os << "  subgraph exception_edges {\n";
+    os << "    edge [color=\"#FF0000\",weight=.3,len=3];\n\n";
+    os << "    " << exception_edges.str() << "\n";
+    os << "  }\n\n";
+  }
+
+  os << "}\n";
+}
+
+void DumpMethodCFG(const DexFile* dex_file, uint32_t dex_method_idx, std::ostream& os) {
+  // This is painful, we need to find the code item. That means finding the class, and then
+  // iterating the table.
+  if (dex_method_idx >= dex_file->NumMethodIds()) {
+    os << "Could not find method-idx.";
+    return;
+  }
+  const DexFile::MethodId& method_id = dex_file->GetMethodId(dex_method_idx);
+
+  const DexFile::ClassDef* class_def = dex_file->FindClassDef(method_id.class_idx_);
+  if (class_def == nullptr) {
+    os << "Could not find class-def.";
+    return;
+  }
+
+  const uint8_t* class_data = dex_file->GetClassData(*class_def);
+  if (class_data == nullptr) {
+    os << "No class data.";
+    return;
+  }
+
+  ClassDataItemIterator it(*dex_file, class_data);
+  // Skip fields
+  while (it.HasNextStaticField() || it.HasNextInstanceField()) {
+    it.Next();
+  }
+
+  // Find method, and dump it.
+  while (it.HasNextDirectMethod() || it.HasNextVirtualMethod()) {
+    uint32_t method_idx = it.GetMemberIndex();
+    if (method_idx == dex_method_idx) {
+      dumpMethodCFGImpl(dex_file, dex_method_idx, it.GetMethodCodeItem(), os);
+      return;
+    }
+    it.Next();
+  }
+
+  // Otherwise complain.
+  os << "Something went wrong, didn't find the method in the class data.";
+}
+
+}  // namespace art
diff --git a/test/562-no-intermediate/src/Main.java b/dexdump/dexdump_cfg.h
similarity index 67%
copy from test/562-no-intermediate/src/Main.java
copy to dexdump/dexdump_cfg.h
index 3b74d6f..64e5f9a 100644
--- a/test/562-no-intermediate/src/Main.java
+++ b/dexdump/dexdump_cfg.h
@@ -14,14 +14,18 @@
  * limitations under the License.
  */
 
-public class Main {
+#ifndef ART_DEXDUMP_DEXDUMP_CFG_H_
+#define ART_DEXDUMP_DEXDUMP_CFG_H_
 
-  /// CHECK-START-ARM64: int Main.main(String[]) register_allocator (after)
-  /// CHECK-NOT: IntermediateAddress
-  public static void main(String[] args) {
-    array[index] += Math.cos(42);
-  }
+#include <inttypes.h>
+#include <ostream>
 
-  static int index = 0;
-  static double[] array = new double[2];
-}
+namespace art {
+
+class DexFile;
+
+void DumpMethodCFG(const DexFile* dex_file, uint32_t dex_method_idx, std::ostream& os);
+
+}  // namespace art
+
+#endif  // ART_DEXDUMP_DEXDUMP_CFG_H_
diff --git a/dexdump/dexdump_main.cc b/dexdump/dexdump_main.cc
index 5c032a0..74cae3c 100644
--- a/dexdump/dexdump_main.cc
+++ b/dexdump/dexdump_main.cc
@@ -29,6 +29,7 @@
 #include <unistd.h>
 
 #include "base/logging.h"
+#include "runtime.h"
 #include "mem_map.h"
 
 namespace art {
@@ -59,7 +60,7 @@
  */
 int dexdumpDriver(int argc, char** argv) {
   // Art specific set up.
-  InitLogging(argv);
+  InitLogging(argv, Runtime::Aborter);
   MemMap::Init();
 
   // Reset options.
diff --git a/dexlayout/dex_ir.cc b/dexlayout/dex_ir.cc
index bc909c3..c3c763f 100644
--- a/dexlayout/dex_ir.cc
+++ b/dexlayout/dex_ir.cc
@@ -21,6 +21,7 @@
  */
 
 #include "dex_ir.h"
+#include "dex_instruction-inl.h"
 #include "dex_ir_builder.h"
 
 namespace art {
@@ -103,6 +104,105 @@
   }
 }
 
+static bool GetIdFromInstruction(Collections& collections,
+                                 const Instruction* dec_insn,
+                                 std::vector<TypeId*>* type_ids,
+                                 std::vector<StringId*>* string_ids,
+                                 std::vector<MethodId*>* method_ids,
+                                 std::vector<FieldId*>* field_ids) {
+  // Determine index and width of the string.
+  uint32_t index = 0;
+  switch (Instruction::FormatOf(dec_insn->Opcode())) {
+    // SOME NOT SUPPORTED:
+    // case Instruction::k20bc:
+    case Instruction::k21c:
+    case Instruction::k35c:
+    // case Instruction::k35ms:
+    case Instruction::k3rc:
+    // case Instruction::k3rms:
+    // case Instruction::k35mi:
+    // case Instruction::k3rmi:
+    case Instruction::k45cc:
+    case Instruction::k4rcc:
+      index = dec_insn->VRegB();
+      break;
+    case Instruction::k31c:
+      index = dec_insn->VRegB();
+      break;
+    case Instruction::k22c:
+    // case Instruction::k22cs:
+      index = dec_insn->VRegC();
+      break;
+    default:
+      break;
+  }  // switch
+
+  // Determine index type, and add reference to the appropriate collection.
+  switch (Instruction::IndexTypeOf(dec_insn->Opcode())) {
+    case Instruction::kIndexTypeRef:
+      if (index < collections.TypeIdsSize()) {
+        type_ids->push_back(collections.GetTypeId(index));
+        return true;
+      }
+      break;
+    case Instruction::kIndexStringRef:
+      if (index < collections.StringIdsSize()) {
+        string_ids->push_back(collections.GetStringId(index));
+        return true;
+      }
+      break;
+    case Instruction::kIndexMethodRef:
+    case Instruction::kIndexMethodAndProtoRef:
+      if (index < collections.MethodIdsSize()) {
+        method_ids->push_back(collections.GetMethodId(index));
+        return true;
+      }
+      break;
+    case Instruction::kIndexFieldRef:
+      if (index < collections.FieldIdsSize()) {
+        field_ids->push_back(collections.GetFieldId(index));
+        return true;
+      }
+      break;
+    case Instruction::kIndexUnknown:
+    case Instruction::kIndexNone:
+    case Instruction::kIndexVtableOffset:
+    case Instruction::kIndexFieldOffset:
+    default:
+      break;
+  }  // switch
+  return false;
+}
+
+/*
+ * Get all the types, strings, methods, and fields referred to from bytecode.
+ */
+static bool GetIdsFromByteCode(Collections& collections,
+                               const CodeItem* code,
+                               std::vector<TypeId*>* type_ids,
+                               std::vector<StringId*>* string_ids,
+                               std::vector<MethodId*>* method_ids,
+                               std::vector<FieldId*>* field_ids) {
+  bool has_id = false;
+  // Iterate over all instructions.
+  const uint16_t* insns = code->Insns();
+  for (uint32_t insn_idx = 0; insn_idx < code->InsnsSize();) {
+    const Instruction* instruction = Instruction::At(&insns[insn_idx]);
+    const uint32_t insn_width = instruction->SizeInCodeUnits();
+    if (insn_width == 0) {
+      break;
+    }
+    has_id |= GetIdFromInstruction(collections,
+                                   instruction,
+                                   type_ids,
+                                   string_ids,
+                                   method_ids,
+                                   field_ids);
+    insn_idx += insn_width;
+  }  // for
+  return has_id;
+}
+
 EncodedValue* Collections::ReadEncodedValue(const uint8_t** data) {
   const uint8_t encoded_value = *(*data)++;
   const uint8_t type = encoded_value & 0x1f;
@@ -514,6 +614,26 @@
   CodeItem* code_item = new CodeItem(
       registers_size, ins_size, outs_size, debug_info, insns_size, insns, tries, handler_list);
   code_items_.AddItem(code_item, offset);
+  // Add "fixup" references to types, strings, methods, and fields.
+  // This is temporary, as we will probably want more detailed parsing of the
+  // instructions here.
+  std::unique_ptr<std::vector<TypeId*>> type_ids(new std::vector<TypeId*>());
+  std::unique_ptr<std::vector<StringId*>> string_ids(new std::vector<StringId*>());
+  std::unique_ptr<std::vector<MethodId*>> method_ids(new std::vector<MethodId*>());
+  std::unique_ptr<std::vector<FieldId*>> field_ids(new std::vector<FieldId*>());
+  if (GetIdsFromByteCode(*this,
+                         code_item,
+                         type_ids.get(),
+                         string_ids.get(),
+                         method_ids.get(),
+                         field_ids.get())) {
+    CodeFixups* fixups = new CodeFixups(type_ids.release(),
+                                        string_ids.release(),
+                                        method_ids.release(),
+                                        field_ids.release());
+    code_item->SetCodeFixups(fixups);
+  }
+
   return code_item;
 }
 
diff --git a/dexlayout/dex_ir.h b/dexlayout/dex_ir.h
index 5e686d3..38eb0b1 100644
--- a/dexlayout/dex_ir.h
+++ b/dexlayout/dex_ir.h
@@ -802,6 +802,31 @@
 
 using TryItemVector = std::vector<std::unique_ptr<const TryItem>>;
 
+class CodeFixups {
+ public:
+  CodeFixups(std::vector<TypeId*>* type_ids,
+             std::vector<StringId*>* string_ids,
+             std::vector<MethodId*>* method_ids,
+             std::vector<FieldId*>* field_ids)
+      : type_ids_(type_ids),
+        string_ids_(string_ids),
+        method_ids_(method_ids),
+        field_ids_(field_ids) { }
+
+  std::vector<TypeId*>* TypeIds() const { return type_ids_.get(); }
+  std::vector<StringId*>* StringIds() const { return string_ids_.get(); }
+  std::vector<MethodId*>* MethodIds() const { return method_ids_.get(); }
+  std::vector<FieldId*>* FieldIds() const { return field_ids_.get(); }
+
+ private:
+  std::unique_ptr<std::vector<TypeId*>> type_ids_;
+  std::unique_ptr<std::vector<StringId*>> string_ids_;
+  std::unique_ptr<std::vector<MethodId*>> method_ids_;
+  std::unique_ptr<std::vector<FieldId*>> field_ids_;
+
+  DISALLOW_COPY_AND_ASSIGN(CodeFixups);
+};
+
 class CodeItem : public Item {
  public:
   CodeItem(uint16_t registers_size,
@@ -833,6 +858,9 @@
   TryItemVector* Tries() const { return tries_.get(); }
   CatchHandlerVector* Handlers() const { return handlers_.get(); }
 
+  void SetCodeFixups(CodeFixups* fixups) { fixups_.reset(fixups); }
+  CodeFixups* GetCodeFixups() const { return fixups_.get(); }
+
   void Accept(AbstractDispatcher* dispatch) { dispatch->Dispatch(this); }
 
  private:
@@ -844,6 +872,7 @@
   std::unique_ptr<uint16_t[]> insns_;
   std::unique_ptr<TryItemVector> tries_;  // This can be nullptr.
   std::unique_ptr<CatchHandlerVector> handlers_;  // This can be nullptr.
+  std::unique_ptr<CodeFixups> fixups_;  // This can be nullptr.
 
   DISALLOW_COPY_AND_ASSIGN(CodeItem);
 };
diff --git a/dexlayout/dex_visualize.cc b/dexlayout/dex_visualize.cc
index 46dff5f..bc9ca6d 100644
--- a/dexlayout/dex_visualize.cc
+++ b/dexlayout/dex_visualize.cc
@@ -279,6 +279,25 @@
     const dex_ir::CodeItem* code_item = method->GetCodeItem();
     if (code_item != nullptr) {
       DumpAddressRange(code_item, class_index);
+      const dex_ir::CodeFixups* fixups = code_item->GetCodeFixups();
+      if (fixups != nullptr) {
+        std::vector<dex_ir::TypeId*>* type_ids = fixups->TypeIds();
+        for (dex_ir::TypeId* type_id : *type_ids) {
+          DumpTypeId(type_id, class_index);
+        }
+        std::vector<dex_ir::StringId*>* string_ids = fixups->StringIds();
+        for (dex_ir::StringId* string_id : *string_ids) {
+          DumpStringId(string_id, class_index);
+        }
+        std::vector<dex_ir::MethodId*>* method_ids = fixups->MethodIds();
+        for (dex_ir::MethodId* method_id : *method_ids) {
+          DumpMethodId(method_id, class_index);
+        }
+        std::vector<dex_ir::FieldId*>* field_ids = fixups->FieldIds();
+        for (dex_ir::FieldId* field_id : *field_ids) {
+          DumpFieldId(field_id, class_index);
+        }
+      }
     }
   }
 
diff --git a/dexlayout/dexlayout.cc b/dexlayout/dexlayout.cc
index a9ae55f..aa80655 100644
--- a/dexlayout/dexlayout.cc
+++ b/dexlayout/dexlayout.cc
@@ -30,6 +30,7 @@
 #include <sstream>
 #include <vector>
 
+#include "base/stringprintf.h"
 #include "dex_ir_builder.h"
 #include "dex_file-inl.h"
 #include "dex_instruction-inl.h"
@@ -722,9 +723,11 @@
 static std::unique_ptr<char[]> IndexString(dex_ir::Header* header,
                                            const Instruction* dec_insn,
                                            size_t buf_size) {
+  static const uint32_t kInvalidIndex = std::numeric_limits<uint32_t>::max();
   std::unique_ptr<char[]> buf(new char[buf_size]);
   // Determine index and width of the string.
   uint32_t index = 0;
+  uint32_t secondary_index = kInvalidIndex;
   uint32_t width = 4;
   switch (Instruction::FormatOf(dec_insn->Opcode())) {
     // SOME NOT SUPPORTED:
@@ -748,6 +751,12 @@
       index = dec_insn->VRegC();
       width = 4;
       break;
+    case Instruction::k45cc:
+    case Instruction::k4rcc:
+      index = dec_insn->VRegB();
+      secondary_index = dec_insn->VRegH();
+      width = 4;
+      break;
     default:
       break;
   }  // switch
@@ -815,6 +824,24 @@
     // SOME NOT SUPPORTED:
     // case Instruction::kIndexVaries:
     // case Instruction::kIndexInlineMethod:
+    case Instruction::kIndexMethodAndProtoRef: {
+      std::string method("<method?>");
+      std::string proto("<proto?>");
+      if (index < header->GetCollections().MethodIdsSize()) {
+        dex_ir::MethodId* method_id = header->GetCollections().GetMethodId(index);
+        const char* name = method_id->Name()->Data();
+        std::string type_descriptor = GetSignatureForProtoId(method_id->Proto());
+        const char* back_descriptor = method_id->Class()->GetStringId()->Data();
+        method = StringPrintf("%s.%s:%s", back_descriptor, name, type_descriptor.c_str());
+      }
+      if (secondary_index < header->GetCollections().ProtoIdsSize()) {
+        dex_ir::ProtoId* proto_id = header->GetCollections().GetProtoId(secondary_index);
+        proto = GetSignatureForProtoId(proto_id);
+      }
+      outSize = snprintf(buf.get(), buf_size, "%s, %s // method@%0*x, proto@%0*x",
+                         method.c_str(), proto.c_str(), width, index, width, secondary_index);
+      }
+      break;
     default:
       outSize = snprintf(buf.get(), buf_size, "<?>");
       break;
@@ -984,7 +1011,8 @@
     case Instruction::k32x:        // op vAAAA, vBBBB
       fprintf(out_file_, " v%d, v%d", dec_insn->VRegA(), dec_insn->VRegB());
       break;
-    case Instruction::k35c: {      // op {vC, vD, vE, vF, vG}, thing@BBBB
+    case Instruction::k35c:           // op {vC, vD, vE, vF, vG}, thing@BBBB
+    case Instruction::k45cc: {        // op {vC, vD, vE, vF, vG}, meth@BBBB, proto@HHHH
     // NOT SUPPORTED:
     // case Instruction::k35ms:       // [opt] invoke-virtual+super
     // case Instruction::k35mi:       // [opt] inline invoke
@@ -1001,7 +1029,8 @@
       fprintf(out_file_, "}, %s", index_buf.get());
       break;
     }
-    case Instruction::k3rc:        // op {vCCCC .. v(CCCC+AA-1)}, thing@BBBB
+    case Instruction::k3rc:           // op {vCCCC .. v(CCCC+AA-1)}, thing@BBBB
+    case Instruction::k4rcc:          // op {vCCCC .. v(CCCC+AA-1)}, meth@BBBB, proto@HHHH
     // NOT SUPPORTED:
     // case Instruction::k3rms:       // [opt] invoke-virtual+super/range
     // case Instruction::k3rmi:       // [opt] execute-inline/range
@@ -1257,49 +1286,6 @@
 }
 
 /*
- * Dumping a CFG. Note that this will do duplicate work. utils.h doesn't expose the code-item
- * version, so the DumpMethodCFG code will have to iterate again to find it. But dexdump is a
- * tool, so this is not performance-critical.
- */
-
-static void DumpCFG(const DexFile* dex_file,
-                    uint32_t dex_method_idx,
-                    const DexFile::CodeItem* code) {
-  if (code != nullptr) {
-    std::ostringstream oss;
-    DumpMethodCFG(dex_file, dex_method_idx, oss);
-    fprintf(out_file_, "%s", oss.str().c_str());
-  }
-}
-
-static void DumpCFG(const DexFile* dex_file, int idx) {
-  const DexFile::ClassDef& class_def = dex_file->GetClassDef(idx);
-  const uint8_t* class_data = dex_file->GetClassData(class_def);
-  if (class_data == nullptr) {  // empty class such as a marker interface?
-    return;
-  }
-  ClassDataItemIterator it(*dex_file, class_data);
-  while (it.HasNextStaticField()) {
-    it.Next();
-  }
-  while (it.HasNextInstanceField()) {
-    it.Next();
-  }
-  while (it.HasNextDirectMethod()) {
-    DumpCFG(dex_file,
-            it.GetMemberIndex(),
-            it.GetMethodCodeItem());
-    it.Next();
-  }
-  while (it.HasNextVirtualMethod()) {
-    DumpCFG(dex_file,
-            it.GetMemberIndex(),
-            it.GetMethodCodeItem());
-    it.Next();
-  }
-}
-
-/*
  * Dumps the class.
  *
  * Note "idx" is a DexClassDef index, not a DexTypeId index.
@@ -1307,10 +1293,7 @@
  * If "*last_package" is nullptr or does not match the current class' package,
  * the value will be replaced with a newly-allocated string.
  */
-static void DumpClass(const DexFile* dex_file,
-                      dex_ir::Header* header,
-                      int idx,
-                      char** last_package) {
+static void DumpClass(dex_ir::Header* header, int idx, char** last_package) {
   dex_ir::ClassDef* class_def = header->GetCollections().GetClassDef(idx);
   // Omitting non-public class.
   if (options_.exports_only_ && (class_def->GetAccessFlags() & kAccPublic) == 0) {
@@ -1325,11 +1308,6 @@
     DumpClassAnnotations(header, idx);
   }
 
-  if (options_.show_cfg_) {
-    DumpCFG(dex_file, idx);
-    return;
-  }
-
   // For the XML output, show the package name.  Ideally we'd gather
   // up the classes, sort them, and dump them alphabetically so the
   // package name wouldn't jump around, but that's not a great plan
@@ -1532,7 +1510,7 @@
   char* package = nullptr;
   const uint32_t class_defs_size = header->GetCollections().ClassDefsSize();
   for (uint32_t i = 0; i < class_defs_size; i++) {
-    DumpClass(dex_file, header.get(), i, &package);
+    DumpClass(header.get(), i, &package);
   }  // for
 
   // Free the last package allocated.
@@ -1549,7 +1527,7 @@
   // Output dex file.
   if (options_.output_dex_directory_ != nullptr) {
     std::string output_location(options_.output_dex_directory_);
-    size_t last_slash = dex_file->GetLocation().rfind("/");
+    size_t last_slash = dex_file->GetLocation().rfind('/');
     output_location.append(dex_file->GetLocation().substr(last_slash));
     DexWriter::OutputDexFile(*header, output_location.c_str());
   }
diff --git a/dexlayout/dexlayout.h b/dexlayout/dexlayout.h
index c01eb79..a5bd992 100644
--- a/dexlayout/dexlayout.h
+++ b/dexlayout/dexlayout.h
@@ -44,7 +44,6 @@
   bool exports_only_;
   bool ignore_bad_checksum_;
   bool show_annotations_;
-  bool show_cfg_;
   bool show_file_headers_;
   bool show_section_headers_;
   bool verbose_;
diff --git a/dexlayout/dexlayout_main.cc b/dexlayout/dexlayout_main.cc
index 728e389..825dd50 100644
--- a/dexlayout/dexlayout_main.cc
+++ b/dexlayout/dexlayout_main.cc
@@ -31,6 +31,7 @@
 
 #include "base/logging.h"
 #include "jit/offline_profiling_info.h"
+#include "runtime.h"
 #include "mem_map.h"
 
 namespace art {
@@ -50,7 +51,6 @@
   fprintf(stderr, " -d : disassemble code sections\n");
   fprintf(stderr, " -e : display exported items only\n");
   fprintf(stderr, " -f : display summary information from file header\n");
-  fprintf(stderr, " -g : display CFG for dex\n");
   fprintf(stderr, " -h : display file header details\n");
   fprintf(stderr, " -i : ignore checksum failures\n");
   fprintf(stderr, " -l : output layout, either 'plain' or 'xml'\n");
@@ -65,7 +65,7 @@
  */
 int DexlayoutDriver(int argc, char** argv) {
   // Art specific set up.
-  InitLogging(argv);
+  InitLogging(argv, Runtime::Aborter);
   MemMap::Init();
 
   // Reset options.
@@ -98,9 +98,6 @@
       case 'f':  // display outer file header
         options_.show_file_headers_ = true;
         break;
-      case 'g':  // display cfg
-        options_.show_cfg_ = true;
-        break;
       case 'h':  // display section headers, i.e. all meta-data
         options_.show_section_headers_ = true;
         break;
diff --git a/dexlayout/dexlayout_test.cc b/dexlayout/dexlayout_test.cc
index 89544d7..c7f36be 100644
--- a/dexlayout/dexlayout_test.cc
+++ b/dexlayout/dexlayout_test.cc
@@ -37,12 +37,12 @@
   bool FullPlainOutputExec(std::string* error_msg) {
     // TODO: dexdump2 -> dexdump ?
     ScratchFile dexdump_output;
-    std::string dexdump_filename = dexdump_output.GetFilename();
+    const std::string& dexdump_filename = dexdump_output.GetFilename();
     std::string dexdump = GetTestAndroidRoot() + "/bin/dexdump2";
     EXPECT_TRUE(OS::FileExists(dexdump.c_str())) << dexdump << " should be a valid file path";
 
     ScratchFile dexlayout_output;
-    std::string dexlayout_filename = dexlayout_output.GetFilename();
+    const std::string& dexlayout_filename = dexlayout_output.GetFilename();
     std::string dexlayout = GetTestAndroidRoot() + "/bin/dexlayout";
     EXPECT_TRUE(OS::FileExists(dexlayout.c_str())) << dexlayout << " should be a valid file path";
 
@@ -70,8 +70,8 @@
   // Runs DexFileOutput test.
   bool DexFileOutputExec(std::string* error_msg) {
     ScratchFile tmp_file;
-    std::string tmp_name = tmp_file.GetFilename();
-    size_t tmp_last_slash = tmp_name.rfind("/");
+    const std::string& tmp_name = tmp_file.GetFilename();
+    size_t tmp_last_slash = tmp_name.rfind('/');
     std::string tmp_dir = tmp_name.substr(0, tmp_last_slash + 1);
     std::string dexlayout = GetTestAndroidRoot() + "/bin/dexlayout";
     EXPECT_TRUE(OS::FileExists(dexlayout.c_str())) << dexlayout << " should be a valid file path";
@@ -84,7 +84,7 @@
         return false;
       }
 
-      size_t dex_file_last_slash = dex_file.rfind("/");
+      size_t dex_file_last_slash = dex_file.rfind('/');
       std::string dex_file_name = dex_file.substr(dex_file_last_slash + 1);
       std::vector<std::string> unzip_exec_argv =
           { "/usr/bin/unzip", dex_file, "classes.dex", "-d", tmp_dir};
diff --git a/dexlist/dexlist.cc b/dexlist/dexlist.cc
index a1bde0e..68473c4 100644
--- a/dexlist/dexlist.cc
+++ b/dexlist/dexlist.cc
@@ -213,7 +213,7 @@
  */
 int dexlistDriver(int argc, char** argv) {
   // Art specific set up.
-  InitLogging(argv);
+  InitLogging(argv, Runtime::Aborter);
   MemMap::Init();
 
   // Reset options.
diff --git a/disassembler/disassembler_mips.cc b/disassembler/disassembler_mips.cc
index 9a73f29..4787395 100644
--- a/disassembler/disassembler_mips.cc
+++ b/disassembler/disassembler_mips.cc
@@ -389,6 +389,10 @@
   { kFpMask | (0x21f << 16), kCop1 | (0x200 << 16) | 15, "floor.w", "fad" },
   { kFpMask | (0x201 << 16), kCop1 | (0x200 << 16) | 17, "movf", "fadc" },
   { kFpMask | (0x201 << 16), kCop1 | (0x201 << 16) | 17, "movt", "fadc" },
+  { kFpMask | (0x10 << 21), kCop1 | (0x10 << 21) | 18, "movz", "fadT" },
+  { kFpMask | (0x10 << 21), kCop1 | (0x10 << 21) | 19, "movn", "fadT" },
+  { kFpMask | (0x10 << 21), kCop1 | (0x10 << 21) | 20, "seleqz", "fadt" },
+  { kFpMask | (0x10 << 21), kCop1 | (0x10 << 21) | 23, "selnez", "fadt" },
   { kFpMask | (0x21f << 16), kCop1 | (0x200 << 16) | 26, "rint", "fad" },
   { kFpMask | (0x21f << 16), kCop1 | (0x200 << 16) | 27, "class", "fad" },
   { kFpMask | (0x21f << 16), kCop1 | (0x200 << 16) | 32, "cvt.s", "fad" },
diff --git a/imgdiag/imgdiag.cc b/imgdiag/imgdiag.cc
index ace21aa..a374686 100644
--- a/imgdiag/imgdiag.cc
+++ b/imgdiag/imgdiag.cc
@@ -89,7 +89,7 @@
 
   // Return suffix of the file path after the last /. (e.g. /foo/bar -> bar, bar -> bar)
   static std::string BaseName(const std::string& str) {
-    size_t idx = str.rfind("/");
+    size_t idx = str.rfind('/');
     if (idx == std::string::npos) {
       return str;
     }
@@ -222,9 +222,9 @@
     // Attempt to find fields for all dirty bytes.
     mirror::Class* klass = obj->GetClass();
     if (obj->IsClass()) {
-      os << tabs << "Class " << PrettyClass(obj->AsClass()) << " " << obj << "\n";
+      os << tabs << "Class " << mirror::Class::PrettyClass(obj->AsClass()) << " " << obj << "\n";
     } else {
-      os << tabs << "Instance of " << PrettyClass(klass) << " " << obj << "\n";
+      os << tabs << "Instance of " << mirror::Class::PrettyClass(klass) << " " << obj << "\n";
     }
 
     std::unordered_set<ArtField*> dirty_instance_fields;
@@ -263,7 +263,7 @@
     if (!dirty_instance_fields.empty()) {
       os << tabs << "Dirty instance fields " << dirty_instance_fields.size() << "\n";
       for (ArtField* field : dirty_instance_fields) {
-        os << tabs << PrettyField(field)
+        os << tabs << ArtField::PrettyField(field)
            << " original=" << PrettyFieldValue(field, obj)
            << " remote=" << PrettyFieldValue(field, remote_obj) << "\n";
       }
@@ -271,7 +271,7 @@
     if (!dirty_static_fields.empty()) {
       os << tabs << "Dirty static fields " << dirty_static_fields.size() << "\n";
       for (ArtField* field : dirty_static_fields) {
-        os << tabs << PrettyField(field)
+        os << tabs << ArtField::PrettyField(field)
            << " original=" << PrettyFieldValue(field, obj)
            << " remote=" << PrettyFieldValue(field, remote_obj) << "\n";
       }
@@ -516,8 +516,8 @@
 
       // Sanity check that we are reading a real object
       CHECK(obj->GetClass() != nullptr) << "Image object at address " << obj << " has null class";
-      if (kUseBakerOrBrooksReadBarrier) {
-        obj->AssertReadBarrierPointer();
+      if (kUseBakerReadBarrier) {
+        obj->AssertReadBarrierState();
       }
 
       // Iterate every page this object belongs to
@@ -681,7 +681,7 @@
           class_data[klass].dirty_object_byte_count * 1.0f / object_sizes;
       float avg_object_size = object_sizes * 1.0f / dirty_object_count;
       const std::string& descriptor = class_data[klass].descriptor;
-      os << "    " << PrettyClass(klass) << " ("
+      os << "    " << mirror::Class::PrettyClass(klass) << " ("
          << "objects: " << dirty_object_count << ", "
          << "avg dirty bytes: " << avg_dirty_bytes_per_class << ", "
          << "avg object size: " << avg_object_size << ", "
@@ -789,7 +789,7 @@
       int object_sizes = class_data[klass].false_dirty_byte_count;
       float avg_object_size = object_sizes * 1.0f / object_count;
       const std::string& descriptor = class_data[klass].descriptor;
-      os << "    " << PrettyClass(klass) << " ("
+      os << "    " << mirror::Class::PrettyClass(klass) << " ("
          << "objects: " << object_count << ", "
          << "avg object size: " << avg_object_size << ", "
          << "total bytes: " << object_sizes << ", "
@@ -824,7 +824,7 @@
 
     os << "\n" << "  Clean object count by class:\n";
     for (const auto& vk_pair : clean_object_class_values) {
-      os << "    " << PrettyClass(vk_pair.second) << " (" << vk_pair.first << ")\n";
+      os << "    " << mirror::Class::PrettyClass(vk_pair.second) << " (" << vk_pair.first << ")\n";
     }
 
     return true;
diff --git a/oatdump/oatdump.cc b/oatdump/oatdump.cc
index d6006b2..4c01c14 100644
--- a/oatdump/oatdump.cc
+++ b/oatdump/oatdump.cc
@@ -64,7 +64,9 @@
 #include "string_reference.h"
 #include "thread_list.h"
 #include "type_lookup_table.h"
+#include "vdex_file.h"
 #include "verifier/method_verifier.h"
+#include "verifier/verifier_deps.h"
 #include "well_known_classes.h"
 
 #include <sys/stat.h>
@@ -114,13 +116,13 @@
 
   bool Symbolize() {
     const InstructionSet isa = oat_file_->GetOatHeader().GetInstructionSet();
-    const InstructionSetFeatures* features = InstructionSetFeatures::FromBitmap(
+    std::unique_ptr<const InstructionSetFeatures> features = InstructionSetFeatures::FromBitmap(
         isa, oat_file_->GetOatHeader().GetInstructionSetFeaturesBitmap());
 
     File* elf_file = OS::CreateEmptyFile(output_name_.c_str());
     std::unique_ptr<BufferedOutputStream> output_stream(
         MakeUnique<BufferedOutputStream>(MakeUnique<FileOutputStream>(elf_file)));
-    builder_.reset(new ElfBuilder<ElfTypes>(isa, features, output_stream.get()));
+    builder_.reset(new ElfBuilder<ElfTypes>(isa, features.get(), output_stream.get()));
 
     builder_->Start();
 
@@ -482,6 +484,28 @@
     os << "\n";
 
     if (!options_.dump_header_only_) {
+      VariableIndentationOutputStream vios(&os);
+      VdexFile::Header vdex_header = oat_file_.GetVdexFile()->GetHeader();
+      if (vdex_header.IsValid()) {
+        std::string error_msg;
+        std::vector<const DexFile*> dex_files;
+        for (size_t i = 0; i < oat_dex_files_.size(); i++) {
+          const DexFile* dex_file = OpenDexFile(oat_dex_files_[i], &error_msg);
+          if (dex_file == nullptr) {
+            os << "Error opening dex file: " << error_msg << std::endl;
+            return false;
+          }
+          dex_files.push_back(dex_file);
+        }
+        verifier::VerifierDeps deps(dex_files, oat_file_.GetVdexFile()->GetVerifierDepsData());
+        deps.Dump(&vios);
+      } else {
+        os << "UNRECOGNIZED vdex file, magic "
+           << vdex_header.GetMagic()
+           << ", version "
+           << vdex_header.GetVersion()
+           << "\n";
+      }
       for (size_t i = 0; i < oat_dex_files_.size(); i++) {
         const OatFile::OatDexFile* oat_dex_file = oat_dex_files_[i];
         CHECK(oat_dex_file != nullptr);
@@ -940,7 +964,7 @@
       return success;
     }
 
-    std::string pretty_method = PrettyMethod(dex_method_idx, dex_file, true);
+    std::string pretty_method = dex_file.PrettyMethod(dex_method_idx, true);
     vios->Stream() << StringPrintf("%d: %s (dex_method_idx=%d)\n",
                                    class_method_index, pretty_method.c_str(),
                                    dex_method_idx);
@@ -1029,13 +1053,19 @@
       if (options_.absolute_addresses_) {
         vios->Stream() << StringPrintf("%p ", oat_method.GetVmapTable());
       }
-      uint32_t vmap_table_offset = oat_method.GetVmapTableOffset();
+      uint32_t vmap_table_offset = method_header == nullptr ? 0 : method_header->vmap_table_offset_;
       vios->Stream() << StringPrintf("(offset=0x%08x)\n", vmap_table_offset);
-      if (vmap_table_offset > oat_file_.Size()) {
+
+      size_t vmap_table_offset_limit =
+          (kIsVdexEnabled && IsMethodGeneratedByDexToDexCompiler(oat_method, code_item))
+              ? oat_file_.GetVdexFile()->Size()
+              : method_header->GetCode() - oat_file_.Begin();
+      if (vmap_table_offset >= vmap_table_offset_limit) {
         vios->Stream() << StringPrintf("WARNING: "
                                        "vmap table offset 0x%08x is past end of file 0x%08zx. "
                                        "vmap table offset was loaded from offset 0x%08x.\n",
-                                       vmap_table_offset, oat_file_.Size(),
+                                       vmap_table_offset,
+                                       vmap_table_offset_limit,
                                        oat_method.GetVmapTableOffsetOffset());
         success = false;
       } else if (options_.dump_vmap_) {
@@ -1575,10 +1605,10 @@
       {
         ReaderMutexLock mu(self, *class_linker->DexLock());
         for (const ClassLinker::DexCacheData& data : class_linker->GetDexCachesData()) {
-          mirror::DexCache* dex_cache =
-              down_cast<mirror::DexCache*>(self->DecodeJObject(data.weak_root));
+          ObjPtr<mirror::DexCache> dex_cache =
+              ObjPtr<mirror::DexCache>::DownCast(self->DecodeJObject(data.weak_root));
           if (dex_cache != nullptr) {
-            dex_caches_.insert(dex_cache);
+            dex_caches_.insert(dex_cache.Ptr());
           }
         }
       }
@@ -1681,7 +1711,7 @@
 
     virtual void Visit(ArtMethod* method) OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) {
       std::ostream& indent_os = image_dumper_->vios_.Stream();
-      indent_os << method << " " << " ArtMethod: " << PrettyMethod(method) << "\n";
+      indent_os << method << " " << " ArtMethod: " << ArtMethod::PrettyMethod(method) << "\n";
       image_dumper_->DumpMethod(method, indent_os);
       indent_os << "\n";
     }
@@ -1696,16 +1726,16 @@
       REQUIRES_SHARED(Locks::mutator_lock_) {
     CHECK(type != nullptr);
     if (value == nullptr) {
-      os << StringPrintf("null   %s\n", PrettyDescriptor(type).c_str());
+      os << StringPrintf("null   %s\n", type->PrettyDescriptor().c_str());
     } else if (type->IsStringClass()) {
       mirror::String* string = value->AsString();
       os << StringPrintf("%p   String: %s\n", string,
                          PrintableString(string->ToModifiedUtf8().c_str()).c_str());
     } else if (type->IsClassClass()) {
       mirror::Class* klass = value->AsClass();
-      os << StringPrintf("%p   Class: %s\n", klass, PrettyDescriptor(klass).c_str());
+      os << StringPrintf("%p   Class: %s\n", klass, mirror::Class::PrettyDescriptor(klass).c_str());
     } else {
-      os << StringPrintf("%p   %s\n", value.Ptr(), PrettyDescriptor(type).c_str());
+      os << StringPrintf("%p   %s\n", value.Ptr(), type->PrettyDescriptor().c_str());
     }
   }
 
@@ -1825,17 +1855,18 @@
 
     mirror::Class* obj_class = obj->GetClass();
     if (obj_class->IsArrayClass()) {
-      os << StringPrintf("%p: %s length:%d\n", obj, PrettyDescriptor(obj_class).c_str(),
+      os << StringPrintf("%p: %s length:%d\n", obj, obj_class->PrettyDescriptor().c_str(),
                          obj->AsArray()->GetLength());
     } else if (obj->IsClass()) {
       mirror::Class* klass = obj->AsClass();
-      os << StringPrintf("%p: java.lang.Class \"%s\" (", obj, PrettyDescriptor(klass).c_str())
+      os << StringPrintf("%p: java.lang.Class \"%s\" (", obj,
+                         mirror::Class::PrettyDescriptor(klass).c_str())
          << klass->GetStatus() << ")\n";
     } else if (obj_class->IsStringClass()) {
       os << StringPrintf("%p: java.lang.String %s\n", obj,
                          PrintableString(obj->AsString()->ToModifiedUtf8().c_str()).c_str());
     } else {
-      os << StringPrintf("%p: %s\n", obj, PrettyDescriptor(obj_class).c_str());
+      os << StringPrintf("%p: %s\n", obj, obj_class->PrettyDescriptor().c_str());
     }
     ScopedIndentation indent1(&state->vios_);
     DumpFields(os, obj, obj_class);
@@ -1906,7 +1937,7 @@
               msg = "null";
             } else if (method_section.Contains(
                 reinterpret_cast<uint8_t*>(elem) - state->image_space_.Begin())) {
-              msg = PrettyMethod(reinterpret_cast<ArtMethod*>(elem));
+              msg = reinterpret_cast<ArtMethod*>(elem)->PrettyMethod();
             } else {
               msg = "<not in method section>";
             }
@@ -1940,7 +1971,7 @@
               msg = "null";
             } else if (field_section.Contains(
                 reinterpret_cast<uint8_t*>(elem) - state->image_space_.Begin())) {
-              msg = PrettyField(reinterpret_cast<ArtField*>(elem));
+              msg = reinterpret_cast<ArtField*>(elem)->PrettyField();
             } else {
               msg = "<not in field section>";
             }
@@ -1968,7 +1999,7 @@
             if (elem == nullptr) {
               msg = "null";
             } else {
-              msg = PrettyClass(elem);
+              msg = elem->PrettyClass();
             }
             os << StringPrintf("%p   %s\n", elem, msg.c_str());
           }
@@ -2005,7 +2036,8 @@
       if (table != nullptr) {
         indent_os << "IMT conflict table " << table << " method: ";
         for (size_t i = 0, count = table->NumEntries(pointer_size); i < count; ++i) {
-          indent_os << PrettyMethod(table->GetImplementationMethod(i, pointer_size)) << " ";
+          indent_os << ArtMethod::PrettyMethod(table->GetImplementationMethod(i, pointer_size))
+                    << " ";
         }
       }
     } else {
@@ -2207,7 +2239,7 @@
                   os << "\nBig methods (size > " << i << " standard deviations the norm):\n";
                   first = false;
                 }
-                os << PrettyMethod(method_outlier[j]) << " requires storage of "
+                os << ArtMethod::PrettyMethod(method_outlier[j]) << " requires storage of "
                     << PrettySize(cur_size) << "\n";
                 method_outlier_size[j] = 0;  // don't consider this method again
                 dumped_values++;
@@ -2247,7 +2279,7 @@
                       << " standard deviations the norm):\n";
                   first = false;
                 }
-                os << PrettyMethod(method_outlier[j]) << " expanded code by "
+                os << ArtMethod::PrettyMethod(method_outlier[j]) << " expanded code by "
                    << cur_expansion << "\n";
                 method_outlier_expansion[j] = 0.0;  // don't consider this method again
                 dumped_values++;
@@ -2751,7 +2783,7 @@
 
     bool result = klass->GetImt(pointer_size) == object_class->GetImt(pointer_size);
 
-    if (klass->GetIfTable() == nullptr) {
+    if (klass->GetIfTable()->Count() == 0) {
       DCHECK(result);
     }
 
@@ -2771,7 +2803,7 @@
         return;
       }
       table_index++;
-      std::cerr << "    " << PrettyMethod(ptr, true) << std::endl;
+      std::cerr << "    " << ptr->PrettyMethod(true) << std::endl;
     }
   }
 
@@ -2850,25 +2882,30 @@
           PrintTable(current_table, pointer_size);
         }
       } else {
-        std::cerr << "    " << PrettyMethod(ptr, true) << std::endl;
+        std::cerr << "    " << ptr->PrettyMethod(true) << std::endl;
       }
     }
 
     std::cerr << " Interfaces:" << std::endl;
     // Run through iftable, find methods that slot here, see if they fit.
     mirror::IfTable* if_table = klass->GetIfTable();
-    if (if_table != nullptr) {
-      for (size_t i = 0, num_interfaces = klass->GetIfTableCount(); i < num_interfaces; ++i) {
-        mirror::Class* iface = if_table->GetInterface(i);
-        std::string iface_name;
-        std::cerr << "  " << iface->GetDescriptor(&iface_name) << std::endl;
+    for (size_t i = 0, num_interfaces = klass->GetIfTableCount(); i < num_interfaces; ++i) {
+      mirror::Class* iface = if_table->GetInterface(i);
+      std::string iface_name;
+      std::cerr << "  " << iface->GetDescriptor(&iface_name) << std::endl;
 
-        for (ArtMethod& iface_method : iface->GetVirtualMethods(pointer_size)) {
-          uint32_t base_hash = ImTable::GetBaseImtHash(&iface_method);
-          uint32_t imt_slot = ImTable::GetImtIndex(&iface_method);
-          std::cerr << "    " << PrettyMethod(&iface_method, true) << " slot=" << std::dec
-              << imt_slot << " base_hash=0x" << std::hex << base_hash << std::endl;
-        }
+      for (ArtMethod& iface_method : iface->GetVirtualMethods(pointer_size)) {
+        uint32_t class_hash, name_hash, signature_hash;
+        ImTable::GetImtHashComponents(&iface_method, &class_hash, &name_hash, &signature_hash);
+        uint32_t imt_slot = ImTable::GetImtIndex(&iface_method);
+        std::cerr << "    " << iface_method.PrettyMethod(true)
+            << " slot=" << imt_slot
+            << std::hex
+            << " class_hash=0x" << class_hash
+            << " name_hash=0x" << name_hash
+            << " signature_hash=0x" << signature_hash
+            << std::dec
+            << std::endl;
       }
     }
   }
@@ -2913,7 +2950,7 @@
           }
           table_index++;
 
-          std::string p_name = PrettyMethod(ptr2, true);
+          std::string p_name = ptr2->PrettyMethod(true);
           if (StartsWith(p_name, method.c_str())) {
             std::cerr << "  Slot "
                       << index
@@ -2926,25 +2963,23 @@
           }
         }
       } else {
-        std::string p_name = PrettyMethod(ptr, true);
+        std::string p_name = ptr->PrettyMethod(true);
         if (StartsWith(p_name, method.c_str())) {
           std::cerr << "  Slot " << index << " (1)" << std::endl;
           std::cerr << "    " << p_name << std::endl;
         } else {
           // Run through iftable, find methods that slot here, see if they fit.
           mirror::IfTable* if_table = klass->GetIfTable();
-          if (if_table != nullptr) {
-            for (size_t i = 0, num_interfaces = klass->GetIfTableCount(); i < num_interfaces; ++i) {
-              mirror::Class* iface = if_table->GetInterface(i);
-              size_t num_methods = iface->NumDeclaredVirtualMethods();
-              if (num_methods > 0) {
-                for (ArtMethod& iface_method : iface->GetMethods(pointer_size)) {
-                  if (ImTable::GetImtIndex(&iface_method) == index) {
-                    std::string i_name = PrettyMethod(&iface_method, true);
-                    if (StartsWith(i_name, method.c_str())) {
-                      std::cerr << "  Slot " << index << " (1)" << std::endl;
-                      std::cerr << "    " << p_name << " (" << i_name << ")" << std::endl;
-                    }
+          for (size_t i = 0, num_interfaces = klass->GetIfTableCount(); i < num_interfaces; ++i) {
+            mirror::Class* iface = if_table->GetInterface(i);
+            size_t num_methods = iface->NumDeclaredVirtualMethods();
+            if (num_methods > 0) {
+              for (ArtMethod& iface_method : iface->GetMethods(pointer_size)) {
+                if (ImTable::GetImtIndex(&iface_method) == index) {
+                  std::string i_name = iface_method.PrettyMethod(true);
+                  if (StartsWith(i_name, method.c_str())) {
+                    std::cerr << "  Slot " << index << " (1)" << std::endl;
+                    std::cerr << "    " << p_name << " (" << i_name << ")" << std::endl;
                   }
                 }
               }
diff --git a/patchoat/patchoat.cc b/patchoat/patchoat.cc
index f3eb663..5dc1457 100644
--- a/patchoat/patchoat.cc
+++ b/patchoat/patchoat.cc
@@ -442,7 +442,7 @@
     return ERROR_OAT_FILE;
   }
 
-  const std::string& file_path = oat_in->GetFile().GetPath();
+  const std::string& file_path = oat_in->GetFilePath();
 
   const OatHeader* oat_header = GetOatHeader(oat_in);
   if (oat_header == nullptr) {
@@ -677,6 +677,16 @@
         mirror::DexCache::SetElementPtrSize(copy_fields, j, copy, pointer_size);
       }
     }
+    mirror::MethodTypeDexCacheType* orig_method_types = orig_dex_cache->GetResolvedMethodTypes();
+    mirror::MethodTypeDexCacheType* relocated_method_types =
+        RelocatedAddressOfPointer(orig_method_types);
+    copy_dex_cache->SetField64<false>(
+        mirror::DexCache::ResolvedMethodTypesOffset(),
+        static_cast<int64_t>(reinterpret_cast<uintptr_t>(relocated_method_types)));
+    if (orig_method_types != nullptr) {
+      orig_dex_cache->FixupResolvedMethodTypes(RelocatedCopyOf(orig_method_types),
+                                               RelocatedPointerVisitor(this));
+    }
   }
 }
 
@@ -715,15 +725,16 @@
 }
 
 
-void PatchOat::PatchVisitor::operator() (mirror::Object* obj, MemberOffset off,
+void PatchOat::PatchVisitor::operator() (ObjPtr<mirror::Object> obj,
+                                         MemberOffset off,
                                          bool is_static_unused ATTRIBUTE_UNUSED) const {
   mirror::Object* referent = obj->GetFieldObject<mirror::Object, kVerifyNone>(off);
   mirror::Object* moved_object = patcher_->RelocatedAddressOfPointer(referent);
   copy_->SetFieldObjectWithoutWriteBarrier<false, true, kVerifyNone>(off, moved_object);
 }
 
-void PatchOat::PatchVisitor::operator() (mirror::Class* cls ATTRIBUTE_UNUSED,
-                                         mirror::Reference* ref) const {
+void PatchOat::PatchVisitor::operator() (ObjPtr<mirror::Class> cls ATTRIBUTE_UNUSED,
+                                         ObjPtr<mirror::Reference> ref) const {
   MemberOffset off = mirror::Reference::ReferentOffset();
   mirror::Object* referent = ref->GetReferent();
   DCHECK(referent == nullptr ||
@@ -736,13 +747,8 @@
 void PatchOat::VisitObject(mirror::Object* object) {
   mirror::Object* copy = RelocatedCopyOf(object);
   CHECK(copy != nullptr);
-  if (kUseBakerOrBrooksReadBarrier) {
-    object->AssertReadBarrierPointer();
-    if (kUseBrooksReadBarrier) {
-      mirror::Object* moved_to = RelocatedAddressOfPointer(object);
-      copy->SetReadBarrierPointer(moved_to);
-      DCHECK_EQ(copy->GetReadBarrierPointer(), moved_to);
-    }
+  if (kUseBakerReadBarrier) {
+    object->AssertReadBarrierState();
   }
   PatchOat::PatchVisitor visitor(this, copy);
   object->VisitReferences<kVerifyNone>(visitor, visitor);
@@ -756,16 +762,14 @@
     if (vtable != nullptr) {
       vtable->Fixup(RelocatedCopyOfFollowImages(vtable), pointer_size, native_visitor);
     }
-    auto* iftable = klass->GetIfTable();
-    if (iftable != nullptr) {
-      for (int32_t i = 0; i < klass->GetIfTableCount(); ++i) {
-        if (iftable->GetMethodArrayCount(i) > 0) {
-          auto* method_array = iftable->GetMethodArray(i);
-          CHECK(method_array != nullptr);
-          method_array->Fixup(RelocatedCopyOfFollowImages(method_array),
-                              pointer_size,
-                              native_visitor);
-        }
+    mirror::IfTable* iftable = klass->GetIfTable();
+    for (int32_t i = 0; i < klass->GetIfTableCount(); ++i) {
+      if (iftable->GetMethodArrayCount(i) > 0) {
+        auto* method_array = iftable->GetMethodArray(i);
+        CHECK(method_array != nullptr);
+        method_array->Fixup(RelocatedCopyOfFollowImages(method_array),
+                            pointer_size,
+                            native_visitor);
       }
     }
   } else if (object->GetClass() == mirror::Method::StaticClass() ||
@@ -846,7 +850,7 @@
   }
   OatHeader* oat_header = reinterpret_cast<OatHeader*>(oat_file->Begin() + rodata_sec->sh_offset);
   if (!oat_header->IsValid()) {
-    LOG(ERROR) << "Elf file " << oat_file->GetFile().GetPath() << " has an invalid oat header";
+    LOG(ERROR) << "Elf file " << oat_file->GetFilePath() << " has an invalid oat header";
     return false;
   }
   oat_header->RelocateOat(delta_);
@@ -854,10 +858,11 @@
 }
 
 bool PatchOat::PatchElf() {
-  if (oat_file_->Is64Bit())
+  if (oat_file_->Is64Bit()) {
     return PatchElf<ElfFileImpl64>(oat_file_->GetImpl64());
-  else
+  } else {
     return PatchElf<ElfFileImpl32>(oat_file_->GetImpl32());
+  }
 }
 
 template <typename ElfFileImpl>
@@ -1061,7 +1066,7 @@
   TimingLogger::ScopedTiming pt("patch image and oat", &timings);
 
   std::string output_directory =
-      output_image_filename.substr(0, output_image_filename.find_last_of("/"));
+      output_image_filename.substr(0, output_image_filename.find_last_of('/'));
   bool ret = PatchOat::Patch(input_image_location, base_delta, output_directory, isa, &timings);
 
   if (kIsDebugBuild) {
@@ -1358,15 +1363,13 @@
 }
 
 static int patchoat(int argc, char **argv) {
-  InitLogging(argv);
+  InitLogging(argv, Runtime::Aborter);
   MemMap::Init();
   const bool debug = kIsDebugBuild;
   orig_argc = argc;
   orig_argv = argv;
   TimingLogger timings("patcher", false, false);
 
-  InitLogging(argv);
-
   // Skip over the command name.
   argv++;
   argc--;
diff --git a/patchoat/patchoat.h b/patchoat/patchoat.h
index a97b051..e7a3e91 100644
--- a/patchoat/patchoat.h
+++ b/patchoat/patchoat.h
@@ -198,10 +198,10 @@
   public:
     PatchVisitor(PatchOat* patcher, mirror::Object* copy) : patcher_(patcher), copy_(copy) {}
     ~PatchVisitor() {}
-    void operator() (mirror::Object* obj, MemberOffset off, bool b) const
+    void operator() (ObjPtr<mirror::Object> obj, MemberOffset off, bool b) const
         REQUIRES(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
     // For reference classes.
-    void operator() (mirror::Class* cls, mirror::Reference* ref) const
+    void operator() (ObjPtr<mirror::Class> cls, ObjPtr<mirror::Reference>  ref) const
         REQUIRES(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
     // TODO: Consider using these for updating native class roots?
     void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root ATTRIBUTE_UNUSED)
diff --git a/profman/profman.cc b/profman/profman.cc
index 7722e80..bfef834 100644
--- a/profman/profman.cc
+++ b/profman/profman.cc
@@ -33,6 +33,7 @@
 #include "base/unix_file/fd_file.h"
 #include "dex_file.h"
 #include "jit/offline_profiling_info.h"
+#include "runtime.h"
 #include "utils.h"
 #include "zip_archive.h"
 #include "profile_assistant.h"
@@ -143,7 +144,7 @@
     original_argc = argc;
     original_argv = argv;
 
-    InitLogging(argv);
+    InitLogging(argv, Runtime::Aborter);
 
     // Skip over the command name.
     argv++;
@@ -353,7 +354,7 @@
   }
 
   int GenerateTestProfile() {
-    int profile_test_fd = open(test_profile_.c_str(), O_CREAT | O_TRUNC | O_WRONLY);
+    int profile_test_fd = open(test_profile_.c_str(), O_CREAT | O_TRUNC | O_WRONLY, 0644);
     if (profile_test_fd < 0) {
       std::cerr << "Cannot open " << test_profile_ << strerror(errno);
       return -1;
diff --git a/runtime/Android.bp b/runtime/Android.bp
index 31f2490..c6f479f 100644
--- a/runtime/Android.bp
+++ b/runtime/Android.bp
@@ -120,9 +120,12 @@
         "linear_alloc.cc",
         "mem_map.cc",
         "memory_region.cc",
+        "method_handles.cc",
         "mirror/array.cc",
         "mirror/class.cc",
+        "mirror/class_ext.cc",
         "mirror/dex_cache.cc",
+        "mirror/emulated_stack_frame.cc",
         "mirror/executable.cc",
         "mirror/field.cc",
         "mirror/method.cc",
@@ -403,7 +406,8 @@
 
 gensrcs {
     name: "art_operator_srcs",
-    cmd: "art/tools/generate-operator-out.py art/runtime $in > $out",
+    cmd: "$(location generate-operator-out.py) art/runtime $(in) > $(out)",
+    tool_files: ["generate-operator-out.py"],
     srcs: [
         "arch/instruction_set.h",
         "base/allocator.h",
@@ -536,6 +540,7 @@
         "gc/task_processor_test.cc",
         "gtest_test.cc",
         "handle_scope_test.cc",
+        "imtable_test.cc",
         "indenter_test.cc",
         "indirect_reference_table_test.cc",
         "instrumentation_test.cc",
@@ -564,7 +569,6 @@
         "utils_test.cc",
         "verifier/method_verifier_test.cc",
         "verifier/reg_type_test.cc",
-        "verifier/verifier_deps_test.cc",
         "zip_archive_test.cc",
     ],
     shared_libs: [
diff --git a/runtime/arch/arm/entrypoints_init_arm.cc b/runtime/arch/arm/entrypoints_init_arm.cc
index cb8edff..de72d3a 100644
--- a/runtime/arch/arm/entrypoints_init_arm.cc
+++ b/runtime/arch/arm/entrypoints_init_arm.cc
@@ -30,8 +30,7 @@
 namespace art {
 
 // Cast entrypoints.
-extern "C" size_t artIsAssignableFromCode(const mirror::Class* klass,
-                                          const mirror::Class* ref_class);
+extern "C" size_t artInstanceOfFromCode(mirror::Object* obj, mirror::Class* ref_class);
 
 // Read barrier entrypoints.
 // art_quick_read_barrier_mark_regX uses an non-standard calling
@@ -68,12 +67,27 @@
 // Long long arithmetics - REM_LONG[_2ADDR] and DIV_LONG[_2ADDR]
 extern "C" int64_t __aeabi_ldivmod(int64_t, int64_t);
 
+void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_marking) {
+  qpoints->pReadBarrierMarkReg00 = is_marking ? art_quick_read_barrier_mark_reg00 : nullptr;
+  qpoints->pReadBarrierMarkReg01 = is_marking ? art_quick_read_barrier_mark_reg01 : nullptr;
+  qpoints->pReadBarrierMarkReg02 = is_marking ? art_quick_read_barrier_mark_reg02 : nullptr;
+  qpoints->pReadBarrierMarkReg03 = is_marking ? art_quick_read_barrier_mark_reg03 : nullptr;
+  qpoints->pReadBarrierMarkReg04 = is_marking ? art_quick_read_barrier_mark_reg04 : nullptr;
+  qpoints->pReadBarrierMarkReg05 = is_marking ? art_quick_read_barrier_mark_reg05 : nullptr;
+  qpoints->pReadBarrierMarkReg06 = is_marking ? art_quick_read_barrier_mark_reg06 : nullptr;
+  qpoints->pReadBarrierMarkReg07 = is_marking ? art_quick_read_barrier_mark_reg07 : nullptr;
+  qpoints->pReadBarrierMarkReg08 = is_marking ? art_quick_read_barrier_mark_reg08 : nullptr;
+  qpoints->pReadBarrierMarkReg09 = is_marking ? art_quick_read_barrier_mark_reg09 : nullptr;
+  qpoints->pReadBarrierMarkReg10 = is_marking ? art_quick_read_barrier_mark_reg10 : nullptr;
+  qpoints->pReadBarrierMarkReg11 = is_marking ? art_quick_read_barrier_mark_reg11 : nullptr;
+}
+
 void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) {
   DefaultInitEntryPoints(jpoints, qpoints);
 
   // Cast
-  qpoints->pInstanceofNonTrivial = artIsAssignableFromCode;
-  qpoints->pCheckCast = art_quick_check_cast;
+  qpoints->pInstanceofNonTrivial = artInstanceOfFromCode;
+  qpoints->pCheckInstanceOf = art_quick_check_instance_of;
 
   // Math
   qpoints->pIdivmod = __aeabi_idivmod;
@@ -124,18 +138,7 @@
 
   // Read barrier.
   qpoints->pReadBarrierJni = ReadBarrierJni;
-  qpoints->pReadBarrierMarkReg00 = art_quick_read_barrier_mark_reg00;
-  qpoints->pReadBarrierMarkReg01 = art_quick_read_barrier_mark_reg01;
-  qpoints->pReadBarrierMarkReg02 = art_quick_read_barrier_mark_reg02;
-  qpoints->pReadBarrierMarkReg03 = art_quick_read_barrier_mark_reg03;
-  qpoints->pReadBarrierMarkReg04 = art_quick_read_barrier_mark_reg04;
-  qpoints->pReadBarrierMarkReg05 = art_quick_read_barrier_mark_reg05;
-  qpoints->pReadBarrierMarkReg06 = art_quick_read_barrier_mark_reg06;
-  qpoints->pReadBarrierMarkReg07 = art_quick_read_barrier_mark_reg07;
-  qpoints->pReadBarrierMarkReg08 = art_quick_read_barrier_mark_reg08;
-  qpoints->pReadBarrierMarkReg09 = art_quick_read_barrier_mark_reg09;
-  qpoints->pReadBarrierMarkReg10 = art_quick_read_barrier_mark_reg10;
-  qpoints->pReadBarrierMarkReg11 = art_quick_read_barrier_mark_reg11;
+  UpdateReadBarrierEntrypoints(qpoints, /*is_marking*/ false);
   qpoints->pReadBarrierMarkReg12 = nullptr;  // Cannot use register 12 (IP) to pass arguments.
   qpoints->pReadBarrierMarkReg13 = nullptr;  // Cannot use register 13 (SP) to pass arguments.
   qpoints->pReadBarrierMarkReg14 = nullptr;  // Cannot use register 14 (LR) to pass arguments.
diff --git a/runtime/arch/arm/instruction_set_features_arm.cc b/runtime/arch/arm/instruction_set_features_arm.cc
index c3a5829..c81a93c 100644
--- a/runtime/arch/arm/instruction_set_features_arm.cc
+++ b/runtime/arch/arm/instruction_set_features_arm.cc
@@ -33,7 +33,7 @@
 
 namespace art {
 
-const ArmInstructionSetFeatures* ArmInstructionSetFeatures::FromVariant(
+ArmFeaturesUniquePtr ArmInstructionSetFeatures::FromVariant(
     const std::string& variant, std::string* error_msg) {
   // Assume all ARM processors are SMP.
   // TODO: set the SMP support based on variant.
@@ -69,7 +69,7 @@
     if (FindVariantInArray(unsupported_arm_variants, arraysize(unsupported_arm_variants),
                            variant)) {
       *error_msg = StringPrintf("Attempt to use unsupported ARM variant: %s", variant.c_str());
-      return nullptr;
+      return ArmFeaturesUniquePtr();
     }
     // Warn if the variant is unknown.
     // TODO: some of the variants below may have feature support, but that support is currently
@@ -97,17 +97,17 @@
           << ") using conservative defaults";
     }
   }
-  return new ArmInstructionSetFeatures(smp, has_div, has_lpae);
+  return ArmFeaturesUniquePtr(new ArmInstructionSetFeatures(smp, has_div, has_lpae));
 }
 
-const ArmInstructionSetFeatures* ArmInstructionSetFeatures::FromBitmap(uint32_t bitmap) {
+ArmFeaturesUniquePtr ArmInstructionSetFeatures::FromBitmap(uint32_t bitmap) {
   bool smp = (bitmap & kSmpBitfield) != 0;
   bool has_div = (bitmap & kDivBitfield) != 0;
   bool has_atomic_ldrd_strd = (bitmap & kAtomicLdrdStrdBitfield) != 0;
-  return new ArmInstructionSetFeatures(smp, has_div, has_atomic_ldrd_strd);
+  return ArmFeaturesUniquePtr(new ArmInstructionSetFeatures(smp, has_div, has_atomic_ldrd_strd));
 }
 
-const ArmInstructionSetFeatures* ArmInstructionSetFeatures::FromCppDefines() {
+ArmFeaturesUniquePtr ArmInstructionSetFeatures::FromCppDefines() {
   const bool smp = true;
 #if defined(__ARM_ARCH_EXT_IDIV__)
   const bool has_div = true;
@@ -119,10 +119,10 @@
 #else
   const bool has_lpae = false;
 #endif
-  return new ArmInstructionSetFeatures(smp, has_div, has_lpae);
+  return ArmFeaturesUniquePtr(new ArmInstructionSetFeatures(smp, has_div, has_lpae));
 }
 
-const ArmInstructionSetFeatures* ArmInstructionSetFeatures::FromCpuInfo() {
+ArmFeaturesUniquePtr ArmInstructionSetFeatures::FromCpuInfo() {
   // Look in /proc/cpuinfo for features we need.  Only use this when we can guarantee that
   // the kernel puts the appropriate feature flags in here.  Sometimes it doesn't.
   bool smp = false;
@@ -157,10 +157,10 @@
   } else {
     LOG(ERROR) << "Failed to open /proc/cpuinfo";
   }
-  return new ArmInstructionSetFeatures(smp, has_div, has_lpae);
+  return ArmFeaturesUniquePtr(new ArmInstructionSetFeatures(smp, has_div, has_lpae));
 }
 
-const ArmInstructionSetFeatures* ArmInstructionSetFeatures::FromHwcap() {
+ArmFeaturesUniquePtr ArmInstructionSetFeatures::FromHwcap() {
   bool smp = sysconf(_SC_NPROCESSORS_CONF) > 1;
 
   bool has_div = false;
@@ -180,7 +180,7 @@
   }
 #endif
 
-  return new ArmInstructionSetFeatures(smp, has_div, has_lpae);
+  return ArmFeaturesUniquePtr(new ArmInstructionSetFeatures(smp, has_div, has_lpae));
 }
 
 // A signal handler called by a fault for an illegal instruction.  We record the fact in r0
@@ -198,7 +198,7 @@
 #endif
 }
 
-const ArmInstructionSetFeatures* ArmInstructionSetFeatures::FromAssembly() {
+ArmFeaturesUniquePtr ArmInstructionSetFeatures::FromAssembly() {
   const bool smp = true;
 
   // See if have a sdiv instruction.  Register a signal handler and try to execute an sdiv
@@ -226,7 +226,7 @@
 #else
   const bool has_lpae = false;
 #endif
-  return new ArmInstructionSetFeatures(smp, has_div, has_lpae);
+  return ArmFeaturesUniquePtr(new ArmInstructionSetFeatures(smp, has_div, has_lpae));
 }
 
 bool ArmInstructionSetFeatures::Equals(const InstructionSetFeatures* other) const {
@@ -265,7 +265,8 @@
   return result;
 }
 
-const InstructionSetFeatures* ArmInstructionSetFeatures::AddFeaturesFromSplitString(
+std::unique_ptr<const InstructionSetFeatures>
+ArmInstructionSetFeatures::AddFeaturesFromSplitString(
     const bool smp, const std::vector<std::string>& features, std::string* error_msg) const {
   bool has_atomic_ldrd_strd = has_atomic_ldrd_strd_;
   bool has_div = has_div_;
@@ -284,7 +285,8 @@
       return nullptr;
     }
   }
-  return new ArmInstructionSetFeatures(smp, has_div, has_atomic_ldrd_strd);
+  return std::unique_ptr<const InstructionSetFeatures>(
+      new ArmInstructionSetFeatures(smp, has_div, has_atomic_ldrd_strd));
 }
 
 }  // namespace art
diff --git a/runtime/arch/arm/instruction_set_features_arm.h b/runtime/arch/arm/instruction_set_features_arm.h
index 221bf1f..204d1d7 100644
--- a/runtime/arch/arm/instruction_set_features_arm.h
+++ b/runtime/arch/arm/instruction_set_features_arm.h
@@ -21,29 +21,31 @@
 
 namespace art {
 
+class ArmInstructionSetFeatures;
+using ArmFeaturesUniquePtr = std::unique_ptr<const ArmInstructionSetFeatures>;
+
 // Instruction set features relevant to the ARM architecture.
 class ArmInstructionSetFeatures FINAL : public InstructionSetFeatures {
  public:
   // Process a CPU variant string like "krait" or "cortex-a15" and create InstructionSetFeatures.
-  static const ArmInstructionSetFeatures* FromVariant(const std::string& variant,
-                                                      std::string* error_msg);
+  static ArmFeaturesUniquePtr FromVariant(const std::string& variant, std::string* error_msg);
 
   // Parse a bitmap and create an InstructionSetFeatures.
-  static const ArmInstructionSetFeatures* FromBitmap(uint32_t bitmap);
+  static ArmFeaturesUniquePtr FromBitmap(uint32_t bitmap);
 
   // Turn C pre-processor #defines into the equivalent instruction set features.
-  static const ArmInstructionSetFeatures* FromCppDefines();
+  static ArmFeaturesUniquePtr FromCppDefines();
 
   // Process /proc/cpuinfo and use kRuntimeISA to produce InstructionSetFeatures.
-  static const ArmInstructionSetFeatures* FromCpuInfo();
+  static ArmFeaturesUniquePtr FromCpuInfo();
 
   // Process the auxiliary vector AT_HWCAP entry and use kRuntimeISA to produce
   // InstructionSetFeatures.
-  static const ArmInstructionSetFeatures* FromHwcap();
+  static ArmFeaturesUniquePtr FromHwcap();
 
   // Use assembly tests of the current runtime (ie kRuntimeISA) to determine the
   // InstructionSetFeatures. This works around kernel bugs in AT_HWCAP and /proc/cpuinfo.
-  static const ArmInstructionSetFeatures* FromAssembly();
+  static ArmFeaturesUniquePtr FromAssembly();
 
   bool Equals(const InstructionSetFeatures* other) const OVERRIDE;
 
@@ -71,7 +73,7 @@
 
  protected:
   // Parse a vector of the form "div", "lpae" adding these to a new ArmInstructionSetFeatures.
-  const InstructionSetFeatures*
+  std::unique_ptr<const InstructionSetFeatures>
       AddFeaturesFromSplitString(const bool smp, const std::vector<std::string>& features,
                                  std::string* error_msg) const OVERRIDE;
 
diff --git a/runtime/arch/arm/quick_entrypoints_arm.S b/runtime/arch/arm/quick_entrypoints_arm.S
index cdb4c25..3a83eaf 100644
--- a/runtime/arch/arm/quick_entrypoints_arm.S
+++ b/runtime/arch/arm/quick_entrypoints_arm.S
@@ -239,6 +239,30 @@
     .cfi_adjust_cfa_offset -56
 .endm
 
+.macro RESTORE_SAVE_EVERYTHING_FRAME_KEEP_R0
+    add  sp, #8                         @ rewind sp
+    .cfi_adjust_cfa_offset -8
+    vpop {d0-d15}
+    .cfi_adjust_cfa_offset -128
+    add  sp, #4                         @ skip r0
+    .cfi_adjust_cfa_offset -4
+    .cfi_restore r0                     @ debugger can no longer restore caller's r0
+    pop {r1-r12, lr}                    @ 13 words of callee saves
+    .cfi_restore r1
+    .cfi_restore r2
+    .cfi_restore r3
+    .cfi_restore r5
+    .cfi_restore r6
+    .cfi_restore r7
+    .cfi_restore r8
+    .cfi_restore r9
+    .cfi_restore r10
+    .cfi_restore r11
+    .cfi_restore r12
+    .cfi_restore lr
+    .cfi_adjust_cfa_offset -52
+.endm
+
 .macro RETURN_IF_RESULT_IS_ZERO
     cbnz   r0, 1f              @ result non-zero branch over
     bx     lr                  @ return
@@ -252,17 +276,23 @@
 .endm
 
     /*
-     * Macro that set calls through to artDeliverPendingExceptionFromCode, where the pending
-     * exception is Thread::Current()->exception_
+     * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
+     * exception is Thread::Current()->exception_ when the runtime method frame is ready.
      */
-.macro DELIVER_PENDING_EXCEPTION
-    .fnend
-    .fnstart
-    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r0       @ save callee saves for throw
+.macro DELIVER_PENDING_EXCEPTION_FRAME_READY
     mov    r0, r9                              @ pass Thread::Current
     bl     artDeliverPendingExceptionFromCode  @ artDeliverPendingExceptionFromCode(Thread*)
 .endm
 
+    /*
+     * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
+     * exception is Thread::Current()->exception_.
+     */
+.macro DELIVER_PENDING_EXCEPTION
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r0       @ save callee saves for throw
+    DELIVER_PENDING_EXCEPTION_FRAME_READY
+.endm
+
 .macro NO_ARG_RUNTIME_EXCEPTION c_name, cxx_name
     .extern \cxx_name
 ENTRY \c_name
@@ -734,11 +764,12 @@
 END art_quick_unlock_object_no_inline
 
     /*
-     * Entry from managed code that calls artIsAssignableFromCode and on failure calls
-     * artThrowClassCastException.
+     * Entry from managed code that calls artInstanceOfFromCode and on failure calls
+     * artThrowClassCastExceptionForObject.
      */
-    .extern artThrowClassCastException
-ENTRY art_quick_check_cast
+    .extern artInstanceOfFromCode
+    .extern artThrowClassCastExceptionForObject
+ENTRY art_quick_check_instance_of
     push {r0-r1, lr}                    @ save arguments, link register and pad
     .cfi_adjust_cfa_offset 12
     .cfi_rel_offset r0, 0
@@ -746,7 +777,7 @@
     .cfi_rel_offset lr, 8
     sub sp, #4
     .cfi_adjust_cfa_offset 4
-    bl artIsAssignableFromCode
+    bl artInstanceOfFromCode
     cbz    r0, .Lthrow_class_cast_exception
     add sp, #4
     .cfi_adjust_cfa_offset -4
@@ -762,9 +793,9 @@
     .cfi_restore lr
     SETUP_SAVE_ALL_CALLEE_SAVES_FRAME r2       @ save all registers as basis for long jump context
     mov r2, r9                      @ pass Thread::Current
-    bl  artThrowClassCastException  @ (Class*, Class*, Thread*)
+    bl  artThrowClassCastExceptionForObject  @ (Object*, Class*, Thread*)
     bkpt
-END art_quick_check_cast
+END art_quick_check_instance_of
 
 // Restore rReg's value from [sp, #offset] if rReg is not the same as rExclude.
 .macro POP_REG_NE rReg, offset, rExclude
@@ -1078,41 +1109,71 @@
      */
 
 ENTRY art_quick_resolve_string
-    ldr    r1, [sp]                                              @ load referrer
-    ldr    r1, [r1, #ART_METHOD_DECLARING_CLASS_OFFSET]          @ load declaring class
-    ldr    r1, [r1, #DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET]   @ load string dex cache
-    ubfx   r2, r0, #0, #STRING_DEX_CACHE_HASH_BITS
-    add    r1, r1, r2, LSL #STRING_DEX_CACHE_ELEMENT_SIZE_SHIFT
-    ldrd   r2, r3, [r1]                                    @ load index into r3 and pointer into r2
-    cmp    r0, r3
+    push   {r10-r12, lr}
+    .cfi_adjust_cfa_offset 16
+    .cfi_rel_offset r10, 0
+    .cfi_rel_offset r11, 4
+    .cfi_rel_offset ip, 8
+    .cfi_rel_offset lr, 12
+    ldr    r10, [sp, #16]                                        @ load referrer
+    ldr    r10, [r10, #ART_METHOD_DECLARING_CLASS_OFFSET]        @ load declaring class
+    ldr    r10, [r10, #DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET] @ load string dex cache
+    ubfx   r11, r0, #0, #STRING_DEX_CACHE_HASH_BITS
+    add    r10, r10, r11, LSL #STRING_DEX_CACHE_ELEMENT_SIZE_SHIFT
+    ldrd   r10, r11, [r10]                               @ load index into r11 and pointer into r10
+    cmp    r0, r11
     bne    .Lart_quick_resolve_string_slow_path
 #ifdef USE_READ_BARRIER
-    ldr    r3, [rSELF, #THREAD_IS_GC_MARKING_OFFSET]
-    cbnz   r3, .Lart_quick_resolve_string_marking
+    ldr    r0, [rSELF, #THREAD_IS_GC_MARKING_OFFSET]
+    cbnz   r0, .Lart_quick_resolve_string_marking
+.Lart_quick_resolve_string_no_rb:
 #endif
-    mov    r0, r2
-    bx     lr
-// Slow path case, the index did not match
-.Lart_quick_resolve_string_slow_path:
-    SETUP_SAVE_REFS_ONLY_FRAME r2                    @ save callee saves in case of GC
-    mov    r1, r9                                    @ pass Thread::Current
-    mov    r3, sp
-    bl     artResolveStringFromCode                  @ (uint32_t type_idx, Method* method, Thread*)
-    RESTORE_SAVE_REFS_ONLY_FRAME
-    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
+    mov    r0, r10
+    pop    {r10-r12, pc}
+
+#ifdef USE_READ_BARRIER
 // GC is marking case, need to check the mark bit.
 .Lart_quick_resolve_string_marking:
-    ldr    r3, [r2, MIRROR_OBJECT_LOCK_WORD_OFFSET]
-    tst    r3, #LOCK_WORD_MARK_BIT_MASK_SHIFTED
-    mov    r0, r2
-    bne    .Lart_quick_resolve_string_no_rb
-    push   {r1, r2, r3, lr}                          @ Save x1, LR
-    .cfi_adjust_cfa_offset 16
-    bl     artReadBarrierMark                        @ Get the marked string back.
-    pop    {r1, r2, r3, lr}                          @ Restore registers.
+    ldr    r0, [r10, MIRROR_OBJECT_LOCK_WORD_OFFSET]
+    lsrs   r0, #(LOCK_WORD_MARK_BIT_SHIFT + 1)
+    bcs    .Lart_quick_resolve_string_no_rb
+    mov    r0, r10
+    .cfi_remember_state
+    pop    {r10-r12, lr}
     .cfi_adjust_cfa_offset -16
-.Lart_quick_resolve_string_no_rb:
+    .cfi_restore r10
+    .cfi_restore r11
+    .cfi_restore r12
+    .cfi_restore lr
+    // Note: art_quick_read_barrier_mark_reg00 clobbers IP but the .Lslow_rb_* does not.
+    b      .Lslow_rb_art_quick_read_barrier_mark_reg00  @ Get the marked string back.
+    .cfi_restore_state
+#endif
+
+// Slow path case, the index did not match
+.Lart_quick_resolve_string_slow_path:
+    push {r0-r9}                  @ 10 words of callee saves and args; {r10-r12, lr} already saved.
+    .cfi_adjust_cfa_offset 40
+    .cfi_rel_offset r0, 0
+    .cfi_rel_offset r1, 4
+    .cfi_rel_offset r2, 8
+    .cfi_rel_offset r3, 12
+    .cfi_rel_offset r4, 16
+    .cfi_rel_offset r5, 20
+    .cfi_rel_offset r6, 24
+    .cfi_rel_offset r7, 28
+    .cfi_rel_offset r8, 32
+    .cfi_rel_offset r9, 36
+    SETUP_SAVE_EVERYTHING_FRAME_CORE_REGS_SAVED r1   @ save callee saves in case of GC
+    mov    r1, r9                                    @ pass Thread::Current
+    bl     artResolveStringFromCode                  @ (uint32_t type_idx, Thread*)
+    cbz    r0, 1f                                    @ If result is null, deliver the OOME.
+    .cfi_remember_state
+    RESTORE_SAVE_EVERYTHING_FRAME_KEEP_R0
     bx     lr
+    .cfi_restore_state
+1:
+    DELIVER_PENDING_EXCEPTION_FRAME_READY
 END art_quick_resolve_string
 
 // Generate the allocation entrypoints for each allocator.
@@ -1708,12 +1769,15 @@
     .cfi_rel_offset r10, 4
     .cfi_rel_offset r11, 8
     .cfi_rel_offset lr, 12
+#if (STRING_COMPRESSION_FEATURE)
+    ldr   r4, [r0, #MIRROR_STRING_COUNT_OFFSET]
+#else
     ldr   r3, [r0, #MIRROR_STRING_COUNT_OFFSET]
+#endif
     add   r0, #MIRROR_STRING_VALUE_OFFSET
 #if (STRING_COMPRESSION_FEATURE)
     /* r4 count (with flag) and r3 holds actual length */
-    mov   r4, r3
-    bic   r3, #2147483648
+    lsr   r3, r4, #1
 #endif
     /* Clamp start to [0..count] */
     cmp   r2, #0
@@ -1728,8 +1792,8 @@
 
     /* Build pointer to start of data to compare and pre-bias */
 #if (STRING_COMPRESSION_FEATURE)
-    cmp   r4, #0
-    blt   .Lstring_indexof_compressed
+    lsrs  r4, r4, #1
+    bcc   .Lstring_indexof_compressed
 #endif
     add   r0, r0, r2, lsl #1
     sub   r0, #2
@@ -1920,6 +1984,8 @@
      * getting its argument and returning its result through register
      * `reg`, saving and restoring all caller-save registers.
      *
+     * IP is clobbered; `reg` must not be IP.
+     *
      * If `reg` is different from `r0`, the generated function follows a
      * non-standard runtime calling convention:
      * - register `reg` is used to pass the (sole) argument of this
@@ -1936,36 +2002,83 @@
     SMART_CBZ \reg, .Lret_rb_\name
     // Check lock word for mark bit, if marked return. Use IP for scratch since it is blocked.
     ldr ip, [\reg, MIRROR_OBJECT_LOCK_WORD_OFFSET]
-    ands ip, #LOCK_WORD_MARK_BIT_MASK_SHIFTED
-    beq .Lslow_rb_\name
+    tst ip, #LOCK_WORD_MARK_BIT_MASK_SHIFTED
+    beq .Lnot_marked_rb_\name
     // Already marked, return right away.
+.Lret_rb_\name:
     bx lr
 
+.Lnot_marked_rb_\name:
+    // Test that both the forwarding state bits are 1.
+    mvn ip, ip
+    tst ip, #(LOCK_WORD_STATE_FORWARDING_ADDRESS << LOCK_WORD_STATE_SHIFT)
+    beq .Lret_forwarding_address\name
+
 .Lslow_rb_\name:
-    push  {r0-r5, r9, lr}               @ save return address and core caller-save registers
-                                        @ also save callee save r5 for 16 byte alignment
+    // Save IP: the kSaveEverything entrypoint art_quick_resolve_string makes a tail call here.
+    push  {r0-r4, r9, ip, lr}           @ save return address, core caller-save registers and ip
     .cfi_adjust_cfa_offset 32
     .cfi_rel_offset r0, 0
     .cfi_rel_offset r1, 4
     .cfi_rel_offset r2, 8
     .cfi_rel_offset r3, 12
     .cfi_rel_offset r4, 16
-    .cfi_rel_offset r5, 20
-    .cfi_rel_offset r9, 24
+    .cfi_rel_offset r9, 20
+    .cfi_rel_offset ip, 24
     .cfi_rel_offset lr, 28
-    vpush {s0-s15}                      @ save floating-point caller-save registers
-    .cfi_adjust_cfa_offset 64
 
     .ifnc \reg, r0
       mov   r0, \reg                    @ pass arg1 - obj from `reg`
     .endif
+
+    vpush {s0-s15}                      @ save floating-point caller-save registers
+    .cfi_adjust_cfa_offset 64
     bl    artReadBarrierMark            @ r0 <- artReadBarrierMark(obj)
-    mov ip, r0                          @ Save result in IP
     vpop {s0-s15}                       @ restore floating-point registers
     .cfi_adjust_cfa_offset -64
-    pop   {r0-r5, r9, lr}               @ restore caller-save registers
-    mov \reg, ip                        @ copy result to reg
-.Lret_rb_\name:
+
+    .ifc \reg, r0                       @ Save result to the stack slot or destination register.
+      str r0, [sp, #0]
+    .else
+      .ifc \reg, r1
+        str r0, [sp, #4]
+      .else
+        .ifc \reg, r2
+          str r0, [sp, #8]
+        .else
+          .ifc \reg, r3
+            str r0, [sp, #12]
+          .else
+            .ifc \reg, r4
+              str r0, [sp, #16]
+            .else
+              .ifc \reg, r9
+                str r0, [sp, #20]
+              .else
+                mov \reg, r0
+              .endif
+            .endif
+          .endif
+        .endif
+      .endif
+    .endif
+
+    pop   {r0-r4, r9, ip, lr}           @ restore caller-save registers
+    .cfi_adjust_cfa_offset -32
+    .cfi_restore r0
+    .cfi_restore r1
+    .cfi_restore r2
+    .cfi_restore r3
+    .cfi_restore r4
+    .cfi_restore r9
+    .cfi_restore ip
+    .cfi_restore lr
+    bx lr
+.Lret_forwarding_address\name:
+    // Shift left by the forwarding address shift. This clears out the state bits since they are
+    // in the top 2 bits of the lock word.
+    mvn ip, ip
+    lsl \reg, ip, #LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT
     bx lr
 END \name
 .endm
diff --git a/runtime/arch/arm64/entrypoints_init_arm64.cc b/runtime/arch/arm64/entrypoints_init_arm64.cc
index c2078f0..6add107 100644
--- a/runtime/arch/arm64/entrypoints_init_arm64.cc
+++ b/runtime/arch/arm64/entrypoints_init_arm64.cc
@@ -30,8 +30,7 @@
 namespace art {
 
 // Cast entrypoints.
-extern "C" size_t artIsAssignableFromCode(const mirror::Class* klass,
-                                          const mirror::Class* ref_class);
+extern "C" size_t artInstanceOfFromCode(mirror::Object* obj, mirror::Class* ref_class);
 
 // Read barrier entrypoints.
 // art_quick_read_barrier_mark_regX uses an non-standard calling
@@ -71,12 +70,53 @@
 extern "C" mirror::Object* art_quick_read_barrier_mark_reg28(mirror::Object*);
 extern "C" mirror::Object* art_quick_read_barrier_mark_reg29(mirror::Object*);
 
+void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_marking) {
+  // ARM64 is the architecture with the largest number of core
+  // registers (32) that supports the read barrier configuration.
+  // Because registers 30 (LR) and 31 (SP/XZR) cannot be used to pass
+  // arguments, only define ReadBarrierMarkRegX entrypoints for the
+  // first 30 registers.  This limitation is not a problem on other
+  // supported architectures (ARM, x86 and x86-64) either, as they
+  // have less core registers (resp. 16, 8 and 16).  (We may have to
+  // revise that design choice if read barrier support is added for
+  // MIPS and/or MIPS64.)
+  qpoints->pReadBarrierMarkReg00 = is_marking ? art_quick_read_barrier_mark_reg00 : nullptr;
+  qpoints->pReadBarrierMarkReg01 = is_marking ? art_quick_read_barrier_mark_reg01 : nullptr;
+  qpoints->pReadBarrierMarkReg02 = is_marking ? art_quick_read_barrier_mark_reg02 : nullptr;
+  qpoints->pReadBarrierMarkReg03 = is_marking ? art_quick_read_barrier_mark_reg03 : nullptr;
+  qpoints->pReadBarrierMarkReg04 = is_marking ? art_quick_read_barrier_mark_reg04 : nullptr;
+  qpoints->pReadBarrierMarkReg05 = is_marking ? art_quick_read_barrier_mark_reg05 : nullptr;
+  qpoints->pReadBarrierMarkReg06 = is_marking ? art_quick_read_barrier_mark_reg06 : nullptr;
+  qpoints->pReadBarrierMarkReg07 = is_marking ? art_quick_read_barrier_mark_reg07 : nullptr;
+  qpoints->pReadBarrierMarkReg08 = is_marking ? art_quick_read_barrier_mark_reg08 : nullptr;
+  qpoints->pReadBarrierMarkReg09 = is_marking ? art_quick_read_barrier_mark_reg09 : nullptr;
+  qpoints->pReadBarrierMarkReg10 = is_marking ? art_quick_read_barrier_mark_reg10 : nullptr;
+  qpoints->pReadBarrierMarkReg11 = is_marking ? art_quick_read_barrier_mark_reg11 : nullptr;
+  qpoints->pReadBarrierMarkReg12 = is_marking ? art_quick_read_barrier_mark_reg12 : nullptr;
+  qpoints->pReadBarrierMarkReg13 = is_marking ? art_quick_read_barrier_mark_reg13 : nullptr;
+  qpoints->pReadBarrierMarkReg14 = is_marking ? art_quick_read_barrier_mark_reg14 : nullptr;
+  qpoints->pReadBarrierMarkReg15 = is_marking ? art_quick_read_barrier_mark_reg15 : nullptr;
+  qpoints->pReadBarrierMarkReg17 = is_marking ? art_quick_read_barrier_mark_reg17 : nullptr;
+  qpoints->pReadBarrierMarkReg18 = is_marking ? art_quick_read_barrier_mark_reg18 : nullptr;
+  qpoints->pReadBarrierMarkReg19 = is_marking ? art_quick_read_barrier_mark_reg19 : nullptr;
+  qpoints->pReadBarrierMarkReg20 = is_marking ? art_quick_read_barrier_mark_reg20 : nullptr;
+  qpoints->pReadBarrierMarkReg21 = is_marking ? art_quick_read_barrier_mark_reg21 : nullptr;
+  qpoints->pReadBarrierMarkReg22 = is_marking ? art_quick_read_barrier_mark_reg22 : nullptr;
+  qpoints->pReadBarrierMarkReg23 = is_marking ? art_quick_read_barrier_mark_reg23 : nullptr;
+  qpoints->pReadBarrierMarkReg24 = is_marking ? art_quick_read_barrier_mark_reg24 : nullptr;
+  qpoints->pReadBarrierMarkReg25 = is_marking ? art_quick_read_barrier_mark_reg25 : nullptr;
+  qpoints->pReadBarrierMarkReg26 = is_marking ? art_quick_read_barrier_mark_reg26 : nullptr;
+  qpoints->pReadBarrierMarkReg27 = is_marking ? art_quick_read_barrier_mark_reg27 : nullptr;
+  qpoints->pReadBarrierMarkReg28 = is_marking ? art_quick_read_barrier_mark_reg28 : nullptr;
+  qpoints->pReadBarrierMarkReg29 = is_marking ? art_quick_read_barrier_mark_reg29 : nullptr;
+}
+
 void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) {
   DefaultInitEntryPoints(jpoints, qpoints);
 
   // Cast
-  qpoints->pInstanceofNonTrivial = artIsAssignableFromCode;
-  qpoints->pCheckCast = art_quick_check_cast;
+  qpoints->pInstanceofNonTrivial = artInstanceOfFromCode;
+  qpoints->pCheckInstanceOf = art_quick_check_instance_of;
 
   // Math
   // TODO null entrypoints not needed for ARM64 - generate inline.
@@ -127,45 +167,8 @@
 
   // Read barrier.
   qpoints->pReadBarrierJni = ReadBarrierJni;
-  // ARM64 is the architecture with the largest number of core
-  // registers (32) that supports the read barrier configuration.
-  // Because registers 30 (LR) and 31 (SP/XZR) cannot be used to pass
-  // arguments, only define ReadBarrierMarkRegX entrypoints for the
-  // first 30 registers.  This limitation is not a problem on other
-  // supported architectures (ARM, x86 and x86-64) either, as they
-  // have less core registers (resp. 16, 8 and 16).  (We may have to
-  // revise that design choice if read barrier support is added for
-  // MIPS and/or MIPS64.)
-  qpoints->pReadBarrierMarkReg00 = art_quick_read_barrier_mark_reg00;
-  qpoints->pReadBarrierMarkReg01 = art_quick_read_barrier_mark_reg01;
-  qpoints->pReadBarrierMarkReg02 = art_quick_read_barrier_mark_reg02;
-  qpoints->pReadBarrierMarkReg03 = art_quick_read_barrier_mark_reg03;
-  qpoints->pReadBarrierMarkReg04 = art_quick_read_barrier_mark_reg04;
-  qpoints->pReadBarrierMarkReg05 = art_quick_read_barrier_mark_reg05;
-  qpoints->pReadBarrierMarkReg06 = art_quick_read_barrier_mark_reg06;
-  qpoints->pReadBarrierMarkReg07 = art_quick_read_barrier_mark_reg07;
-  qpoints->pReadBarrierMarkReg08 = art_quick_read_barrier_mark_reg08;
-  qpoints->pReadBarrierMarkReg09 = art_quick_read_barrier_mark_reg09;
-  qpoints->pReadBarrierMarkReg10 = art_quick_read_barrier_mark_reg10;
-  qpoints->pReadBarrierMarkReg11 = art_quick_read_barrier_mark_reg11;
-  qpoints->pReadBarrierMarkReg12 = art_quick_read_barrier_mark_reg12;
-  qpoints->pReadBarrierMarkReg13 = art_quick_read_barrier_mark_reg13;
-  qpoints->pReadBarrierMarkReg14 = art_quick_read_barrier_mark_reg14;
-  qpoints->pReadBarrierMarkReg15 = art_quick_read_barrier_mark_reg15;
   qpoints->pReadBarrierMarkReg16 = nullptr;  // IP0 is used as a temp by the asm stub.
-  qpoints->pReadBarrierMarkReg17 = art_quick_read_barrier_mark_reg17;
-  qpoints->pReadBarrierMarkReg18 = art_quick_read_barrier_mark_reg18;
-  qpoints->pReadBarrierMarkReg19 = art_quick_read_barrier_mark_reg19;
-  qpoints->pReadBarrierMarkReg20 = art_quick_read_barrier_mark_reg20;
-  qpoints->pReadBarrierMarkReg21 = art_quick_read_barrier_mark_reg21;
-  qpoints->pReadBarrierMarkReg22 = art_quick_read_barrier_mark_reg22;
-  qpoints->pReadBarrierMarkReg23 = art_quick_read_barrier_mark_reg23;
-  qpoints->pReadBarrierMarkReg24 = art_quick_read_barrier_mark_reg24;
-  qpoints->pReadBarrierMarkReg25 = art_quick_read_barrier_mark_reg25;
-  qpoints->pReadBarrierMarkReg26 = art_quick_read_barrier_mark_reg26;
-  qpoints->pReadBarrierMarkReg27 = art_quick_read_barrier_mark_reg27;
-  qpoints->pReadBarrierMarkReg28 = art_quick_read_barrier_mark_reg28;
-  qpoints->pReadBarrierMarkReg29 = art_quick_read_barrier_mark_reg29;
+  UpdateReadBarrierEntrypoints(qpoints, /*is_marking*/ false);
   qpoints->pReadBarrierSlow = artReadBarrierSlow;
   qpoints->pReadBarrierForRootSlow = artReadBarrierForRootSlow;
 };
diff --git a/runtime/arch/arm64/instruction_set_features_arm64.cc b/runtime/arch/arm64/instruction_set_features_arm64.cc
index cad13b2..4e7dea3 100644
--- a/runtime/arch/arm64/instruction_set_features_arm64.cc
+++ b/runtime/arch/arm64/instruction_set_features_arm64.cc
@@ -19,12 +19,13 @@
 #include <fstream>
 #include <sstream>
 
+#include "base/stl_util.h"
 #include "base/stringprintf.h"
 #include "utils.h"  // For Trim.
 
 namespace art {
 
-const Arm64InstructionSetFeatures* Arm64InstructionSetFeatures::FromVariant(
+Arm64FeaturesUniquePtr Arm64InstructionSetFeatures::FromVariant(
     const std::string& variant, std::string* error_msg) {
   const bool smp = true;  // Conservative default.
 
@@ -52,22 +53,23 @@
   // The variants that need a fix for 843419 are the same that need a fix for 835769.
   bool needs_a53_843419_fix = needs_a53_835769_fix;
 
-  return new Arm64InstructionSetFeatures(smp, needs_a53_835769_fix, needs_a53_843419_fix);
+  return Arm64FeaturesUniquePtr(
+      new Arm64InstructionSetFeatures(smp, needs_a53_835769_fix, needs_a53_843419_fix));
 }
 
-const Arm64InstructionSetFeatures* Arm64InstructionSetFeatures::FromBitmap(uint32_t bitmap) {
+Arm64FeaturesUniquePtr Arm64InstructionSetFeatures::FromBitmap(uint32_t bitmap) {
   bool smp = (bitmap & kSmpBitfield) != 0;
   bool is_a53 = (bitmap & kA53Bitfield) != 0;
-  return new Arm64InstructionSetFeatures(smp, is_a53, is_a53);
+  return Arm64FeaturesUniquePtr(new Arm64InstructionSetFeatures(smp, is_a53, is_a53));
 }
 
-const Arm64InstructionSetFeatures* Arm64InstructionSetFeatures::FromCppDefines() {
+Arm64FeaturesUniquePtr Arm64InstructionSetFeatures::FromCppDefines() {
   const bool smp = true;
   const bool is_a53 = true;  // Pessimistically assume all ARM64s are A53s.
-  return new Arm64InstructionSetFeatures(smp, is_a53, is_a53);
+  return Arm64FeaturesUniquePtr(new Arm64InstructionSetFeatures(smp, is_a53, is_a53));
 }
 
-const Arm64InstructionSetFeatures* Arm64InstructionSetFeatures::FromCpuInfo() {
+Arm64FeaturesUniquePtr Arm64InstructionSetFeatures::FromCpuInfo() {
   // Look in /proc/cpuinfo for features we need.  Only use this when we can guarantee that
   // the kernel puts the appropriate feature flags in here.  Sometimes it doesn't.
   bool smp = false;
@@ -89,16 +91,16 @@
   } else {
     LOG(ERROR) << "Failed to open /proc/cpuinfo";
   }
-  return new Arm64InstructionSetFeatures(smp, is_a53, is_a53);
+  return Arm64FeaturesUniquePtr(new Arm64InstructionSetFeatures(smp, is_a53, is_a53));
 }
 
-const Arm64InstructionSetFeatures* Arm64InstructionSetFeatures::FromHwcap() {
+Arm64FeaturesUniquePtr Arm64InstructionSetFeatures::FromHwcap() {
   bool smp = sysconf(_SC_NPROCESSORS_CONF) > 1;
   const bool is_a53 = true;  // Pessimistically assume all ARM64s are A53s.
-  return new Arm64InstructionSetFeatures(smp, is_a53, is_a53);
+  return Arm64FeaturesUniquePtr(new Arm64InstructionSetFeatures(smp, is_a53, is_a53));
 }
 
-const Arm64InstructionSetFeatures* Arm64InstructionSetFeatures::FromAssembly() {
+Arm64FeaturesUniquePtr Arm64InstructionSetFeatures::FromAssembly() {
   UNIMPLEMENTED(WARNING);
   return FromCppDefines();
 }
@@ -130,7 +132,8 @@
   return result;
 }
 
-const InstructionSetFeatures* Arm64InstructionSetFeatures::AddFeaturesFromSplitString(
+std::unique_ptr<const InstructionSetFeatures>
+Arm64InstructionSetFeatures::AddFeaturesFromSplitString(
     const bool smp, const std::vector<std::string>& features, std::string* error_msg) const {
   bool is_a53 = fix_cortex_a53_835769_;
   for (auto i = features.begin(); i != features.end(); i++) {
@@ -144,7 +147,8 @@
       return nullptr;
     }
   }
-  return new Arm64InstructionSetFeatures(smp, is_a53, is_a53);
+  return std::unique_ptr<const InstructionSetFeatures>(
+      new Arm64InstructionSetFeatures(smp, is_a53, is_a53));
 }
 
 }  // namespace art
diff --git a/runtime/arch/arm64/instruction_set_features_arm64.h b/runtime/arch/arm64/instruction_set_features_arm64.h
index abd7e83..e51aa1c 100644
--- a/runtime/arch/arm64/instruction_set_features_arm64.h
+++ b/runtime/arch/arm64/instruction_set_features_arm64.h
@@ -21,29 +21,31 @@
 
 namespace art {
 
+class Arm64InstructionSetFeatures;
+using Arm64FeaturesUniquePtr = std::unique_ptr<const Arm64InstructionSetFeatures>;
+
 // Instruction set features relevant to the ARM64 architecture.
 class Arm64InstructionSetFeatures FINAL : public InstructionSetFeatures {
  public:
   // Process a CPU variant string like "krait" or "cortex-a15" and create InstructionSetFeatures.
-  static const Arm64InstructionSetFeatures* FromVariant(const std::string& variant,
-                                                        std::string* error_msg);
+  static Arm64FeaturesUniquePtr FromVariant(const std::string& variant, std::string* error_msg);
 
   // Parse a bitmap and create an InstructionSetFeatures.
-  static const Arm64InstructionSetFeatures* FromBitmap(uint32_t bitmap);
+  static Arm64FeaturesUniquePtr FromBitmap(uint32_t bitmap);
 
   // Turn C pre-processor #defines into the equivalent instruction set features.
-  static const Arm64InstructionSetFeatures* FromCppDefines();
+  static Arm64FeaturesUniquePtr FromCppDefines();
 
   // Process /proc/cpuinfo and use kRuntimeISA to produce InstructionSetFeatures.
-  static const Arm64InstructionSetFeatures* FromCpuInfo();
+  static Arm64FeaturesUniquePtr FromCpuInfo();
 
   // Process the auxiliary vector AT_HWCAP entry and use kRuntimeISA to produce
   // InstructionSetFeatures.
-  static const Arm64InstructionSetFeatures* FromHwcap();
+  static Arm64FeaturesUniquePtr FromHwcap();
 
   // Use assembly tests of the current runtime (ie kRuntimeISA) to determine the
   // InstructionSetFeatures. This works around kernel bugs in AT_HWCAP and /proc/cpuinfo.
-  static const Arm64InstructionSetFeatures* FromAssembly();
+  static Arm64FeaturesUniquePtr FromAssembly();
 
   bool Equals(const InstructionSetFeatures* other) const OVERRIDE;
 
@@ -70,7 +72,7 @@
 
  protected:
   // Parse a vector of the form "a53" adding these to a new ArmInstructionSetFeatures.
-  const InstructionSetFeatures*
+  std::unique_ptr<const InstructionSetFeatures>
       AddFeaturesFromSplitString(const bool smp, const std::vector<std::string>& features,
                                  std::string* error_msg) const OVERRIDE;
 
diff --git a/runtime/arch/arm64/quick_entrypoints_arm64.S b/runtime/arch/arm64/quick_entrypoints_arm64.S
index 04a3cc6..73bca03 100644
--- a/runtime/arch/arm64/quick_entrypoints_arm64.S
+++ b/runtime/arch/arm64/quick_entrypoints_arm64.S
@@ -337,7 +337,7 @@
     SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP_SKIP_X29_LR
 .endm
 
-.macro RESTORE_SAVE_EVERYTHING_FRAME
+.macro RESTORE_SAVE_EVERYTHING_FRAME_KEEP_X0
     // Restore FP registers.
     // For better performance, load d0 and d31 separately, so that all LDPs are 16-byte aligned.
     ldr d0,       [sp, #8]
@@ -359,7 +359,6 @@
     ldr d31,      [sp, #256]
 
     // Restore core registers.
-    RESTORE_REG            x0, 264
     RESTORE_TWO_REGS  x1,  x2, 272
     RESTORE_TWO_REGS  x3,  x4, 288
     RESTORE_TWO_REGS  x5,  x6, 304
@@ -379,6 +378,11 @@
     DECREASE_FRAME 512
 .endm
 
+.macro RESTORE_SAVE_EVERYTHING_FRAME
+    RESTORE_REG            x0, 264
+    RESTORE_SAVE_EVERYTHING_FRAME_KEEP_X0
+.endm
+
 .macro RETURN_IF_RESULT_IS_ZERO
     cbnz x0, 1f                // result non-zero branch over
     ret                        // return
@@ -392,11 +396,10 @@
 .endm
 
     /*
-     * Macro that set calls through to artDeliverPendingExceptionFromCode, where the pending
-     * exception is Thread::Current()->exception_
+     * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
+     * exception is Thread::Current()->exception_ when the runtime method frame is ready.
      */
-.macro DELIVER_PENDING_EXCEPTION
-    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
+.macro DELIVER_PENDING_EXCEPTION_FRAME_READY
     mov x0, xSELF
 
     // Point of no return.
@@ -404,6 +407,15 @@
     brk 0  // Unreached
 .endm
 
+    /*
+     * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
+     * exception is Thread::Current()->exception_.
+     */
+.macro DELIVER_PENDING_EXCEPTION
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
+    DELIVER_PENDING_EXCEPTION_FRAME_READY
+.endm
+
 .macro RETURN_OR_DELIVER_PENDING_EXCEPTION_REG reg
     ldr \reg, [xSELF, # THREAD_EXCEPTION_OFFSET]   // Get exception field.
     cbnz \reg, 1f
@@ -1282,18 +1294,19 @@
 END art_quick_unlock_object_no_inline
 
     /*
-     * Entry from managed code that calls artIsAssignableFromCode and on failure calls
-     * artThrowClassCastException.
+     * Entry from managed code that calls artInstanceOfFromCode and on failure calls
+     * artThrowClassCastExceptionForObject.
      */
-    .extern artThrowClassCastException
-ENTRY art_quick_check_cast
+    .extern artInstanceOfFromCode
+    .extern artThrowClassCastExceptionForObject
+ENTRY art_quick_check_instance_of
     // Store arguments and link register
     // Stack needs to be 16B aligned on calls.
     SAVE_TWO_REGS_INCREASE_FRAME x0, x1, 32
     SAVE_REG xLR, 24
 
     // Call runtime code
-    bl artIsAssignableFromCode
+    bl artInstanceOfFromCode
 
     // Check for exception
     cbz x0, .Lthrow_class_cast_exception
@@ -1312,9 +1325,9 @@
 
     SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context
     mov x2, xSELF                     // pass Thread::Current
-    bl artThrowClassCastException     // (Class*, Class*, Thread*)
+    bl artThrowClassCastExceptionForObject     // (Object*, Class*, Thread*)
     brk 0                             // We should not return here...
-END art_quick_check_cast
+END art_quick_check_instance_of
 
 // Restore xReg's value from [sp, #offset] if xReg is not the same as xExclude.
 .macro POP_REG_NE xReg, offset, xExclude
@@ -1638,40 +1651,54 @@
      */
 
 ENTRY art_quick_resolve_string
-    ldr   x1, [sp]                                               // load referrer
-    ldr   w2, [x1, #ART_METHOD_DECLARING_CLASS_OFFSET]           // load declaring class
-    ldr   x1, [x2, #DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET]    // load string dex cache
-    ubfx  x2, x0, #0, #STRING_DEX_CACHE_HASH_BITS                // get masked string index into x2
-    ldr   x2, [x1, x2, lsl #STRING_DEX_CACHE_ELEMENT_SIZE_SHIFT]  // load dex cache pair into x2
-    cmp   x0, x2, lsr #32                                         // compare against upper 32 bits
+    SAVE_TWO_REGS_INCREASE_FRAME x29, xLR, 2 * __SIZEOF_POINTER__
+    ldr   x29, [sp, #(2 * __SIZEOF_POINTER__)]                   // load referrer
+    ldr   w29, [x29, #ART_METHOD_DECLARING_CLASS_OFFSET]         // load declaring class
+    ldr   x29, [x29, #DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET]  // load string dex cache
+    ubfx  lr, x0, #0, #STRING_DEX_CACHE_HASH_BITS                // get masked string index into LR
+    ldr   x29, [x29, lr, lsl #STRING_DEX_CACHE_ELEMENT_SIZE_SHIFT]  // load dex cache pair into x29
+    cmp   x0, x29, lsr #32                                       // compare against upper 32 bits
     bne   .Lart_quick_resolve_string_slow_path
-    ubfx  x0, x2, #0, #32                                        // extract lower 32 bits into x0
+    ubfx  x0, x29, #0, #32                                       // extract lower 32 bits into x0
 #ifdef USE_READ_BARRIER
     // Most common case: GC is not marking.
-    ldr    w3, [xSELF, #THREAD_IS_GC_MARKING_OFFSET]
-    cbnz   x3, .Lart_quick_resolve_string_marking
+    ldr    w29, [xSELF, #THREAD_IS_GC_MARKING_OFFSET]
+    cbnz   x29, .Lart_quick_resolve_string_marking
+.Lart_quick_resolve_string_no_rb:
 #endif
+    .cfi_remember_state
+    RESTORE_TWO_REGS_DECREASE_FRAME x29, xLR, 2 * __SIZEOF_POINTER__
     ret
+    .cfi_restore_state
+    .cfi_def_cfa_offset 16                          // workaround for clang bug: 31975598
+
+#ifdef USE_READ_BARRIER
+// GC is marking case, need to check the mark bit.
+.Lart_quick_resolve_string_marking:
+    ldr   x29, [x0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
+    tbnz  x29, #LOCK_WORD_MARK_BIT_SHIFT, .Lart_quick_resolve_string_no_rb
+    .cfi_remember_state
+    RESTORE_TWO_REGS_DECREASE_FRAME x29, xLR, 2 * __SIZEOF_POINTER__
+    // Note: art_quick_read_barrier_mark_reg00 clobbers IP0 but the .Lslow_rb_* does not.
+    b     .Lslow_rb_art_quick_read_barrier_mark_reg00  // Get the marked string back.
+    .cfi_restore_state
+    .cfi_def_cfa_offset 16                          // workaround for clang bug: 31975598
+#endif
 
 // Slow path case, the index did not match.
 .Lart_quick_resolve_string_slow_path:
-    SETUP_SAVE_REFS_ONLY_FRAME                      // save callee saves in case of GC
+    INCREASE_FRAME (FRAME_SIZE_SAVE_EVERYTHING - 2 * __SIZEOF_POINTER__)
+    SETUP_SAVE_EVERYTHING_FRAME_DECREMENTED_SP_SKIP_X29_LR  // save callee saves in case of GC
     mov   x1, xSELF                                 // pass Thread::Current
     bl    artResolveStringFromCode                  // (int32_t string_idx, Thread* self)
-    RESTORE_SAVE_REFS_ONLY_FRAME
-    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
-
-// GC is marking case, need to check the mark bit.
-.Lart_quick_resolve_string_marking:
-    ldr   x3, [x0, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
-    tbnz  x3, #LOCK_WORD_MARK_BIT_SHIFT, .Lart_quick_resolve_string_no_rb
-    // Save LR so that we can return, also x1 for alignment purposes.
-    SAVE_TWO_REGS_INCREASE_FRAME x1, xLR, 16        // Save x1, LR.
-    bl     artReadBarrierMark                       // Get the marked string back.
-    RESTORE_TWO_REGS_DECREASE_FRAME x1, xLR, 16     // Restore registers.
-.Lart_quick_resolve_string_no_rb:
-    ret
-
+    cbz   w0, 1f                                    // If result is null, deliver the OOME.
+    .cfi_remember_state
+    RESTORE_SAVE_EVERYTHING_FRAME_KEEP_X0
+    ret                        // return
+    .cfi_restore_state
+    .cfi_def_cfa_offset FRAME_SIZE_SAVE_EVERYTHING  // workaround for clang bug: 31975598
+1:
+    DELIVER_PENDING_EXCEPTION_FRAME_READY
 END art_quick_resolve_string
 
 // Generate the allocation entrypoints for each allocator.
@@ -2376,12 +2403,15 @@
      *    w2:   Starting offset in string data
      */
 ENTRY art_quick_indexof
+#if (STRING_COMPRESSION_FEATURE)
+    ldr   w4, [x0, #MIRROR_STRING_COUNT_OFFSET]
+#else
     ldr   w3, [x0, #MIRROR_STRING_COUNT_OFFSET]
+#endif
     add   x0, x0, #MIRROR_STRING_VALUE_OFFSET
 #if (STRING_COMPRESSION_FEATURE)
     /* w4 holds count (with flag) and w3 holds actual length */
-    mov   w4, w3
-    and   w3, w3, #2147483647
+    lsr   w3, w4, #1
 #endif
     /* Clamp start to [0..count] */
     cmp   w2, #0
@@ -2393,7 +2423,7 @@
     mov   x5, x0
 
 #if (STRING_COMPRESSION_FEATURE)
-    tbnz  w4, #31, .Lstring_indexof_compressed
+    tbz   w4, #0, .Lstring_indexof_compressed
 #endif
     /* Build pointer to start of data to compare and pre-bias */
     add   x0, x0, x2, lsl #1
@@ -2513,9 +2543,17 @@
      */
     // Use wIP0 as temp and check the mark bit of the reference. wIP0 is not used by the compiler.
     ldr   wIP0, [\xreg, #MIRROR_OBJECT_LOCK_WORD_OFFSET]
-    tbz   wIP0, #LOCK_WORD_MARK_BIT_SHIFT, .Lslow_path_rb_\name
+    tbz   wIP0, #LOCK_WORD_MARK_BIT_SHIFT, .Lnot_marked_rb_\name
+.Lret_rb_\name:
     ret
-.Lslow_path_rb_\name:
+.Lnot_marked_rb_\name:
+    // Check if the top two bits are one, if this is the case it is a forwarding address.
+    mvn wIP0, wIP0
+    cmp wzr, wIP0, lsr #30
+    beq .Lret_forwarding_address\name
+.Lslow_rb_\name:
+    // We must not clobber IP0 since art_quick_resolve_string makes a tail call here and relies on
+    // IP0 being restored.
     // Save all potentially live caller-save core registers.
     SAVE_TWO_REGS_INCREASE_FRAME x0, x1, 368
     SAVE_TWO_REGS  x2,  x3, 16
@@ -2580,7 +2618,12 @@
     // Restore return address and remove padding.
     RESTORE_REG xLR, 360
     DECREASE_FRAME 368
-.Lret_rb_\name:
+    ret
+.Lret_forwarding_address\name:
+    mvn wIP0, wIP0
+    // Shift left by the forwarding address shift. This clears out the state bits since they are
+    // in the top 2 bits of the lock word.
+    lsl \wreg, wIP0, #LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT
     ret
 END \name
 .endm
diff --git a/runtime/arch/instruction_set_features.cc b/runtime/arch/instruction_set_features.cc
index 898f83a..b32391f 100644
--- a/runtime/arch/instruction_set_features.cc
+++ b/runtime/arch/instruction_set_features.cc
@@ -29,29 +29,28 @@
 
 namespace art {
 
-const InstructionSetFeatures* InstructionSetFeatures::FromVariant(InstructionSet isa,
-                                                                  const std::string& variant,
-                                                                  std::string* error_msg) {
-  const InstructionSetFeatures* result;
+std::unique_ptr<const InstructionSetFeatures> InstructionSetFeatures::FromVariant(
+    InstructionSet isa, const std::string& variant, std::string* error_msg) {
+  std::unique_ptr<const InstructionSetFeatures> result;
   switch (isa) {
     case kArm:
     case kThumb2:
-      result = ArmInstructionSetFeatures::FromVariant(variant, error_msg);
+      result.reset(ArmInstructionSetFeatures::FromVariant(variant, error_msg).release());
       break;
     case kArm64:
-      result = Arm64InstructionSetFeatures::FromVariant(variant, error_msg);
+      result.reset(Arm64InstructionSetFeatures::FromVariant(variant, error_msg).release());
       break;
     case kMips:
-      result = MipsInstructionSetFeatures::FromVariant(variant, error_msg);
+      result.reset(MipsInstructionSetFeatures::FromVariant(variant, error_msg).release());
       break;
     case kMips64:
       result = Mips64InstructionSetFeatures::FromVariant(variant, error_msg);
       break;
     case kX86:
-      result = X86InstructionSetFeatures::FromVariant(variant, error_msg);
+      result.reset(X86InstructionSetFeatures::FromVariant(variant, error_msg).release());
       break;
     case kX86_64:
-      result = X86_64InstructionSetFeatures::FromVariant(variant, error_msg);
+      result.reset(X86_64InstructionSetFeatures::FromVariant(variant, error_msg).release());
       break;
     default:
       UNIMPLEMENTED(FATAL) << isa;
@@ -61,28 +60,28 @@
   return result;
 }
 
-const InstructionSetFeatures* InstructionSetFeatures::FromBitmap(InstructionSet isa,
-                                                                 uint32_t bitmap) {
-  const InstructionSetFeatures* result;
+std::unique_ptr<const InstructionSetFeatures> InstructionSetFeatures::FromBitmap(InstructionSet isa,
+                                                                                 uint32_t bitmap) {
+  std::unique_ptr<const InstructionSetFeatures> result;
   switch (isa) {
     case kArm:
     case kThumb2:
-      result = ArmInstructionSetFeatures::FromBitmap(bitmap);
+      result.reset(ArmInstructionSetFeatures::FromBitmap(bitmap).release());
       break;
     case kArm64:
-      result = Arm64InstructionSetFeatures::FromBitmap(bitmap);
+      result.reset(Arm64InstructionSetFeatures::FromBitmap(bitmap).release());
       break;
     case kMips:
-      result = MipsInstructionSetFeatures::FromBitmap(bitmap);
+      result.reset(MipsInstructionSetFeatures::FromBitmap(bitmap).release());
       break;
     case kMips64:
       result = Mips64InstructionSetFeatures::FromBitmap(bitmap);
       break;
     case kX86:
-      result = X86InstructionSetFeatures::FromBitmap(bitmap);
+      result.reset(X86InstructionSetFeatures::FromBitmap(bitmap).release());
       break;
     case kX86_64:
-      result = X86_64InstructionSetFeatures::FromBitmap(bitmap);
+      result.reset(X86_64InstructionSetFeatures::FromBitmap(bitmap).release());
       break;
     default:
       UNIMPLEMENTED(FATAL) << isa;
@@ -92,27 +91,27 @@
   return result;
 }
 
-const InstructionSetFeatures* InstructionSetFeatures::FromCppDefines() {
-  const InstructionSetFeatures* result;
+std::unique_ptr<const InstructionSetFeatures> InstructionSetFeatures::FromCppDefines() {
+  std::unique_ptr<const InstructionSetFeatures> result;
   switch (kRuntimeISA) {
     case kArm:
     case kThumb2:
-      result = ArmInstructionSetFeatures::FromCppDefines();
+      result.reset(ArmInstructionSetFeatures::FromCppDefines().release());
       break;
     case kArm64:
-      result = Arm64InstructionSetFeatures::FromCppDefines();
+      result.reset(Arm64InstructionSetFeatures::FromCppDefines().release());
       break;
     case kMips:
-      result = MipsInstructionSetFeatures::FromCppDefines();
+      result.reset(MipsInstructionSetFeatures::FromCppDefines().release());
       break;
     case kMips64:
       result = Mips64InstructionSetFeatures::FromCppDefines();
       break;
     case kX86:
-      result = X86InstructionSetFeatures::FromCppDefines();
+      result.reset(X86InstructionSetFeatures::FromCppDefines().release());
       break;
     case kX86_64:
-      result = X86_64InstructionSetFeatures::FromCppDefines();
+      result.reset(X86_64InstructionSetFeatures::FromCppDefines().release());
       break;
     default:
       UNIMPLEMENTED(FATAL) << kRuntimeISA;
@@ -122,27 +121,27 @@
 }
 
 
-const InstructionSetFeatures* InstructionSetFeatures::FromCpuInfo() {
-  const InstructionSetFeatures* result;
+std::unique_ptr<const InstructionSetFeatures> InstructionSetFeatures::FromCpuInfo() {
+  std::unique_ptr<const InstructionSetFeatures> result;
   switch (kRuntimeISA) {
     case kArm:
     case kThumb2:
-      result = ArmInstructionSetFeatures::FromCpuInfo();
+      result.reset(ArmInstructionSetFeatures::FromCpuInfo().release());
       break;
     case kArm64:
-      result = Arm64InstructionSetFeatures::FromCpuInfo();
+      result.reset(Arm64InstructionSetFeatures::FromCpuInfo().release());
       break;
     case kMips:
-      result = MipsInstructionSetFeatures::FromCpuInfo();
+      result.reset(MipsInstructionSetFeatures::FromCpuInfo().release());
       break;
     case kMips64:
       result = Mips64InstructionSetFeatures::FromCpuInfo();
       break;
     case kX86:
-      result = X86InstructionSetFeatures::FromCpuInfo();
+      result.reset(X86InstructionSetFeatures::FromCpuInfo().release());
       break;
     case kX86_64:
-      result = X86_64InstructionSetFeatures::FromCpuInfo();
+      result.reset(X86_64InstructionSetFeatures::FromCpuInfo().release());
       break;
     default:
       UNIMPLEMENTED(FATAL) << kRuntimeISA;
@@ -151,27 +150,27 @@
   return result;
 }
 
-const InstructionSetFeatures* InstructionSetFeatures::FromHwcap() {
-  const InstructionSetFeatures* result;
+std::unique_ptr<const InstructionSetFeatures> InstructionSetFeatures::FromHwcap() {
+  std::unique_ptr<const InstructionSetFeatures> result;
   switch (kRuntimeISA) {
     case kArm:
     case kThumb2:
-      result = ArmInstructionSetFeatures::FromHwcap();
+      result.reset(ArmInstructionSetFeatures::FromHwcap().release());
       break;
     case kArm64:
-      result = Arm64InstructionSetFeatures::FromHwcap();
+      result.reset(Arm64InstructionSetFeatures::FromHwcap().release());
       break;
     case kMips:
-      result = MipsInstructionSetFeatures::FromHwcap();
+      result.reset(MipsInstructionSetFeatures::FromHwcap().release());
       break;
     case kMips64:
       result = Mips64InstructionSetFeatures::FromHwcap();
       break;
     case kX86:
-      result = X86InstructionSetFeatures::FromHwcap();
+      result.reset(X86InstructionSetFeatures::FromHwcap().release());
       break;
     case kX86_64:
-      result = X86_64InstructionSetFeatures::FromHwcap();
+      result.reset(X86_64InstructionSetFeatures::FromHwcap().release());
       break;
     default:
       UNIMPLEMENTED(FATAL) << kRuntimeISA;
@@ -180,27 +179,27 @@
   return result;
 }
 
-const InstructionSetFeatures* InstructionSetFeatures::FromAssembly() {
-  const InstructionSetFeatures* result;
+std::unique_ptr<const InstructionSetFeatures> InstructionSetFeatures::FromAssembly() {
+  std::unique_ptr<const InstructionSetFeatures> result;
   switch (kRuntimeISA) {
     case kArm:
     case kThumb2:
-      result = ArmInstructionSetFeatures::FromAssembly();
+      result.reset(ArmInstructionSetFeatures::FromAssembly().release());
       break;
     case kArm64:
-      result = Arm64InstructionSetFeatures::FromAssembly();
+      result.reset(Arm64InstructionSetFeatures::FromAssembly().release());
       break;
     case kMips:
-      result = MipsInstructionSetFeatures::FromAssembly();
+      result.reset(MipsInstructionSetFeatures::FromAssembly().release());
       break;
     case kMips64:
       result = Mips64InstructionSetFeatures::FromAssembly();
       break;
     case kX86:
-      result = X86InstructionSetFeatures::FromAssembly();
+      result.reset(X86InstructionSetFeatures::FromAssembly().release());
       break;
     case kX86_64:
-      result = X86_64InstructionSetFeatures::FromAssembly();
+      result.reset(X86_64InstructionSetFeatures::FromAssembly().release());
       break;
     default:
       UNIMPLEMENTED(FATAL) << kRuntimeISA;
@@ -209,11 +208,11 @@
   return result;
 }
 
-const InstructionSetFeatures* InstructionSetFeatures::AddFeaturesFromString(
+std::unique_ptr<const InstructionSetFeatures> InstructionSetFeatures::AddFeaturesFromString(
     const std::string& feature_list, std::string* error_msg) const {
   if (feature_list.empty()) {
     *error_msg = "No instruction set features specified";
-    return nullptr;
+    return std::unique_ptr<const InstructionSetFeatures>();
   }
   std::vector<std::string> features;
   Split(feature_list, ',', &features);
@@ -223,7 +222,7 @@
   for (auto it = features.begin(); it != features.end();) {
     if (use_default) {
       *error_msg = "Unexpected instruction set features after 'default'";
-      return nullptr;
+      return std::unique_ptr<const InstructionSetFeatures>();
     }
     std::string feature = Trim(*it);
     bool erase = false;
@@ -233,7 +232,7 @@
         erase = true;
       } else {
         *error_msg = "Unexpected instruction set features before 'default'";
-        return nullptr;
+        return std::unique_ptr<const InstructionSetFeatures>();
       }
     } else if (feature == "smp") {
       smp = true;
diff --git a/runtime/arch/instruction_set_features.h b/runtime/arch/instruction_set_features.h
index d10ae21..d84bc02 100644
--- a/runtime/arch/instruction_set_features.h
+++ b/runtime/arch/instruction_set_features.h
@@ -17,6 +17,7 @@
 #ifndef ART_RUNTIME_ARCH_INSTRUCTION_SET_FEATURES_H_
 #define ART_RUNTIME_ARCH_INSTRUCTION_SET_FEATURES_H_
 
+#include <memory>
 #include <ostream>
 #include <vector>
 
@@ -36,31 +37,32 @@
 class InstructionSetFeatures {
  public:
   // Process a CPU variant string for the given ISA and create an InstructionSetFeatures.
-  static const InstructionSetFeatures* FromVariant(InstructionSet isa,
-                                                   const std::string& variant,
-                                                   std::string* error_msg);
+  static std::unique_ptr<const InstructionSetFeatures> FromVariant(InstructionSet isa,
+                                                                   const std::string& variant,
+                                                                   std::string* error_msg);
 
   // Parse a bitmap for the given isa and create an InstructionSetFeatures.
-  static const InstructionSetFeatures* FromBitmap(InstructionSet isa, uint32_t bitmap);
+  static std::unique_ptr<const InstructionSetFeatures> FromBitmap(InstructionSet isa,
+                                                                  uint32_t bitmap);
 
   // Turn C pre-processor #defines into the equivalent instruction set features for kRuntimeISA.
-  static const InstructionSetFeatures* FromCppDefines();
+  static std::unique_ptr<const InstructionSetFeatures> FromCppDefines();
 
   // Process /proc/cpuinfo and use kRuntimeISA to produce InstructionSetFeatures.
-  static const InstructionSetFeatures* FromCpuInfo();
+  static std::unique_ptr<const InstructionSetFeatures> FromCpuInfo();
 
   // Process the auxiliary vector AT_HWCAP entry and use kRuntimeISA to produce
   // InstructionSetFeatures.
-  static const InstructionSetFeatures* FromHwcap();
+  static std::unique_ptr<const InstructionSetFeatures> FromHwcap();
 
   // Use assembly tests of the current runtime (ie kRuntimeISA) to determine the
   // InstructionSetFeatures. This works around kernel bugs in AT_HWCAP and /proc/cpuinfo.
-  static const InstructionSetFeatures* FromAssembly();
+  static std::unique_ptr<const InstructionSetFeatures> FromAssembly();
 
   // Parse a string of the form "div,-atomic_ldrd_strd" adding and removing these features to
   // create a new InstructionSetFeatures.
-  const InstructionSetFeatures* AddFeaturesFromString(const std::string& feature_list,
-                                                      std::string* error_msg) const WARN_UNUSED;
+  std::unique_ptr<const InstructionSetFeatures> AddFeaturesFromString(
+      const std::string& feature_list, std::string* error_msg) const WARN_UNUSED;
 
   // Are these features the same as the other given features?
   virtual bool Equals(const InstructionSetFeatures* other) const = 0;
@@ -107,7 +109,7 @@
                                  const std::string& variant);
 
   // Add architecture specific features in sub-classes.
-  virtual const InstructionSetFeatures*
+  virtual std::unique_ptr<const InstructionSetFeatures>
       AddFeaturesFromSplitString(bool smp, const std::vector<std::string>& features,
                                  std::string* error_msg) const = 0;
 
diff --git a/runtime/arch/mips/entrypoints_init_mips.cc b/runtime/arch/mips/entrypoints_init_mips.cc
index e10d4e6..6a442a5 100644
--- a/runtime/arch/mips/entrypoints_init_mips.cc
+++ b/runtime/arch/mips/entrypoints_init_mips.cc
@@ -30,8 +30,7 @@
 namespace art {
 
 // Cast entrypoints.
-extern "C" size_t artIsAssignableFromCode(const mirror::Class* klass,
-                                          const mirror::Class* ref_class);
+extern "C" size_t artInstanceOfFromCode(mirror::Object* obj, mirror::Class* ref_class);
 
 // Math entrypoints.
 extern int32_t CmpgDouble(double a, double b);
@@ -60,6 +59,10 @@
 extern "C" int64_t __divdi3(int64_t, int64_t);
 extern "C" int64_t __moddi3(int64_t, int64_t);
 
+// No read barrier entrypoints for marking registers.
+void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints ATTRIBUTE_UNUSED,
+                                  bool is_marking ATTRIBUTE_UNUSED) {}
+
 void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) {
   // Note: MIPS has asserts checking for the type of entrypoint. Don't move it
   //       to InitDefaultEntryPoints().
@@ -71,10 +74,10 @@
   ResetQuickAllocEntryPoints(qpoints);
 
   // Cast
-  qpoints->pInstanceofNonTrivial = artIsAssignableFromCode;
+  qpoints->pInstanceofNonTrivial = artInstanceOfFromCode;
   static_assert(IsDirectEntrypoint(kQuickInstanceofNonTrivial), "Direct C stub not marked direct.");
-  qpoints->pCheckCast = art_quick_check_cast;
-  static_assert(!IsDirectEntrypoint(kQuickCheckCast), "Non-direct C stub marked direct.");
+  qpoints->pCheckInstanceOf = art_quick_check_instance_of;
+  static_assert(!IsDirectEntrypoint(kQuickCheckInstanceOf), "Non-direct C stub marked direct.");
 
   // DexCache
   qpoints->pInitializeStaticStorage = art_quick_initialize_static_storage;
@@ -153,17 +156,24 @@
   // JNI
   qpoints->pJniMethodStart = JniMethodStart;
   static_assert(!IsDirectEntrypoint(kQuickJniMethodStart), "Non-direct C stub marked direct.");
+  qpoints->pJniMethodFastStart = JniMethodFastStart;
+  static_assert(!IsDirectEntrypoint(kQuickJniMethodFastStart), "Non-direct C stub marked direct.");
   qpoints->pJniMethodStartSynchronized = JniMethodStartSynchronized;
   static_assert(!IsDirectEntrypoint(kQuickJniMethodStartSynchronized),
                 "Non-direct C stub marked direct.");
   qpoints->pJniMethodEnd = JniMethodEnd;
   static_assert(!IsDirectEntrypoint(kQuickJniMethodEnd), "Non-direct C stub marked direct.");
+  qpoints->pJniMethodFastEnd = JniMethodFastEnd;
+  static_assert(!IsDirectEntrypoint(kQuickJniMethodFastEnd), "Non-direct C stub marked direct.");
   qpoints->pJniMethodEndSynchronized = JniMethodEndSynchronized;
   static_assert(!IsDirectEntrypoint(kQuickJniMethodEndSynchronized),
                 "Non-direct C stub marked direct.");
   qpoints->pJniMethodEndWithReference = JniMethodEndWithReference;
   static_assert(!IsDirectEntrypoint(kQuickJniMethodEndWithReference),
                 "Non-direct C stub marked direct.");
+  qpoints->pJniMethodFastEndWithReference = JniMethodFastEndWithReference;
+  static_assert(!IsDirectEntrypoint(kQuickJniMethodFastEndWithReference),
+                "Non-direct C stub marked direct.");
   qpoints->pJniMethodEndWithReferenceSynchronized = JniMethodEndWithReferenceSynchronized;
   static_assert(!IsDirectEntrypoint(kQuickJniMethodEndWithReferenceSynchronized),
                 "Non-direct C stub marked direct.");
diff --git a/runtime/arch/mips/instruction_set_features_mips.cc b/runtime/arch/mips/instruction_set_features_mips.cc
index b3a9866..a95b6f6 100644
--- a/runtime/arch/mips/instruction_set_features_mips.cc
+++ b/runtime/arch/mips/instruction_set_features_mips.cc
@@ -19,6 +19,7 @@
 #include <fstream>
 #include <sstream>
 
+#include "base/stl_util.h"
 #include "base/stringprintf.h"
 #include "utils.h"  // For Trim.
 
@@ -63,7 +64,7 @@
   }
 }
 
-const MipsInstructionSetFeatures* MipsInstructionSetFeatures::FromVariant(
+MipsFeaturesUniquePtr MipsInstructionSetFeatures::FromVariant(
     const std::string& variant, std::string* error_msg ATTRIBUTE_UNUSED) {
 
   bool smp = true;  // Conservative default.
@@ -97,18 +98,19 @@
     LOG(WARNING) << "Unexpected CPU variant for Mips32 using defaults: " << variant;
   }
 
-  return new MipsInstructionSetFeatures(smp, fpu_32bit, mips_isa_gte2, r6);
+  return MipsFeaturesUniquePtr(new MipsInstructionSetFeatures(smp, fpu_32bit, mips_isa_gte2, r6));
 }
 
-const MipsInstructionSetFeatures* MipsInstructionSetFeatures::FromBitmap(uint32_t bitmap) {
+MipsFeaturesUniquePtr MipsInstructionSetFeatures::FromBitmap(
+    uint32_t bitmap) {
   bool smp = (bitmap & kSmpBitfield) != 0;
   bool fpu_32bit = (bitmap & kFpu32Bitfield) != 0;
   bool mips_isa_gte2 = (bitmap & kIsaRevGte2Bitfield) != 0;
   bool r6 = (bitmap & kR6) != 0;
-  return new MipsInstructionSetFeatures(smp, fpu_32bit, mips_isa_gte2, r6);
+  return MipsFeaturesUniquePtr(new MipsInstructionSetFeatures(smp, fpu_32bit, mips_isa_gte2, r6));
 }
 
-const MipsInstructionSetFeatures* MipsInstructionSetFeatures::FromCppDefines() {
+MipsFeaturesUniquePtr MipsInstructionSetFeatures::FromCppDefines() {
   // Assume conservative defaults.
   const bool smp = true;
 
@@ -117,10 +119,10 @@
   bool r6;
   GetFlagsFromCppDefined(&mips_isa_gte2, &r6, &fpu_32bit);
 
-  return new MipsInstructionSetFeatures(smp, fpu_32bit, mips_isa_gte2, r6);
+  return MipsFeaturesUniquePtr(new MipsInstructionSetFeatures(smp, fpu_32bit, mips_isa_gte2, r6));
 }
 
-const MipsInstructionSetFeatures* MipsInstructionSetFeatures::FromCpuInfo() {
+MipsFeaturesUniquePtr MipsInstructionSetFeatures::FromCpuInfo() {
   // Look in /proc/cpuinfo for features we need.  Only use this when we can guarantee that
   // the kernel puts the appropriate feature flags in here.  Sometimes it doesn't.
   // Assume conservative defaults.
@@ -147,15 +149,15 @@
   } else {
     LOG(ERROR) << "Failed to open /proc/cpuinfo";
   }
-  return new MipsInstructionSetFeatures(smp, fpu_32bit, mips_isa_gte2, r6);
+  return MipsFeaturesUniquePtr(new MipsInstructionSetFeatures(smp, fpu_32bit, mips_isa_gte2, r6));
 }
 
-const MipsInstructionSetFeatures* MipsInstructionSetFeatures::FromHwcap() {
+MipsFeaturesUniquePtr MipsInstructionSetFeatures::FromHwcap() {
   UNIMPLEMENTED(WARNING);
   return FromCppDefines();
 }
 
-const MipsInstructionSetFeatures* MipsInstructionSetFeatures::FromAssembly() {
+MipsFeaturesUniquePtr MipsInstructionSetFeatures::FromAssembly() {
   UNIMPLEMENTED(WARNING);
   return FromCppDefines();
 }
@@ -201,7 +203,8 @@
   return result;
 }
 
-const InstructionSetFeatures* MipsInstructionSetFeatures::AddFeaturesFromSplitString(
+std::unique_ptr<const InstructionSetFeatures>
+MipsInstructionSetFeatures::AddFeaturesFromSplitString(
     const bool smp, const std::vector<std::string>& features, std::string* error_msg) const {
   bool fpu_32bit = fpu_32bit_;
   bool mips_isa_gte2 = mips_isa_gte2_;
@@ -225,7 +228,8 @@
       return nullptr;
     }
   }
-  return new MipsInstructionSetFeatures(smp, fpu_32bit, mips_isa_gte2, r6);
+  return std::unique_ptr<const InstructionSetFeatures>(
+      new MipsInstructionSetFeatures(smp, fpu_32bit, mips_isa_gte2, r6));
 }
 
 }  // namespace art
diff --git a/runtime/arch/mips/instruction_set_features_mips.h b/runtime/arch/mips/instruction_set_features_mips.h
index 2d54988..c2a28dc 100644
--- a/runtime/arch/mips/instruction_set_features_mips.h
+++ b/runtime/arch/mips/instruction_set_features_mips.h
@@ -23,29 +23,31 @@
 
 namespace art {
 
+class MipsInstructionSetFeatures;
+using MipsFeaturesUniquePtr = std::unique_ptr<const MipsInstructionSetFeatures>;
+
 // Instruction set features relevant to the MIPS architecture.
 class MipsInstructionSetFeatures FINAL : public InstructionSetFeatures {
  public:
   // Process a CPU variant string like "r4000" and create InstructionSetFeatures.
-  static const MipsInstructionSetFeatures* FromVariant(const std::string& variant,
-                                                        std::string* error_msg);
+  static MipsFeaturesUniquePtr FromVariant(const std::string& variant, std::string* error_msg);
 
   // Parse a bitmap and create an InstructionSetFeatures.
-  static const MipsInstructionSetFeatures* FromBitmap(uint32_t bitmap);
+  static MipsFeaturesUniquePtr FromBitmap(uint32_t bitmap);
 
   // Turn C pre-processor #defines into the equivalent instruction set features.
-  static const MipsInstructionSetFeatures* FromCppDefines();
+  static MipsFeaturesUniquePtr FromCppDefines();
 
   // Process /proc/cpuinfo and use kRuntimeISA to produce InstructionSetFeatures.
-  static const MipsInstructionSetFeatures* FromCpuInfo();
+  static MipsFeaturesUniquePtr FromCpuInfo();
 
   // Process the auxiliary vector AT_HWCAP entry and use kRuntimeISA to produce
   // InstructionSetFeatures.
-  static const MipsInstructionSetFeatures* FromHwcap();
+  static MipsFeaturesUniquePtr FromHwcap();
 
   // Use assembly tests of the current runtime (ie kRuntimeISA) to determine the
   // InstructionSetFeatures. This works around kernel bugs in AT_HWCAP and /proc/cpuinfo.
-  static const MipsInstructionSetFeatures* FromAssembly();
+  static MipsFeaturesUniquePtr FromAssembly();
 
   bool Equals(const InstructionSetFeatures* other) const OVERRIDE;
 
@@ -77,7 +79,7 @@
 
  protected:
   // Parse a vector of the form "fpu32", "mips2" adding these to a new MipsInstructionSetFeatures.
-  virtual const InstructionSetFeatures*
+  std::unique_ptr<const InstructionSetFeatures>
       AddFeaturesFromSplitString(const bool smp, const std::vector<std::string>& features,
                                  std::string* error_msg) const OVERRIDE;
 
diff --git a/runtime/arch/mips/quick_entrypoints_mips.S b/runtime/arch/mips/quick_entrypoints_mips.S
index c3c1882..34e34b4 100644
--- a/runtime/arch/mips/quick_entrypoints_mips.S
+++ b/runtime/arch/mips/quick_entrypoints_mips.S
@@ -1171,10 +1171,11 @@
 END art_quick_unlock_object_no_inline
 
     /*
-     * Entry from managed code that calls artCheckCastFromCode and delivers exception on failure.
+     * Entry from managed code that calls artInstanceOfFromCode and delivers exception on failure.
      */
-    .extern artThrowClassCastException
-ENTRY art_quick_check_cast
+    .extern artInstanceOfFromCode
+    .extern artThrowClassCastExceptionForObject
+ENTRY art_quick_check_instance_of
     addiu  $sp, $sp, -32
     .cfi_adjust_cfa_offset 32
     sw     $gp, 16($sp)
@@ -1183,7 +1184,7 @@
     sw     $t9, 8($sp)
     sw     $a1, 4($sp)
     sw     $a0, 0($sp)
-    la     $t9, artIsAssignableFromCode
+    la     $t9, artInstanceOfFromCode
     jalr   $t9
     addiu  $sp, $sp, -16             # reserve argument slots on the stack
     addiu  $sp, $sp, 16
@@ -1200,10 +1201,10 @@
     addiu  $sp, $sp, 32
     .cfi_adjust_cfa_offset -32
     SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
-    la   $t9, artThrowClassCastException
-    jalr $zero, $t9                 # artThrowClassCastException (Class*, Class*, Thread*)
+    la   $t9, artThrowClassCastExceptionForObject
+    jalr $zero, $t9                 # artThrowClassCastException (Object*, Class*, Thread*)
     move $a2, rSELF                 # pass Thread::Current
-END art_quick_check_cast
+END art_quick_check_instance_of
 
     /*
      * Restore rReg's value from offset($sp) if rReg is not the same as rExclude.
diff --git a/runtime/arch/mips64/entrypoints_init_mips64.cc b/runtime/arch/mips64/entrypoints_init_mips64.cc
index a037905..bc17d47 100644
--- a/runtime/arch/mips64/entrypoints_init_mips64.cc
+++ b/runtime/arch/mips64/entrypoints_init_mips64.cc
@@ -30,8 +30,8 @@
 namespace art {
 
 // Cast entrypoints.
-extern "C" size_t artIsAssignableFromCode(const mirror::Class* klass,
-                                          const mirror::Class* ref_class);
+extern "C" size_t artInstanceOfFromCode(mirror::Object* obj, mirror::Class* ref_class);
+
 // Math entrypoints.
 extern int32_t CmpgDouble(double a, double b);
 extern int32_t CmplDouble(double a, double b);
@@ -59,12 +59,16 @@
 extern "C" int64_t __divdi3(int64_t, int64_t);
 extern "C" int64_t __moddi3(int64_t, int64_t);
 
+// No read barrier entrypoints for marking registers.
+void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints ATTRIBUTE_UNUSED,
+                                  bool is_marking ATTRIBUTE_UNUSED) {}
+
 void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) {
   DefaultInitEntryPoints(jpoints, qpoints);
 
   // Cast
-  qpoints->pInstanceofNonTrivial = artIsAssignableFromCode;
-  qpoints->pCheckCast = art_quick_check_cast;
+  qpoints->pInstanceofNonTrivial = artInstanceOfFromCode;
+  qpoints->pCheckInstanceOf = art_quick_check_instance_of;
 
   // Math
   qpoints->pCmpgDouble = CmpgDouble;
diff --git a/runtime/arch/mips64/instruction_set_features_mips64.cc b/runtime/arch/mips64/instruction_set_features_mips64.cc
index 5c0c914..490a8d2 100644
--- a/runtime/arch/mips64/instruction_set_features_mips64.cc
+++ b/runtime/arch/mips64/instruction_set_features_mips64.cc
@@ -24,27 +24,27 @@
 
 namespace art {
 
-const Mips64InstructionSetFeatures* Mips64InstructionSetFeatures::FromVariant(
+Mips64FeaturesUniquePtr Mips64InstructionSetFeatures::FromVariant(
     const std::string& variant, std::string* error_msg ATTRIBUTE_UNUSED) {
   if (variant != "default" && variant != "mips64r6") {
     LOG(WARNING) << "Unexpected CPU variant for Mips64 using defaults: " << variant;
   }
   bool smp = true;  // Conservative default.
-  return new Mips64InstructionSetFeatures(smp);
+  return Mips64FeaturesUniquePtr(new Mips64InstructionSetFeatures(smp));
 }
 
-const Mips64InstructionSetFeatures* Mips64InstructionSetFeatures::FromBitmap(uint32_t bitmap) {
+Mips64FeaturesUniquePtr Mips64InstructionSetFeatures::FromBitmap(uint32_t bitmap) {
   bool smp = (bitmap & kSmpBitfield) != 0;
-  return new Mips64InstructionSetFeatures(smp);
+  return Mips64FeaturesUniquePtr(new Mips64InstructionSetFeatures(smp));
 }
 
-const Mips64InstructionSetFeatures* Mips64InstructionSetFeatures::FromCppDefines() {
+Mips64FeaturesUniquePtr Mips64InstructionSetFeatures::FromCppDefines() {
   const bool smp = true;
 
-  return new Mips64InstructionSetFeatures(smp);
+  return Mips64FeaturesUniquePtr(new Mips64InstructionSetFeatures(smp));
 }
 
-const Mips64InstructionSetFeatures* Mips64InstructionSetFeatures::FromCpuInfo() {
+Mips64FeaturesUniquePtr Mips64InstructionSetFeatures::FromCpuInfo() {
   // Look in /proc/cpuinfo for features we need.  Only use this when we can guarantee that
   // the kernel puts the appropriate feature flags in here.  Sometimes it doesn't.
   bool smp = false;
@@ -65,15 +65,15 @@
   } else {
     LOG(ERROR) << "Failed to open /proc/cpuinfo";
   }
-  return new Mips64InstructionSetFeatures(smp);
+  return Mips64FeaturesUniquePtr(new Mips64InstructionSetFeatures(smp));
 }
 
-const Mips64InstructionSetFeatures* Mips64InstructionSetFeatures::FromHwcap() {
+Mips64FeaturesUniquePtr Mips64InstructionSetFeatures::FromHwcap() {
   UNIMPLEMENTED(WARNING);
   return FromCppDefines();
 }
 
-const Mips64InstructionSetFeatures* Mips64InstructionSetFeatures::FromAssembly() {
+Mips64FeaturesUniquePtr Mips64InstructionSetFeatures::FromAssembly() {
   UNIMPLEMENTED(WARNING);
   return FromCppDefines();
 }
@@ -99,7 +99,8 @@
   return result;
 }
 
-const InstructionSetFeatures* Mips64InstructionSetFeatures::AddFeaturesFromSplitString(
+std::unique_ptr<const InstructionSetFeatures>
+Mips64InstructionSetFeatures::AddFeaturesFromSplitString(
     const bool smp, const std::vector<std::string>& features, std::string* error_msg) const {
   auto i = features.begin();
   if (i != features.end()) {
@@ -108,7 +109,7 @@
     *error_msg = StringPrintf("Unknown instruction set feature: '%s'", feature.c_str());
     return nullptr;
   }
-  return new Mips64InstructionSetFeatures(smp);
+  return std::unique_ptr<const InstructionSetFeatures>(new Mips64InstructionSetFeatures(smp));
 }
 
 }  // namespace art
diff --git a/runtime/arch/mips64/instruction_set_features_mips64.h b/runtime/arch/mips64/instruction_set_features_mips64.h
index d5d6012..2e66235 100644
--- a/runtime/arch/mips64/instruction_set_features_mips64.h
+++ b/runtime/arch/mips64/instruction_set_features_mips64.h
@@ -21,29 +21,32 @@
 
 namespace art {
 
+class Mips64InstructionSetFeatures;
+using Mips64FeaturesUniquePtr = std::unique_ptr<const Mips64InstructionSetFeatures>;
+
 // Instruction set features relevant to the MIPS64 architecture.
 class Mips64InstructionSetFeatures FINAL : public InstructionSetFeatures {
  public:
   // Process a CPU variant string like "r4000" and create InstructionSetFeatures.
-  static const Mips64InstructionSetFeatures* FromVariant(const std::string& variant,
-                                                        std::string* error_msg);
+  static Mips64FeaturesUniquePtr FromVariant(const std::string& variant,
+                                                                         std::string* error_msg);
 
   // Parse a bitmap and create an InstructionSetFeatures.
-  static const Mips64InstructionSetFeatures* FromBitmap(uint32_t bitmap);
+  static Mips64FeaturesUniquePtr FromBitmap(uint32_t bitmap);
 
   // Turn C pre-processor #defines into the equivalent instruction set features.
-  static const Mips64InstructionSetFeatures* FromCppDefines();
+  static Mips64FeaturesUniquePtr FromCppDefines();
 
   // Process /proc/cpuinfo and use kRuntimeISA to produce InstructionSetFeatures.
-  static const Mips64InstructionSetFeatures* FromCpuInfo();
+  static Mips64FeaturesUniquePtr FromCpuInfo();
 
   // Process the auxiliary vector AT_HWCAP entry and use kRuntimeISA to produce
   // InstructionSetFeatures.
-  static const Mips64InstructionSetFeatures* FromHwcap();
+  static Mips64FeaturesUniquePtr FromHwcap();
 
   // Use assembly tests of the current runtime (ie kRuntimeISA) to determine the
   // InstructionSetFeatures. This works around kernel bugs in AT_HWCAP and /proc/cpuinfo.
-  static const Mips64InstructionSetFeatures* FromAssembly();
+  static Mips64FeaturesUniquePtr FromAssembly();
 
   bool Equals(const InstructionSetFeatures* other) const OVERRIDE;
 
@@ -59,8 +62,9 @@
 
  protected:
   // Parse a vector of the form "fpu32", "mips2" adding these to a new Mips64InstructionSetFeatures.
-  virtual const InstructionSetFeatures*
-      AddFeaturesFromSplitString(const bool smp, const std::vector<std::string>& features,
+  std::unique_ptr<const InstructionSetFeatures>
+      AddFeaturesFromSplitString(const bool smp,
+                                 const std::vector<std::string>& features,
                                  std::string* error_msg) const OVERRIDE;
 
  private:
diff --git a/runtime/arch/mips64/quick_entrypoints_mips64.S b/runtime/arch/mips64/quick_entrypoints_mips64.S
index 8fc7bc3..0861d2d 100644
--- a/runtime/arch/mips64/quick_entrypoints_mips64.S
+++ b/runtime/arch/mips64/quick_entrypoints_mips64.S
@@ -1256,10 +1256,11 @@
 END art_quick_unlock_object_no_inline
 
     /*
-     * Entry from managed code that calls artCheckCastFromCode and delivers exception on failure.
+     * Entry from managed code that calls artInstanceOfFromCode and delivers exception on failure.
      */
-    .extern artThrowClassCastException
-ENTRY art_quick_check_cast
+    .extern artInstanceOfFromCode
+    .extern artThrowClassCastExceptionForObject
+ENTRY art_quick_check_instance_of
     daddiu $sp, $sp, -32
     .cfi_adjust_cfa_offset 32
     sd     $ra, 24($sp)
@@ -1267,7 +1268,7 @@
     sd     $t9, 16($sp)
     sd     $a1, 8($sp)
     sd     $a0, 0($sp)
-    jal    artIsAssignableFromCode
+    jal    artInstanceOfFromCode
     .cpreturn                       # Restore gp from t8 in branch delay slot.
                                     # t8 may be clobbered in artIsAssignableFromCode.
     beq    $v0, $zero, .Lthrow_class_cast_exception
@@ -1283,10 +1284,10 @@
     .cfi_adjust_cfa_offset -32
     SETUP_GP
     SETUP_SAVE_ALL_CALLEE_SAVES_FRAME
-    dla  $t9, artThrowClassCastException
-    jalr $zero, $t9                 # artThrowClassCastException (Class*, Class*, Thread*)
+    dla  $t9, artThrowClassCastExceptionForObject
+    jalr $zero, $t9                 # artThrowClassCastException (Object*, Class*, Thread*)
     move $a2, rSELF                 # pass Thread::Current
-END art_quick_check_cast
+END art_quick_check_instance_of
 
 
     /*
@@ -1368,7 +1369,7 @@
     bne    $a0, $zero, .Lart_quick_aput_obj_with_bound_check_gp_set
     nop
     b art_quick_throw_null_pointer_exception
-    nop
+    .cpreturn                       # Restore gp from t8 in branch delay slot.
 END art_quick_aput_obj_with_null_and_bound_check
 
 ENTRY art_quick_aput_obj_with_bound_check
@@ -1377,8 +1378,9 @@
     bne  $t1, $zero, .Lart_quick_aput_obj_gp_set
     nop
     move $a0, $a1
-    b art_quick_throw_array_bounds
     move $a1, $t0
+    b art_quick_throw_array_bounds
+    .cpreturn                       # Restore gp from t8 in branch delay slot.
 END art_quick_aput_obj_with_bound_check
 
 ENTRY art_quick_aput_obj
diff --git a/runtime/arch/stub_test.cc b/runtime/arch/stub_test.cc
index 432ba36..bbf9a8b 100644
--- a/runtime/arch/stub_test.cc
+++ b/runtime/arch/stub_test.cc
@@ -23,6 +23,7 @@
 #include "common_runtime_test.h"
 #include "entrypoints/quick/quick_entrypoints_enum.h"
 #include "imt_conflict_table.h"
+#include "jni_internal.h"
 #include "linear_alloc.h"
 #include "mirror/class-inl.h"
 #include "mirror/string-inl.h"
@@ -805,7 +806,7 @@
 
 #if defined(__i386__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
     (defined(__x86_64__) && !defined(__APPLE__))
-extern "C" void art_quick_check_cast(void);
+extern "C" void art_quick_check_instance_of(void);
 #endif
 
 TEST_F(StubTest, CheckCast) {
@@ -813,40 +814,90 @@
     (defined(__x86_64__) && !defined(__APPLE__))
   Thread* self = Thread::Current();
 
-  const uintptr_t art_quick_check_cast = StubTest::GetEntrypoint(self, kQuickCheckCast);
+  const uintptr_t art_quick_check_instance_of =
+      StubTest::GetEntrypoint(self, kQuickCheckInstanceOf);
 
   // Find some classes.
   ScopedObjectAccess soa(self);
   // garbage is created during ClassLinker::Init
 
-  StackHandleScope<2> hs(soa.Self());
-  Handle<mirror::Class> c(
-      hs.NewHandle(class_linker_->FindSystemClass(soa.Self(), "[Ljava/lang/Object;")));
-  Handle<mirror::Class> c2(
-      hs.NewHandle(class_linker_->FindSystemClass(soa.Self(), "[Ljava/lang/String;")));
+  VariableSizedHandleScope hs(soa.Self());
+  Handle<mirror::Class> klass_obj(
+      hs.NewHandle(class_linker_->FindSystemClass(soa.Self(), "Ljava/lang/Object;")));
+  Handle<mirror::Class> klass_str(
+      hs.NewHandle(class_linker_->FindSystemClass(soa.Self(), "Ljava/lang/String;")));
+  Handle<mirror::Class> klass_list(
+      hs.NewHandle(class_linker_->FindSystemClass(soa.Self(), "Ljava/util/List;")));
+  Handle<mirror::Class> klass_cloneable(
+        hs.NewHandle(class_linker_->FindSystemClass(soa.Self(), "Ljava/lang/Cloneable;")));
+  Handle<mirror::Class> klass_array_list(
+      hs.NewHandle(class_linker_->FindSystemClass(soa.Self(), "Ljava/util/ArrayList;")));
+  Handle<mirror::Object> obj(hs.NewHandle(klass_obj->AllocObject(soa.Self())));
+  Handle<mirror::String> string(hs.NewHandle(
+      mirror::String::AllocFromModifiedUtf8(soa.Self(), "ABCD")));
+  Handle<mirror::Object> array_list(hs.NewHandle(klass_array_list->AllocObject(soa.Self())));
 
   EXPECT_FALSE(self->IsExceptionPending());
 
-  Invoke3(reinterpret_cast<size_t>(c.Get()), reinterpret_cast<size_t>(c.Get()), 0U,
-          art_quick_check_cast, self);
-
+  Invoke3(reinterpret_cast<size_t>(obj.Get()),
+          reinterpret_cast<size_t>(klass_obj.Get()),
+          0U,
+          art_quick_check_instance_of,
+          self);
   EXPECT_FALSE(self->IsExceptionPending());
 
-  Invoke3(reinterpret_cast<size_t>(c2.Get()), reinterpret_cast<size_t>(c2.Get()), 0U,
-          art_quick_check_cast, self);
-
+  // Expected true: Test string instance of java.lang.String.
+  Invoke3(reinterpret_cast<size_t>(string.Get()),
+          reinterpret_cast<size_t>(klass_str.Get()),
+          0U,
+          art_quick_check_instance_of,
+          self);
   EXPECT_FALSE(self->IsExceptionPending());
 
-  Invoke3(reinterpret_cast<size_t>(c.Get()), reinterpret_cast<size_t>(c2.Get()), 0U,
-          art_quick_check_cast, self);
-
+  // Expected true: Test string instance of java.lang.Object.
+  Invoke3(reinterpret_cast<size_t>(string.Get()),
+          reinterpret_cast<size_t>(klass_obj.Get()),
+          0U,
+          art_quick_check_instance_of,
+          self);
   EXPECT_FALSE(self->IsExceptionPending());
 
-  // TODO: Make the following work. But that would require correct managed frames.
+  // Expected false: Test object instance of java.lang.String.
+  Invoke3(reinterpret_cast<size_t>(obj.Get()),
+          reinterpret_cast<size_t>(klass_str.Get()),
+          0U,
+          art_quick_check_instance_of,
+          self);
+  EXPECT_TRUE(self->IsExceptionPending());
+  self->ClearException();
 
-  Invoke3(reinterpret_cast<size_t>(c2.Get()), reinterpret_cast<size_t>(c.Get()), 0U,
-          art_quick_check_cast, self);
+  Invoke3(reinterpret_cast<size_t>(array_list.Get()),
+          reinterpret_cast<size_t>(klass_list.Get()),
+          0U,
+          art_quick_check_instance_of,
+          self);
+  EXPECT_FALSE(self->IsExceptionPending());
 
+  Invoke3(reinterpret_cast<size_t>(array_list.Get()),
+          reinterpret_cast<size_t>(klass_cloneable.Get()),
+          0U,
+          art_quick_check_instance_of,
+          self);
+  EXPECT_FALSE(self->IsExceptionPending());
+
+  Invoke3(reinterpret_cast<size_t>(string.Get()),
+          reinterpret_cast<size_t>(klass_array_list.Get()),
+          0U,
+          art_quick_check_instance_of,
+          self);
+  EXPECT_TRUE(self->IsExceptionPending());
+  self->ClearException();
+
+  Invoke3(reinterpret_cast<size_t>(string.Get()),
+          reinterpret_cast<size_t>(klass_cloneable.Get()),
+          0U,
+          art_quick_check_instance_of,
+          self);
   EXPECT_TRUE(self->IsExceptionPending());
   self->ClearException();
 
@@ -1168,7 +1219,7 @@
                             reinterpret_cast<size_t>(nullptr),
                             StubTest::GetEntrypoint(self, kQuickAllocArrayResolved),
                             self);
-    EXPECT_FALSE(self->IsExceptionPending()) << PrettyTypeOf(self->GetException());
+    EXPECT_FALSE(self->IsExceptionPending()) << mirror::Object::PrettyTypeOf(self->GetException());
     EXPECT_NE(reinterpret_cast<size_t>(nullptr), result);
     mirror::Object* obj = reinterpret_cast<mirror::Object*>(result);
     EXPECT_TRUE(obj->IsArrayInstance());
@@ -1964,7 +2015,7 @@
   ASSERT_NE(nullptr, add_jmethod);
 
   // Get representation.
-  ArtMethod* contains_amethod = soa.DecodeMethod(contains_jmethod);
+  ArtMethod* contains_amethod = jni::DecodeArtMethod(contains_jmethod);
 
   // Patch up ArrayList.contains.
   if (contains_amethod->GetEntryPointFromQuickCompiledCode() == nullptr) {
@@ -1982,7 +2033,7 @@
   ASSERT_NE(nullptr, inf_contains_jmethod);
 
   // Get mirror representation.
-  ArtMethod* inf_contains = soa.DecodeMethod(inf_contains_jmethod);
+  ArtMethod* inf_contains = jni::DecodeArtMethod(inf_contains_jmethod);
 
   // Object
 
@@ -2036,7 +2087,7 @@
 
   env->CallBooleanMethod(jarray_list, add_jmethod, jobj);
 
-  ASSERT_FALSE(self->IsExceptionPending()) << PrettyTypeOf(self->GetException());
+  ASSERT_FALSE(self->IsExceptionPending()) << mirror::Object::PrettyTypeOf(self->GetException());
 
   // Contains.
 
diff --git a/runtime/arch/x86/entrypoints_init_x86.cc b/runtime/arch/x86/entrypoints_init_x86.cc
index 0a10a3c..9cd4a3e 100644
--- a/runtime/arch/x86/entrypoints_init_x86.cc
+++ b/runtime/arch/x86/entrypoints_init_x86.cc
@@ -27,8 +27,7 @@
 namespace art {
 
 // Cast entrypoints.
-extern "C" size_t art_quick_is_assignable(const mirror::Class* klass,
-                                          const mirror::Class* ref_class);
+extern "C" size_t art_quick_instance_of(mirror::Object* obj, mirror::Class* ref_class);
 
 // Read barrier entrypoints.
 // art_quick_read_barrier_mark_regX uses an non-standard calling
@@ -45,12 +44,22 @@
 extern "C" mirror::Object* art_quick_read_barrier_slow(mirror::Object*, mirror::Object*, uint32_t);
 extern "C" mirror::Object* art_quick_read_barrier_for_root_slow(GcRoot<mirror::Object>*);
 
+void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_marking) {
+  qpoints->pReadBarrierMarkReg00 = is_marking ? art_quick_read_barrier_mark_reg00 : nullptr;
+  qpoints->pReadBarrierMarkReg01 = is_marking ? art_quick_read_barrier_mark_reg01 : nullptr;
+  qpoints->pReadBarrierMarkReg02 = is_marking ? art_quick_read_barrier_mark_reg02 : nullptr;
+  qpoints->pReadBarrierMarkReg03 = is_marking ? art_quick_read_barrier_mark_reg03 : nullptr;
+  qpoints->pReadBarrierMarkReg05 = is_marking ? art_quick_read_barrier_mark_reg05 : nullptr;
+  qpoints->pReadBarrierMarkReg06 = is_marking ? art_quick_read_barrier_mark_reg06 : nullptr;
+  qpoints->pReadBarrierMarkReg07 = is_marking ? art_quick_read_barrier_mark_reg07 : nullptr;
+}
+
 void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) {
   DefaultInitEntryPoints(jpoints, qpoints);
 
   // Cast
-  qpoints->pInstanceofNonTrivial = art_quick_is_assignable;
-  qpoints->pCheckCast = art_quick_check_cast;
+  qpoints->pInstanceofNonTrivial = art_quick_instance_of;
+  qpoints->pCheckInstanceOf = art_quick_check_instance_of;
 
   // More math.
   qpoints->pCos = cos;
@@ -88,14 +97,8 @@
 
   // Read barrier.
   qpoints->pReadBarrierJni = ReadBarrierJni;
-  qpoints->pReadBarrierMarkReg00 = art_quick_read_barrier_mark_reg00;
-  qpoints->pReadBarrierMarkReg01 = art_quick_read_barrier_mark_reg01;
-  qpoints->pReadBarrierMarkReg02 = art_quick_read_barrier_mark_reg02;
-  qpoints->pReadBarrierMarkReg03 = art_quick_read_barrier_mark_reg03;
+  UpdateReadBarrierEntrypoints(qpoints, /*is_marking*/ false);
   qpoints->pReadBarrierMarkReg04 = nullptr;  // Cannot use register 4 (ESP) to pass arguments.
-  qpoints->pReadBarrierMarkReg05 = art_quick_read_barrier_mark_reg05;
-  qpoints->pReadBarrierMarkReg06 = art_quick_read_barrier_mark_reg06;
-  qpoints->pReadBarrierMarkReg07 = art_quick_read_barrier_mark_reg07;
   // x86 has only 8 core registers.
   qpoints->pReadBarrierMarkReg08 = nullptr;
   qpoints->pReadBarrierMarkReg09 = nullptr;
diff --git a/runtime/arch/x86/instruction_set_features_x86.cc b/runtime/arch/x86/instruction_set_features_x86.cc
index 0093e82..90b55a9 100644
--- a/runtime/arch/x86/instruction_set_features_x86.cc
+++ b/runtime/arch/x86/instruction_set_features_x86.cc
@@ -49,7 +49,34 @@
     "silvermont",
 };
 
-const X86InstructionSetFeatures* X86InstructionSetFeatures::FromVariant(
+X86FeaturesUniquePtr X86InstructionSetFeatures::Create(bool x86_64,
+                                                       bool smp,
+                                                       bool has_SSSE3,
+                                                       bool has_SSE4_1,
+                                                       bool has_SSE4_2,
+                                                       bool has_AVX,
+                                                       bool has_AVX2,
+                                                       bool has_POPCNT) {
+  if (x86_64) {
+    return X86FeaturesUniquePtr(new X86_64InstructionSetFeatures(smp,
+                                                                 has_SSSE3,
+                                                                 has_SSE4_1,
+                                                                 has_SSE4_2,
+                                                                 has_AVX,
+                                                                 has_AVX2,
+                                                                 has_POPCNT));
+  } else {
+    return X86FeaturesUniquePtr(new X86InstructionSetFeatures(smp,
+                                                              has_SSSE3,
+                                                              has_SSE4_1,
+                                                              has_SSE4_2,
+                                                              has_AVX,
+                                                              has_AVX2,
+                                                              has_POPCNT));
+  }
+}
+
+X86FeaturesUniquePtr X86InstructionSetFeatures::FromVariant(
     const std::string& variant, std::string* error_msg ATTRIBUTE_UNUSED,
     bool x86_64) {
   bool smp = true;  // Conservative default.
@@ -75,17 +102,10 @@
     LOG(WARNING) << "Unexpected CPU variant for X86 using defaults: " << variant;
   }
 
-  if (x86_64) {
-    return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                            has_AVX2, has_POPCNT);
-  } else {
-    return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                            has_AVX2, has_POPCNT);
-  }
+  return Create(x86_64, smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX, has_AVX2, has_POPCNT);
 }
 
-const X86InstructionSetFeatures* X86InstructionSetFeatures::FromBitmap(uint32_t bitmap,
-                                                                       bool x86_64) {
+X86FeaturesUniquePtr X86InstructionSetFeatures::FromBitmap(uint32_t bitmap, bool x86_64) {
   bool smp = (bitmap & kSmpBitfield) != 0;
   bool has_SSSE3 = (bitmap & kSsse3Bitfield) != 0;
   bool has_SSE4_1 = (bitmap & kSse4_1Bitfield) != 0;
@@ -93,16 +113,10 @@
   bool has_AVX = (bitmap & kAvxBitfield) != 0;
   bool has_AVX2 = (bitmap & kAvxBitfield) != 0;
   bool has_POPCNT = (bitmap & kPopCntBitfield) != 0;
-  if (x86_64) {
-    return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2,
-                                            has_AVX, has_AVX2, has_POPCNT);
-  } else {
-    return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2,
-                                         has_AVX, has_AVX2, has_POPCNT);
-  }
+  return Create(x86_64, smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX, has_AVX2, has_POPCNT);
 }
 
-const X86InstructionSetFeatures* X86InstructionSetFeatures::FromCppDefines(bool x86_64) {
+X86FeaturesUniquePtr X86InstructionSetFeatures::FromCppDefines(bool x86_64) {
   const bool smp = true;
 
 #ifndef __SSSE3__
@@ -141,16 +155,10 @@
   const bool has_POPCNT = true;
 #endif
 
-  if (x86_64) {
-    return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                            has_AVX2, has_POPCNT);
-  } else {
-    return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                         has_AVX2, has_POPCNT);
-  }
+  return Create(x86_64, smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX, has_AVX2, has_POPCNT);
 }
 
-const X86InstructionSetFeatures* X86InstructionSetFeatures::FromCpuInfo(bool x86_64) {
+X86FeaturesUniquePtr X86InstructionSetFeatures::FromCpuInfo(bool x86_64) {
   // Look in /proc/cpuinfo for features we need.  Only use this when we can guarantee that
   // the kernel puts the appropriate feature flags in here.  Sometimes it doesn't.
   bool smp = false;
@@ -198,21 +206,15 @@
   } else {
     LOG(ERROR) << "Failed to open /proc/cpuinfo";
   }
-  if (x86_64) {
-    return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                            has_AVX2, has_POPCNT);
-  } else {
-    return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                         has_AVX2, has_POPCNT);
-  }
+  return Create(x86_64, smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX, has_AVX2, has_POPCNT);
 }
 
-const X86InstructionSetFeatures* X86InstructionSetFeatures::FromHwcap(bool x86_64) {
+X86FeaturesUniquePtr X86InstructionSetFeatures::FromHwcap(bool x86_64) {
   UNIMPLEMENTED(WARNING);
   return FromCppDefines(x86_64);
 }
 
-const X86InstructionSetFeatures* X86InstructionSetFeatures::FromAssembly(bool x86_64) {
+X86FeaturesUniquePtr X86InstructionSetFeatures::FromAssembly(bool x86_64) {
   UNIMPLEMENTED(WARNING);
   return FromCppDefines(x86_64);
 }
@@ -281,7 +283,7 @@
   return result;
 }
 
-const InstructionSetFeatures* X86InstructionSetFeatures::AddFeaturesFromSplitString(
+std::unique_ptr<const InstructionSetFeatures> X86InstructionSetFeatures::AddFeaturesFromSplitString(
     const bool smp, const std::vector<std::string>& features, bool x86_64,
     std::string* error_msg) const {
   bool has_SSSE3 = has_SSSE3_;
@@ -321,13 +323,7 @@
       return nullptr;
     }
   }
-  if (x86_64) {
-    return new X86_64InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                            has_AVX2, has_POPCNT);
-  } else {
-    return new X86InstructionSetFeatures(smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX,
-                                         has_AVX2, has_POPCNT);
-  }
+  return Create(x86_64, smp, has_SSSE3, has_SSE4_1, has_SSE4_2, has_AVX, has_AVX2, has_POPCNT);
 }
 
 }  // namespace art
diff --git a/runtime/arch/x86/instruction_set_features_x86.h b/runtime/arch/x86/instruction_set_features_x86.h
index 2aa8ae6..672892e 100644
--- a/runtime/arch/x86/instruction_set_features_x86.h
+++ b/runtime/arch/x86/instruction_set_features_x86.h
@@ -21,30 +21,34 @@
 
 namespace art {
 
+class X86InstructionSetFeatures;
+using X86FeaturesUniquePtr = std::unique_ptr<const X86InstructionSetFeatures>;
+
 // Instruction set features relevant to the X86 architecture.
 class X86InstructionSetFeatures : public InstructionSetFeatures {
  public:
   // Process a CPU variant string like "atom" or "nehalem" and create InstructionSetFeatures.
-  static const X86InstructionSetFeatures* FromVariant(const std::string& variant,
-                                                        std::string* error_msg,
-                                                        bool x86_64 = false);
+  static X86FeaturesUniquePtr FromVariant(const std::string& variant,
+                                                                      std::string* error_msg,
+                                                                      bool x86_64 = false);
 
   // Parse a bitmap and create an InstructionSetFeatures.
-  static const X86InstructionSetFeatures* FromBitmap(uint32_t bitmap, bool x86_64 = false);
+  static X86FeaturesUniquePtr FromBitmap(uint32_t bitmap,
+                                                                     bool x86_64 = false);
 
   // Turn C pre-processor #defines into the equivalent instruction set features.
-  static const X86InstructionSetFeatures* FromCppDefines(bool x86_64 = false);
+  static X86FeaturesUniquePtr FromCppDefines(bool x86_64 = false);
 
   // Process /proc/cpuinfo and use kRuntimeISA to produce InstructionSetFeatures.
-  static const X86InstructionSetFeatures* FromCpuInfo(bool x86_64 = false);
+  static X86FeaturesUniquePtr FromCpuInfo(bool x86_64 = false);
 
   // Process the auxiliary vector AT_HWCAP entry and use kRuntimeISA to produce
   // InstructionSetFeatures.
-  static const X86InstructionSetFeatures* FromHwcap(bool x86_64 = false);
+  static X86FeaturesUniquePtr FromHwcap(bool x86_64 = false);
 
   // Use assembly tests of the current runtime (ie kRuntimeISA) to determine the
   // InstructionSetFeatures. This works around kernel bugs in AT_HWCAP and /proc/cpuinfo.
-  static const X86InstructionSetFeatures* FromAssembly(bool x86_64 = false);
+  static X86FeaturesUniquePtr FromAssembly(bool x86_64 = false);
 
   bool Equals(const InstructionSetFeatures* other) const OVERRIDE;
 
@@ -64,13 +68,13 @@
 
  protected:
   // Parse a string of the form "ssse3" adding these to a new InstructionSetFeatures.
-  virtual const InstructionSetFeatures*
+  virtual std::unique_ptr<const InstructionSetFeatures>
       AddFeaturesFromSplitString(const bool smp, const std::vector<std::string>& features,
                                  std::string* error_msg) const OVERRIDE {
     return AddFeaturesFromSplitString(smp, features, false, error_msg);
   }
 
-  const InstructionSetFeatures*
+  std::unique_ptr<const InstructionSetFeatures>
       AddFeaturesFromSplitString(const bool smp, const std::vector<std::string>& features,
                                  bool x86_64, std::string* error_msg) const;
 
@@ -85,6 +89,15 @@
         has_POPCNT_(has_POPCNT) {
   }
 
+  static X86FeaturesUniquePtr Create(bool x86_64,
+                                     bool smp,
+                                     bool has_SSSE3,
+                                     bool has_SSE4_1,
+                                     bool has_SSE4_2,
+                                     bool has_AVX,
+                                     bool has_AVX2,
+                                     bool has_POPCNT);
+
  private:
   // Bitmap positions for encoding features as a bitmap.
   enum {
diff --git a/runtime/arch/x86/quick_entrypoints_x86.S b/runtime/arch/x86/quick_entrypoints_x86.S
index 7bb59ef..fb405fa 100644
--- a/runtime/arch/x86/quick_entrypoints_x86.S
+++ b/runtime/arch/x86/quick_entrypoints_x86.S
@@ -224,12 +224,11 @@
     /*
      * Macro that sets up the callee save frame to conform with
      * Runtime::CreateCalleeSaveMethod(kSaveEverything)
-     * when EDI is already saved.
+     * when EDI and ESI are already saved.
      */
-MACRO2(SETUP_SAVE_EVERYTHING_FRAME_EDI_SAVED, got_reg, temp_reg)
+MACRO2(SETUP_SAVE_EVERYTHING_FRAME_EDI_ESI_SAVED, got_reg, temp_reg)
     // Save core registers from highest to lowest to agree with core spills bitmap.
-    // EDI, or at least a placeholder for it, is already on the stack.
-    PUSH esi
+    // EDI and ESI, or at least placeholders for them, are already on the stack.
     PUSH ebp
     PUSH ebx
     PUSH edx
@@ -268,13 +267,25 @@
     /*
      * Macro that sets up the callee save frame to conform with
      * Runtime::CreateCalleeSaveMethod(kSaveEverything)
+     * when EDI is already saved.
+     */
+MACRO2(SETUP_SAVE_EVERYTHING_FRAME_EDI_SAVED, got_reg, temp_reg)
+    // Save core registers from highest to lowest to agree with core spills bitmap.
+    // EDI, or at least a placeholder for it, is already on the stack.
+    PUSH esi
+    SETUP_SAVE_EVERYTHING_FRAME_EDI_ESI_SAVED RAW_VAR(got_reg), RAW_VAR(temp_reg)
+END_MACRO
+
+    /*
+     * Macro that sets up the callee save frame to conform with
+     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
      */
 MACRO2(SETUP_SAVE_EVERYTHING_FRAME, got_reg, temp_reg)
     PUSH edi
     SETUP_SAVE_EVERYTHING_FRAME_EDI_SAVED RAW_VAR(got_reg), RAW_VAR(temp_reg)
 END_MACRO
 
-MACRO0(RESTORE_SAVE_EVERYTHING_FRAME)
+MACRO0(RESTORE_SAVE_EVERYTHING_FRAME_FRPS)
     // Restore FPRs. Method and padding is still on the stack.
     movsd 16(%esp), %xmm0
     movsd 24(%esp), %xmm1
@@ -284,13 +295,10 @@
     movsd 56(%esp), %xmm5
     movsd 64(%esp), %xmm6
     movsd 72(%esp), %xmm7
+END_MACRO
 
-    // Remove save everything callee save method, stack alignment padding and FPRs.
-    addl MACRO_LITERAL(16 + 8 * 8), %esp
-    CFI_ADJUST_CFA_OFFSET(-(16 + 8 * 8))
-
-    // Restore core registers.
-    POP eax
+MACRO0(RESTORE_SAVE_EVERYTHING_FRAME_GPRS_EXCEPT_EAX)
+    // Restore core registers (except eax).
     POP ecx
     POP edx
     POP ebx
@@ -299,12 +307,32 @@
     POP edi
 END_MACRO
 
+MACRO0(RESTORE_SAVE_EVERYTHING_FRAME)
+    RESTORE_SAVE_EVERYTHING_FRAME_FRPS
+
+    // Remove save everything callee save method, stack alignment padding and FPRs.
+    addl MACRO_LITERAL(16 + 8 * 8), %esp
+    CFI_ADJUST_CFA_OFFSET(-(16 + 8 * 8))
+
+    POP eax
+    RESTORE_SAVE_EVERYTHING_FRAME_GPRS_EXCEPT_EAX
+END_MACRO
+
+MACRO0(RESTORE_SAVE_EVERYTHING_FRAME_KEEP_EAX)
+    RESTORE_SAVE_EVERYTHING_FRAME_FRPS
+
+    // Remove save everything callee save method, stack alignment padding and FPRs, skip EAX.
+    addl MACRO_LITERAL(16 + 8 * 8 + 4), %esp
+    CFI_ADJUST_CFA_OFFSET(-(16 + 8 * 8 + 4))
+
+    RESTORE_SAVE_EVERYTHING_FRAME_GPRS_EXCEPT_EAX
+END_MACRO
+
     /*
-     * Macro that set calls through to artDeliverPendingExceptionFromCode, where the pending
-     * exception is Thread::Current()->exception_.
+     * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
+     * exception is Thread::Current()->exception_ when the runtime method frame is ready.
      */
-MACRO0(DELIVER_PENDING_EXCEPTION)
-    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME ebx, ebx // save callee saves for throw
+MACRO0(DELIVER_PENDING_EXCEPTION_FRAME_READY)
     // Outgoing argument set up
     subl MACRO_LITERAL(12), %esp               // alignment padding
     CFI_ADJUST_CFA_OFFSET(12)
@@ -314,6 +342,15 @@
     UNREACHABLE
 END_MACRO
 
+    /*
+     * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
+     * exception is Thread::Current()->exception_.
+     */
+MACRO0(DELIVER_PENDING_EXCEPTION)
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME ebx, ebx // save callee saves for throw
+    DELIVER_PENDING_EXCEPTION_FRAME_READY
+END_MACRO
+
 MACRO2(NO_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
     DEFINE_FUNCTION VAR(c_name)
     SETUP_SAVE_ALL_CALLEE_SAVES_FRAME ebx, ebx // save all registers as basis for long jump context
@@ -1114,26 +1151,42 @@
 END_FUNCTION art_quick_alloc_object_region_tlab
 
 DEFINE_FUNCTION art_quick_resolve_string
-    movl 4(%esp), %ecx                                           // get referrer
-    movl ART_METHOD_DECLARING_CLASS_OFFSET(%ecx), %ecx           // get declaring class
-    movl DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET(%ecx), %ecx    // get string dex cache
-    movl LITERAL(STRING_DEX_CACHE_SIZE_MINUS_ONE), %edx
-    andl %eax, %edx
-    movlps (%ecx, %edx, STRING_DEX_CACHE_ELEMENT_SIZE), %xmm0    // load string idx and ptr to xmm0
-    movd %xmm0, %ecx                                             // extract pointer
+    PUSH edi
+    PUSH esi
+    // Save xmm0 at an aligned address on the stack.
+    subl MACRO_LITERAL(12), %esp
+    CFI_ADJUST_CFA_OFFSET(12)
+    movsd %xmm0, 0(%esp)
+    movl 24(%esp), %edi                                          // get referrer
+    movl ART_METHOD_DECLARING_CLASS_OFFSET(%edi), %edi           // get declaring class
+    movl DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET(%edi), %edi    // get string dex cache
+    movl LITERAL(STRING_DEX_CACHE_SIZE_MINUS_ONE), %esi
+    andl %eax, %esi
+    movlps (%edi, %esi, STRING_DEX_CACHE_ELEMENT_SIZE), %xmm0    // load string idx and ptr to xmm0
+    movd %xmm0, %edi                                             // extract pointer
     pshufd LITERAL(0x55), %xmm0, %xmm0                           // shuffle index into lowest bits
-    movd %xmm0, %edx                                             // extract index
-    cmp %edx, %eax
+    movd %xmm0, %esi                                             // extract index
+    // Restore xmm0 and remove it together with padding from the stack.
+    movsd 0(%esp), %xmm0
+    addl MACRO_LITERAL(12), %esp
+    CFI_ADJUST_CFA_OFFSET(-12)
+    cmp %esi, %eax
     jne .Lart_quick_resolve_string_slow_path
-    movl %ecx, %eax
+    movl %edi, %eax
+    CFI_REMEMBER_STATE
+    POP esi
+    POP edi
 #ifdef USE_READ_BARRIER
     cmpl LITERAL(0), %fs:THREAD_IS_GC_MARKING_OFFSET
-    jne .Lart_quick_resolve_string_marking
+    jne .Lnot_null_art_quick_read_barrier_mark_reg00
 #endif
     ret
+    CFI_RESTORE_STATE
+    CFI_DEF_CFA(esp, 24)                          // workaround for clang bug: 31975598
+
 .Lart_quick_resolve_string_slow_path:
     // Outgoing argument set up
-    SETUP_SAVE_REFS_ONLY_FRAME  ebx, ebx
+    SETUP_SAVE_EVERYTHING_FRAME_EDI_ESI_SAVED ebx, ebx
     subl LITERAL(8), %esp                                        // push padding
     CFI_ADJUST_CFA_OFFSET(8)
     pushl %fs:THREAD_SELF_OFFSET                                 // pass Thread::Current()
@@ -1142,21 +1195,15 @@
     call SYMBOL(artResolveStringFromCode)
     addl LITERAL(16), %esp                                       // pop arguments
     CFI_ADJUST_CFA_OFFSET(-16)
-    RESTORE_SAVE_REFS_ONLY_FRAME
-    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
-.Lart_quick_resolve_string_marking:
-    SETUP_SAVE_REFS_ONLY_FRAME  ebx, ebx
-    testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%eax)
-    jnz .Lart_quick_resolve_string_no_rb
-    subl LITERAL(12), %esp                                   // alignment padding
-    CFI_ADJUST_CFA_OFFSET(12)
-    PUSH eax                                                 // Pass the string as the first param.
-    call SYMBOL(artReadBarrierMark)
-    addl LITERAL(16), %esp
-    CFI_ADJUST_CFA_OFFSET(-16)
-.Lart_quick_resolve_string_no_rb:
-    RESTORE_SAVE_REFS_ONLY_FRAME
+    testl %eax, %eax                                        // If result is null, deliver the OOME.
+    jz 1f
+    CFI_REMEMBER_STATE
+    RESTORE_SAVE_EVERYTHING_FRAME_KEEP_EAX
     ret
+    CFI_RESTORE_STATE
+    CFI_DEF_CFA(esp, FRAME_SIZE_SAVE_EVERYTHING)  // workaround for clang bug: 31975598
+1:
+    DELIVER_PENDING_EXCEPTION_FRAME_READY
 END_FUNCTION art_quick_resolve_string
 
 ONE_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
@@ -1304,21 +1351,21 @@
     RETURN_IF_EAX_ZERO
 END_FUNCTION art_quick_unlock_object_no_inline
 
-DEFINE_FUNCTION art_quick_is_assignable
+DEFINE_FUNCTION art_quick_instance_of
     PUSH eax                              // alignment padding
     PUSH ecx                              // pass arg2 - obj->klass
     PUSH eax                              // pass arg1 - checked class
-    call SYMBOL(artIsAssignableFromCode)  // (Class* klass, Class* ref_klass)
+    call SYMBOL(artInstanceOfFromCode)    // (Object* obj, Class* ref_klass)
     addl LITERAL(12), %esp                // pop arguments
     CFI_ADJUST_CFA_OFFSET(-12)
     ret
-END_FUNCTION art_quick_is_assignable
+END_FUNCTION art_quick_instance_of
 
-DEFINE_FUNCTION art_quick_check_cast
+DEFINE_FUNCTION art_quick_check_instance_of
     PUSH eax                              // alignment padding
-    PUSH ecx                              // pass arg2 - obj->klass
-    PUSH eax                              // pass arg1 - checked class
-    call SYMBOL(artIsAssignableFromCode)  // (Class* klass, Class* ref_klass)
+    PUSH ecx                              // pass arg2 - checked class
+    PUSH eax                              // pass arg1 - obj
+    call SYMBOL(artInstanceOfFromCode)    // (Object* obj, Class* ref_klass)
     testl %eax, %eax
     jz 1f                                 // jump forward if not assignable
     addl LITERAL(12), %esp                // pop arguments
@@ -1338,9 +1385,9 @@
     CFI_ADJUST_CFA_OFFSET(4)
     PUSH ecx                              // pass arg2
     PUSH eax                              // pass arg1
-    call SYMBOL(artThrowClassCastException) // (Class* a, Class* b, Thread*)
+    call SYMBOL(artThrowClassCastExceptionForObject)  // (Object* src, Class* dest, Thread*)
     UNREACHABLE
-END_FUNCTION art_quick_check_cast
+END_FUNCTION art_quick_check_instance_of
 
 // Restore reg's value if reg is not the same as exclude_reg, otherwise just adjust stack.
 MACRO2(POP_REG_NE, reg, exclude_reg)
@@ -1988,15 +2035,14 @@
     lea MIRROR_STRING_VALUE_OFFSET(%ecx), %edi
 #if (STRING_COMPRESSION_FEATURE)
     /* Differ cases */
-    cmpl    LITERAL(0), %edx
-    jl      .Lstring_compareto_this_is_compressed
-    cmpl    LITERAL(0), %ebx
-    jl      .Lstring_compareto_that_is_compressed
+    shrl    LITERAL(1), %edx
+    jnc     .Lstring_compareto_this_is_compressed
+    shrl    LITERAL(1), %ebx
+    jnc     .Lstring_compareto_that_is_compressed
     jmp     .Lstring_compareto_both_not_compressed
 .Lstring_compareto_this_is_compressed:
-    andl    LITERAL(0x7FFFFFFF), %edx
-    cmpl    LITERAL(0), %ebx
-    jl      .Lstring_compareto_both_compressed
+    shrl    LITERAL(1), %ebx
+    jnc     .Lstring_compareto_both_compressed
     /* If (this->IsCompressed() && that->IsCompressed() == false) */
     mov     %edx, %eax
     subl    %ebx, %eax
@@ -2014,7 +2060,6 @@
     cmovne  %edx, %eax                        // return eax = *(this_cur_char) - *(that_cur_char)
     jmp     .Lstring_compareto_return
 .Lstring_compareto_that_is_compressed:
-    andl    LITERAL(0x7FFFFFFF), %ebx
     mov     %edx, %eax
     subl    %ebx, %eax
     mov     %edx, %ecx
@@ -2031,7 +2076,6 @@
     cmovne  %edx, %eax
     jmp     .Lstring_compareto_return         // return eax = *(this_cur_char) - *(that_cur_char)
 .Lstring_compareto_both_compressed:
-    andl    LITERAL(0x7FFFFFFF), %ebx
     /* Calculate min length and count diff */
     mov     %edx, %ecx
     mov     %edx, %eax
@@ -2102,13 +2146,21 @@
     // Null check so that we can load the lock word.
     test REG_VAR(reg), REG_VAR(reg)
     jz .Lret_rb_\name
+.Lnot_null_\name:
     // Check the mark bit, if it is 1 return.
     testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(reg))
     jz .Lslow_rb_\name
     ret
 .Lslow_rb_\name:
-    // Save all potentially live caller-save core registers.
     PUSH eax
+    mov MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(reg)), %eax
+    add LITERAL(LOCK_WORD_STATE_FORWARDING_ADDRESS_OVERFLOW), %eax
+    // Jump if overflow, the only case where it overflows should be the forwarding address one.
+    // Taken ~25% of the time.
+    jnae .Lret_forwarding_address\name
+
+    // Save all potentially live caller-save core registers.
+    mov 0(%esp), %eax
     PUSH ecx
     PUSH edx
     PUSH ebx
@@ -2156,6 +2208,12 @@
     POP_REG_NE eax, RAW_VAR(reg)
 .Lret_rb_\name:
     ret
+.Lret_forwarding_address\name:
+    // The overflow cleared the top bits.
+    sall LITERAL(LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT), %eax
+    mov %eax, REG_VAR(reg)
+    POP_REG_NE eax, RAW_VAR(reg)
+    ret
     END_FUNCTION VAR(name)
 END_MACRO
 
diff --git a/runtime/arch/x86_64/asm_support_x86_64.S b/runtime/arch/x86_64/asm_support_x86_64.S
index af4a6c4..28018c5 100644
--- a/runtime/arch/x86_64/asm_support_x86_64.S
+++ b/runtime/arch/x86_64/asm_support_x86_64.S
@@ -76,6 +76,8 @@
     #define CFI_DEF_CFA_REGISTER(reg) .cfi_def_cfa_register reg
     #define CFI_RESTORE(reg) .cfi_restore reg
     #define CFI_REL_OFFSET(reg,size) .cfi_rel_offset reg,size
+    #define CFI_RESTORE_STATE .cfi_restore_state
+    #define CFI_REMEMBER_STATE .cfi_remember_state
 #else
     // Mac OS' doesn't like cfi_* directives.
     #define CFI_STARTPROC
@@ -85,6 +87,8 @@
     #define CFI_DEF_CFA_REGISTER(reg)
     #define CFI_RESTORE(reg)
     #define CFI_REL_OFFSET(reg,size)
+    #define CFI_RESTORE_STATE
+    #define CFI_REMEMBER_STATE
 #endif
 
     // Symbols.
diff --git a/runtime/arch/x86_64/entrypoints_init_x86_64.cc b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
index 8c425d5..a326b4e 100644
--- a/runtime/arch/x86_64/entrypoints_init_x86_64.cc
+++ b/runtime/arch/x86_64/entrypoints_init_x86_64.cc
@@ -30,8 +30,7 @@
 namespace art {
 
 // Cast entrypoints.
-extern "C" size_t art_quick_assignable_from_code(const mirror::Class* klass,
-                                                 const mirror::Class* ref_class);
+extern "C" size_t art_quick_instance_of(mirror::Object* obj, mirror::Class* ref_class);
 
 // Read barrier entrypoints.
 // art_quick_read_barrier_mark_regX uses an non-standard calling
@@ -56,6 +55,24 @@
 extern "C" mirror::Object* art_quick_read_barrier_slow(mirror::Object*, mirror::Object*, uint32_t);
 extern "C" mirror::Object* art_quick_read_barrier_for_root_slow(GcRoot<mirror::Object>*);
 
+void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_marking) {
+  qpoints->pReadBarrierMarkReg00 = is_marking ? art_quick_read_barrier_mark_reg00 : nullptr;
+  qpoints->pReadBarrierMarkReg01 = is_marking ? art_quick_read_barrier_mark_reg01 : nullptr;
+  qpoints->pReadBarrierMarkReg02 = is_marking ? art_quick_read_barrier_mark_reg02 : nullptr;
+  qpoints->pReadBarrierMarkReg03 = is_marking ? art_quick_read_barrier_mark_reg03 : nullptr;
+  qpoints->pReadBarrierMarkReg05 = is_marking ? art_quick_read_barrier_mark_reg05 : nullptr;
+  qpoints->pReadBarrierMarkReg06 = is_marking ? art_quick_read_barrier_mark_reg06 : nullptr;
+  qpoints->pReadBarrierMarkReg07 = is_marking ? art_quick_read_barrier_mark_reg07 : nullptr;
+  qpoints->pReadBarrierMarkReg08 = is_marking ? art_quick_read_barrier_mark_reg08 : nullptr;
+  qpoints->pReadBarrierMarkReg09 = is_marking ? art_quick_read_barrier_mark_reg09 : nullptr;
+  qpoints->pReadBarrierMarkReg10 = is_marking ? art_quick_read_barrier_mark_reg10 : nullptr;
+  qpoints->pReadBarrierMarkReg11 = is_marking ? art_quick_read_barrier_mark_reg11 : nullptr;
+  qpoints->pReadBarrierMarkReg12 = is_marking ? art_quick_read_barrier_mark_reg12 : nullptr;
+  qpoints->pReadBarrierMarkReg13 = is_marking ? art_quick_read_barrier_mark_reg13 : nullptr;
+  qpoints->pReadBarrierMarkReg14 = is_marking ? art_quick_read_barrier_mark_reg14 : nullptr;
+  qpoints->pReadBarrierMarkReg15 = is_marking ? art_quick_read_barrier_mark_reg15 : nullptr;
+}
+
 void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints) {
 #if defined(__APPLE__)
   UNUSED(jpoints, qpoints);
@@ -64,8 +81,8 @@
   DefaultInitEntryPoints(jpoints, qpoints);
 
   // Cast
-  qpoints->pInstanceofNonTrivial = art_quick_assignable_from_code;
-  qpoints->pCheckCast = art_quick_check_cast;
+  qpoints->pInstanceofNonTrivial = art_quick_instance_of;
+  qpoints->pCheckInstanceOf = art_quick_check_instance_of;
 
   // More math.
   qpoints->pCos = cos;
@@ -102,22 +119,8 @@
 
   // Read barrier.
   qpoints->pReadBarrierJni = ReadBarrierJni;
-  qpoints->pReadBarrierMarkReg00 = art_quick_read_barrier_mark_reg00;
-  qpoints->pReadBarrierMarkReg01 = art_quick_read_barrier_mark_reg01;
-  qpoints->pReadBarrierMarkReg02 = art_quick_read_barrier_mark_reg02;
-  qpoints->pReadBarrierMarkReg03 = art_quick_read_barrier_mark_reg03;
+  UpdateReadBarrierEntrypoints(qpoints, /*is_marking*/ false);
   qpoints->pReadBarrierMarkReg04 = nullptr;  // Cannot use register 4 (RSP) to pass arguments.
-  qpoints->pReadBarrierMarkReg05 = art_quick_read_barrier_mark_reg05;
-  qpoints->pReadBarrierMarkReg06 = art_quick_read_barrier_mark_reg06;
-  qpoints->pReadBarrierMarkReg07 = art_quick_read_barrier_mark_reg07;
-  qpoints->pReadBarrierMarkReg08 = art_quick_read_barrier_mark_reg08;
-  qpoints->pReadBarrierMarkReg09 = art_quick_read_barrier_mark_reg09;
-  qpoints->pReadBarrierMarkReg10 = art_quick_read_barrier_mark_reg10;
-  qpoints->pReadBarrierMarkReg11 = art_quick_read_barrier_mark_reg11;
-  qpoints->pReadBarrierMarkReg12 = art_quick_read_barrier_mark_reg12;
-  qpoints->pReadBarrierMarkReg13 = art_quick_read_barrier_mark_reg13;
-  qpoints->pReadBarrierMarkReg14 = art_quick_read_barrier_mark_reg14;
-  qpoints->pReadBarrierMarkReg15 = art_quick_read_barrier_mark_reg15;
   // x86-64 has only 16 core registers.
   qpoints->pReadBarrierMarkReg16 = nullptr;
   qpoints->pReadBarrierMarkReg17 = nullptr;
diff --git a/runtime/arch/x86_64/instruction_set_features_x86_64.h b/runtime/arch/x86_64/instruction_set_features_x86_64.h
index 0840f89..bc0f708 100644
--- a/runtime/arch/x86_64/instruction_set_features_x86_64.h
+++ b/runtime/arch/x86_64/instruction_set_features_x86_64.h
@@ -21,41 +21,42 @@
 
 namespace art {
 
+class X86_64InstructionSetFeatures;
+using X86_64FeaturesUniquePtr = std::unique_ptr<const X86_64InstructionSetFeatures>;
+
 // Instruction set features relevant to the X86_64 architecture.
 class X86_64InstructionSetFeatures FINAL : public X86InstructionSetFeatures {
  public:
   // Process a CPU variant string like "atom" or "nehalem" and create InstructionSetFeatures.
-  static const X86_64InstructionSetFeatures* FromVariant(const std::string& variant,
-                                                         std::string* error_msg) {
-    return X86InstructionSetFeatures::FromVariant(variant, error_msg, true)
-        ->AsX86_64InstructionSetFeatures();
+  static X86_64FeaturesUniquePtr FromVariant(const std::string& variant, std::string* error_msg) {
+    return Convert(X86InstructionSetFeatures::FromVariant(variant, error_msg, true));
   }
 
   // Parse a bitmap and create an InstructionSetFeatures.
-  static const X86_64InstructionSetFeatures* FromBitmap(uint32_t bitmap) {
-    return X86InstructionSetFeatures::FromBitmap(bitmap, true)->AsX86_64InstructionSetFeatures();
+  static X86_64FeaturesUniquePtr FromBitmap(uint32_t bitmap) {
+    return Convert(X86InstructionSetFeatures::FromBitmap(bitmap, true));
   }
 
   // Turn C pre-processor #defines into the equivalent instruction set features.
-  static const X86_64InstructionSetFeatures* FromCppDefines() {
-    return X86InstructionSetFeatures::FromCppDefines(true)->AsX86_64InstructionSetFeatures();
+  static X86_64FeaturesUniquePtr FromCppDefines() {
+    return Convert(X86InstructionSetFeatures::FromCppDefines(true));
   }
 
   // Process /proc/cpuinfo and use kRuntimeISA to produce InstructionSetFeatures.
-  static const X86_64InstructionSetFeatures* FromCpuInfo() {
-    return X86InstructionSetFeatures::FromCpuInfo(true)->AsX86_64InstructionSetFeatures();
+  static X86_64FeaturesUniquePtr FromCpuInfo() {
+    return Convert(X86InstructionSetFeatures::FromCpuInfo(true));
   }
 
   // Process the auxiliary vector AT_HWCAP entry and use kRuntimeISA to produce
   // InstructionSetFeatures.
-  static const X86_64InstructionSetFeatures* FromHwcap() {
-    return X86InstructionSetFeatures::FromHwcap(true)->AsX86_64InstructionSetFeatures();
+  static X86_64FeaturesUniquePtr FromHwcap() {
+    return Convert(X86InstructionSetFeatures::FromHwcap(true));
   }
 
   // Use assembly tests of the current runtime (ie kRuntimeISA) to determine the
   // InstructionSetFeatures. This works around kernel bugs in AT_HWCAP and /proc/cpuinfo.
-  static const X86_64InstructionSetFeatures* FromAssembly() {
-    return X86InstructionSetFeatures::FromAssembly(true)->AsX86_64InstructionSetFeatures();
+  static X86_64FeaturesUniquePtr FromAssembly() {
+    return Convert(X86InstructionSetFeatures::FromAssembly(true));
   }
 
   InstructionSet GetInstructionSet() const OVERRIDE {
@@ -66,7 +67,7 @@
 
  protected:
   // Parse a string of the form "ssse3" adding these to a new InstructionSetFeatures.
-  const InstructionSetFeatures*
+  std::unique_ptr<const InstructionSetFeatures>
       AddFeaturesFromSplitString(const bool smp, const std::vector<std::string>& features,
                                  std::string* error_msg) const OVERRIDE {
     return X86InstructionSetFeatures::AddFeaturesFromSplitString(smp, features, true, error_msg);
@@ -79,6 +80,10 @@
                                   has_AVX2, has_POPCNT) {
   }
 
+  static X86_64FeaturesUniquePtr Convert(X86FeaturesUniquePtr&& in) {
+    return X86_64FeaturesUniquePtr(in.release()->AsX86_64InstructionSetFeatures());
+  }
+
   friend class X86InstructionSetFeatures;
 
   DISALLOW_COPY_AND_ASSIGN(X86_64InstructionSetFeatures);
diff --git a/runtime/arch/x86_64/quick_entrypoints_x86_64.S b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
index 54e52e5..860b77e 100644
--- a/runtime/arch/x86_64/quick_entrypoints_x86_64.S
+++ b/runtime/arch/x86_64/quick_entrypoints_x86_64.S
@@ -263,16 +263,15 @@
     /*
      * Macro that sets up the callee save frame to conform with
      * Runtime::CreateCalleeSaveMethod(kSaveEverything)
-     * when R15 is already saved.
+     * when R14 and R15 are already saved.
      */
-MACRO0(SETUP_SAVE_EVERYTHING_FRAME_R15_SAVED)
+MACRO0(SETUP_SAVE_EVERYTHING_FRAME_R14_R15_SAVED)
 #if defined(__APPLE__)
     int3
     int3
 #else
     // Save core registers from highest to lowest to agree with core spills bitmap.
-    // R15, or at least a placeholder for it, is already on the stack.
-    PUSH r14
+    // R14 and R15, or at least placeholders for them, are already on the stack.
     PUSH r13
     PUSH r12
     PUSH r11
@@ -326,13 +325,23 @@
     /*
      * Macro that sets up the callee save frame to conform with
      * Runtime::CreateCalleeSaveMethod(kSaveEverything)
+     * when R15 is already saved.
+     */
+MACRO0(SETUP_SAVE_EVERYTHING_FRAME_R15_SAVED)
+    PUSH r14
+    SETUP_SAVE_EVERYTHING_FRAME_R14_R15_SAVED
+END_MACRO
+
+    /*
+     * Macro that sets up the callee save frame to conform with
+     * Runtime::CreateCalleeSaveMethod(kSaveEverything)
      */
 MACRO0(SETUP_SAVE_EVERYTHING_FRAME)
     PUSH r15
     SETUP_SAVE_EVERYTHING_FRAME_R15_SAVED
 END_MACRO
 
-MACRO0(RESTORE_SAVE_EVERYTHING_FRAME)
+MACRO0(RESTORE_SAVE_EVERYTHING_FRAME_FRPS)
     // Restore FPRs. Method and padding is still on the stack.
     movq 16(%rsp), %xmm0
     movq 24(%rsp), %xmm1
@@ -350,12 +359,10 @@
     movq 120(%rsp), %xmm13
     movq 128(%rsp), %xmm14
     movq 136(%rsp), %xmm15
+END_MACRO
 
-    // Remove save everything callee save method, stack alignment padding and FPRs.
-    addq MACRO_LITERAL(16 + 16 * 8), %rsp
-    CFI_ADJUST_CFA_OFFSET(-(16 + 16 * 8))
-    // Restore callee and GPR args, mixed together to agree with core spills bitmap.
-    POP rax
+MACRO0(RESTORE_SAVE_EVERYTHING_FRAME_GPRS_EXCEPT_RAX)
+    // Restore callee and GPR args (except RAX), mixed together to agree with core spills bitmap.
     POP rcx
     POP rdx
     POP rbx
@@ -372,19 +379,47 @@
     POP r15
 END_MACRO
 
+MACRO0(RESTORE_SAVE_EVERYTHING_FRAME)
+    RESTORE_SAVE_EVERYTHING_FRAME_FRPS
+
+    // Remove save everything callee save method, stack alignment padding and FPRs.
+    addq MACRO_LITERAL(16 + 16 * 8), %rsp
+    CFI_ADJUST_CFA_OFFSET(-(16 + 16 * 8))
+
+    POP rax
+    RESTORE_SAVE_EVERYTHING_FRAME_GPRS_EXCEPT_RAX
+END_MACRO
+
+MACRO0(RESTORE_SAVE_EVERYTHING_FRAME_KEEP_RAX)
+    RESTORE_SAVE_EVERYTHING_FRAME_FRPS
+
+    // Remove save everything callee save method, stack alignment padding and FPRs, skip RAX.
+    addq MACRO_LITERAL(16 + 16 * 8 + 8), %rsp
+    CFI_ADJUST_CFA_OFFSET(-(16 + 16 * 8 + 8))
+
+    RESTORE_SAVE_EVERYTHING_FRAME_GPRS_EXCEPT_RAX
+END_MACRO
 
     /*
-     * Macro that set calls through to artDeliverPendingExceptionFromCode, where the pending
-     * exception is Thread::Current()->exception_.
+     * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
+     * exception is Thread::Current()->exception_ when the runtime method frame is ready.
      */
-MACRO0(DELIVER_PENDING_EXCEPTION)
-    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME        // save callee saves for throw
+MACRO0(DELIVER_PENDING_EXCEPTION_FRAME_READY)
     // (Thread*) setup
     movq %gs:THREAD_SELF_OFFSET, %rdi
     call SYMBOL(artDeliverPendingExceptionFromCode)  // artDeliverPendingExceptionFromCode(Thread*)
     UNREACHABLE
 END_MACRO
 
+    /*
+     * Macro that calls through to artDeliverPendingExceptionFromCode, where the pending
+     * exception is Thread::Current()->exception_.
+     */
+MACRO0(DELIVER_PENDING_EXCEPTION)
+    SETUP_SAVE_ALL_CALLEE_SAVES_FRAME        // save callee saves for throw
+    DELIVER_PENDING_EXCEPTION_FRAME_READY
+END_MACRO
+
 MACRO2(NO_ARG_RUNTIME_EXCEPTION, c_name, cxx_name)
     DEFINE_FUNCTION VAR(c_name)
     SETUP_SAVE_ALL_CALLEE_SAVES_FRAME  // save all registers as basis for long jump context
@@ -1295,45 +1330,48 @@
 END_FUNCTION art_quick_alloc_object_initialized_region_tlab
 
 DEFINE_FUNCTION art_quick_resolve_string
-    movq 8(%rsp), %rcx                                         // get referrer
-    movl ART_METHOD_DECLARING_CLASS_OFFSET(%rcx), %ecx         // get declaring class
-    movq DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET(%ecx), %rcx  // get string dex cache
-    movq LITERAL(STRING_DEX_CACHE_SIZE_MINUS_ONE), %rdx
-    andq %rdi, %rdx
-    movq (%rcx, %rdx, STRING_DEX_CACHE_ELEMENT_SIZE), %rdx
-    movl %edx, %eax
-    shrq LITERAL(32), %rdx
-    cmp %rdx, %rdi
+    // Custom calling convention: RAX serves as both input and output.
+    PUSH r15
+    PUSH r14
+    movq 24(%rsp), %r15                                         // get referrer
+    movl ART_METHOD_DECLARING_CLASS_OFFSET(%r15), %r15d         // get declaring class
+    movq DECLARING_CLASS_DEX_CACHE_STRINGS_OFFSET(%r15d), %r15  // get string dex cache
+    movl LITERAL(STRING_DEX_CACHE_SIZE_MINUS_ONE), %r14d
+    andl %eax, %r14d
+    movq (%r15, %r14, STRING_DEX_CACHE_ELEMENT_SIZE), %r14
+    movl %r14d, %r15d
+    shrq LITERAL(32), %r14
+    cmpl %r14d, %eax
     jne .Lart_quick_resolve_string_slow_path
+    movl %r15d, %eax
+    CFI_REMEMBER_STATE
+    POP r14
+    POP r15
 #ifdef USE_READ_BARRIER
     cmpl LITERAL(0), %gs:THREAD_IS_GC_MARKING_OFFSET
-    jne .Lart_quick_resolve_string_marking
+    jne .Lnot_null_art_quick_read_barrier_mark_reg00
 #endif
     ret
-// Slow path, the index did not match
+    CFI_RESTORE_STATE
+    CFI_DEF_CFA(rsp, 24)                        // workaround for clang bug: 31975598
+
+// Slow path, the index did not match.
 .Lart_quick_resolve_string_slow_path:
-    SETUP_SAVE_REFS_ONLY_FRAME
-    movq %rcx, %rax
+    SETUP_SAVE_EVERYTHING_FRAME_R14_R15_SAVED
     // Outgoing argument set up
+    movl %eax, %edi                             // pass string index
     movq %gs:THREAD_SELF_OFFSET, %rsi           // pass Thread::Current()
     call SYMBOL(artResolveStringFromCode)       // artResolveStringFromCode(arg0, referrer, Thread*)
-    RESTORE_SAVE_REFS_ONLY_FRAME                // restore frame up to return address
-    RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
-// GC is marking case, need to check the mark bit.
-.Lart_quick_resolve_string_marking:
-    testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(%rax)
-    jnz .Lart_quick_resolve_string_no_rb
-    // Save LR so that we can return, also x1 for alignment purposes
-    PUSH rdi
-    PUSH rsi
-    subq LITERAL(8), %rsp                         // 16 byte alignment
-    movq %rax, %rdi
-    call SYMBOL(artReadBarrierMark)
-    addq LITERAL(8), %rsp
-    POP  rsi
-    POP  rdi
-.Lart_quick_resolve_string_no_rb:
+
+    testl %eax, %eax                            // If result is null, deliver the OOME.
+    jz 1f
+    CFI_REMEMBER_STATE
+    RESTORE_SAVE_EVERYTHING_FRAME_KEEP_RAX      // restore frame up to return address
     ret
+    CFI_RESTORE_STATE
+    CFI_DEF_CFA(rsp, FRAME_SIZE_SAVE_EVERYTHING)  // workaround for clang bug: 31975598
+1:
+    DELIVER_PENDING_EXCEPTION_FRAME_READY
 END_FUNCTION art_quick_resolve_string
 
 ONE_ARG_DOWNCALL art_quick_initialize_static_storage, artInitializeStaticStorageFromCode, RETURN_IF_RESULT_IS_NON_ZERO_OR_DELIVER
@@ -1442,19 +1480,21 @@
     RETURN_IF_EAX_ZERO
 END_FUNCTION art_quick_unlock_object_no_inline
 
-DEFINE_FUNCTION art_quick_check_cast
+DEFINE_FUNCTION art_quick_check_instance_of
+    // We could check the super classes here but that is usually already checked in the caller.
     PUSH rdi                          // Save args for exc
     PUSH rsi
     subq LITERAL(8), %rsp             // Alignment padding.
     CFI_ADJUST_CFA_OFFSET(8)
     SETUP_FP_CALLEE_SAVE_FRAME
-    call SYMBOL(artIsAssignableFromCode)  // (Class* klass, Class* ref_klass)
+    call SYMBOL(artInstanceOfFromCode)  // (Object* obj, Class* ref_klass)
     testq %rax, %rax
     jz 1f                             // jump forward if not assignable
     RESTORE_FP_CALLEE_SAVE_FRAME
     addq LITERAL(24), %rsp            // pop arguments
     CFI_ADJUST_CFA_OFFSET(-24)
 
+.Lreturn:
     ret
 
     CFI_ADJUST_CFA_OFFSET(24 + 4 * 8)  // Reset unwind info so following code unwinds.
@@ -1466,9 +1506,9 @@
     POP rdi
     SETUP_SAVE_ALL_CALLEE_SAVES_FRAME // save all registers as basis for long jump context
     mov %gs:THREAD_SELF_OFFSET, %rdx  // pass Thread::Current()
-    call SYMBOL(artThrowClassCastException) // (Class* a, Class* b, Thread*)
+    call SYMBOL(artThrowClassCastExceptionForObject)  // (Object* src, Class* dest, Thread*)
     UNREACHABLE
-END_FUNCTION art_quick_check_cast
+END_FUNCTION art_quick_check_instance_of
 
 
 // Restore reg's value if reg is not the same as exclude_reg, otherwise just adjust stack.
@@ -2102,22 +2142,21 @@
     leal MIRROR_STRING_VALUE_OFFSET(%esi), %esi
 #if (STRING_COMPRESSION_FEATURE)
     /* Differ cases */
-    cmpl LITERAL(0), %r8d
-    jl      .Lstring_compareto_this_is_compressed
-    cmpl    LITERAL(0), %r9d
-    jl      .Lstring_compareto_that_is_compressed
+    shrl    LITERAL(1), %r8d
+    jnc     .Lstring_compareto_this_is_compressed
+    shrl    LITERAL(1), %r9d
+    jnc     .Lstring_compareto_that_is_compressed
     jmp     .Lstring_compareto_both_not_compressed
 .Lstring_compareto_this_is_compressed:
-    andl    LITERAL(0x7FFFFFFF), %r8d
-    cmpl    LITERAL(0), %r9d
-    jl      .Lstring_compareto_both_compressed
+    shrl    LITERAL(1), %r9d
+    jnc     .Lstring_compareto_both_compressed
     /* Comparison this (8-bit) and that (16-bit) */
     mov     %r8d, %eax
     subl    %r9d, %eax
     mov     %r8d, %ecx
     cmovg   %r9d, %ecx
     /* Going into loop to compare each character */
-    jecxz   .Lstring_compareto_keep_length      // check loop counter (if 0 then stop)
+    jecxz   .Lstring_compareto_keep_length1     // check loop counter (if 0 then stop)
 .Lstring_compareto_loop_comparison_this_compressed:
     movzbl  (%edi), %r8d                        // move *(this_cur_char) byte to long
     movzwl  (%esi), %r9d                        // move *(that_cur_char) word to long
@@ -2126,15 +2165,15 @@
     subl    %r9d, %r8d
     loope   .Lstring_compareto_loop_comparison_this_compressed
     cmovne  %r8d, %eax                          // return eax = *(this_cur_char) - *(that_cur_char)
+.Lstring_compareto_keep_length1:
     ret
 .Lstring_compareto_that_is_compressed:
-    andl    LITERAL(0x7FFFFFFF), %r9d
     movl    %r8d, %eax
     subl    %r9d, %eax
     mov     %r8d, %ecx
     cmovg   %r9d, %ecx
     /* Comparison this (8-bit) and that (16-bit) */
-    jecxz   .Lstring_compareto_keep_length      // check loop counter (if 0, don't compare)
+    jecxz   .Lstring_compareto_keep_length2     // check loop counter (if 0, don't compare)
 .Lstring_compareto_loop_comparison_that_compressed:
     movzwl  (%edi), %r8d                        // move *(this_cur_char) word to long
     movzbl  (%esi), %r9d                        // move *(that_cur_chat) byte to long
@@ -2143,17 +2182,17 @@
     subl    %r9d, %r8d
     loope   .Lstring_compareto_loop_comparison_that_compressed
     cmovne  %r8d, %eax                          // return eax = *(this_cur_char) - *(that_cur_char)
+.Lstring_compareto_keep_length2:
     ret
 .Lstring_compareto_both_compressed:
-    andl    LITERAL(0x7FFFFFFF), %r9d
     /* Calculate min length and count diff */
     movl    %r8d, %ecx
     movl    %r8d, %eax
     subl    %r9d, %eax
     cmovg   %r9d, %ecx
-    jecxz   .Lstring_compareto_keep_length
+    jecxz   .Lstring_compareto_keep_length3
     repe    cmpsb
-    je      .Lstring_compareto_keep_length
+    je      .Lstring_compareto_keep_length3
     movzbl  -1(%edi), %eax        // get last compared char from this string (8-bit)
     movzbl  -1(%esi), %ecx        // get last compared char from comp string (8-bit)
     jmp     .Lstring_compareto_count_difference
@@ -2171,29 +2210,29 @@
      *   esi: pointer to comp string data
      *   edi: pointer to this string data
      */
-    jecxz .Lstring_compareto_keep_length
+    jecxz .Lstring_compareto_keep_length3
     repe  cmpsw                   // find nonmatching chars in [%esi] and [%edi], up to length %ecx
-    je    .Lstring_compareto_keep_length
+    je    .Lstring_compareto_keep_length3
     movzwl  -2(%edi), %eax        // get last compared char from this string (16-bit)
     movzwl  -2(%esi), %ecx        // get last compared char from comp string (16-bit)
 .Lstring_compareto_count_difference:
     subl  %ecx, %eax              // return the difference
-.Lstring_compareto_keep_length:
+.Lstring_compareto_keep_length3:
     ret
 END_FUNCTION art_quick_string_compareto
 
 UNIMPLEMENTED art_quick_memcmp16
 
-DEFINE_FUNCTION art_quick_assignable_from_code
+DEFINE_FUNCTION art_quick_instance_of
     SETUP_FP_CALLEE_SAVE_FRAME
     subq LITERAL(8), %rsp                      // Alignment padding.
     CFI_ADJUST_CFA_OFFSET(8)
-    call SYMBOL(artIsAssignableFromCode)       // (const mirror::Class*, const mirror::Class*)
+    call SYMBOL(artInstanceOfFromCode)         // (mirror::Object*, mirror::Class*)
     addq LITERAL(8), %rsp
     CFI_ADJUST_CFA_OFFSET(-8)
     RESTORE_FP_CALLEE_SAVE_FRAME
     ret
-END_FUNCTION art_quick_assignable_from_code
+END_FUNCTION art_quick_instance_of
 
 
 // Return from a nested signal:
@@ -2228,13 +2267,22 @@
     // Null check so that we can load the lock word.
     testq REG_VAR(reg), REG_VAR(reg)
     jz .Lret_rb_\name
+.Lnot_null_\name:
     // Check the mark bit, if it is 1 return.
     testl LITERAL(LOCK_WORD_MARK_BIT_MASK_SHIFTED), MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(reg))
     jz .Lslow_rb_\name
     ret
 .Lslow_rb_\name:
-    // Save all potentially live caller-save core registers.
     PUSH rax
+    movl MIRROR_OBJECT_LOCK_WORD_OFFSET(REG_VAR(reg)), %eax
+    addl LITERAL(LOCK_WORD_STATE_FORWARDING_ADDRESS_OVERFLOW), %eax
+    // Jump if the addl caused eax to unsigned overflow. The only case where it overflows is the
+    // forwarding address one.
+    // Taken ~25% of the time.
+    jnae .Lret_forwarding_address\name
+
+    // Save all potentially live caller-save core registers.
+    movq 0(%rsp), %rax
     PUSH rcx
     PUSH rdx
     PUSH rsi
@@ -2299,6 +2347,12 @@
     POP_REG_NE rax, RAW_VAR(reg)
 .Lret_rb_\name:
     ret
+.Lret_forwarding_address\name:
+    // The overflow cleared the top bits.
+    sall LITERAL(LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT), %eax
+    movq %rax, REG_VAR(reg)
+    POP_REG_NE rax, RAW_VAR(reg)
+    ret
     END_FUNCTION VAR(name)
 END_MACRO
 
diff --git a/runtime/art_field-inl.h b/runtime/art_field-inl.h
index 3b24aab..b9f688d 100644
--- a/runtime/art_field-inl.h
+++ b/runtime/art_field-inl.h
@@ -62,7 +62,7 @@
 }
 
 inline uint32_t ArtField::Get32(ObjPtr<mirror::Object> object) {
-  DCHECK(object != nullptr) << PrettyField(this);
+  DCHECK(object != nullptr) << PrettyField();
   DCHECK(!IsStatic() || (object == GetDeclaringClass()) || !Runtime::Current()->IsStarted());
   if (UNLIKELY(IsVolatile())) {
     return object->GetField32Volatile(GetOffset());
@@ -72,7 +72,7 @@
 
 template<bool kTransactionActive>
 inline void ArtField::Set32(ObjPtr<mirror::Object> object, uint32_t new_value) {
-  DCHECK(object != nullptr) << PrettyField(this);
+  DCHECK(object != nullptr) << PrettyField();
   DCHECK(!IsStatic() || (object == GetDeclaringClass()) || !Runtime::Current()->IsStarted());
   if (UNLIKELY(IsVolatile())) {
     object->SetField32Volatile<kTransactionActive>(GetOffset(), new_value);
@@ -82,7 +82,7 @@
 }
 
 inline uint64_t ArtField::Get64(ObjPtr<mirror::Object> object) {
-  DCHECK(object != nullptr) << PrettyField(this);
+  DCHECK(object != nullptr) << PrettyField();
   DCHECK(!IsStatic() || (object == GetDeclaringClass()) || !Runtime::Current()->IsStarted());
   if (UNLIKELY(IsVolatile())) {
     return object->GetField64Volatile(GetOffset());
@@ -92,7 +92,7 @@
 
 template<bool kTransactionActive>
 inline void ArtField::Set64(ObjPtr<mirror::Object> object, uint64_t new_value) {
-  DCHECK(object != nullptr) << PrettyField(this);
+  DCHECK(object != nullptr) << PrettyField();
   DCHECK(!IsStatic() || (object == GetDeclaringClass()) || !Runtime::Current()->IsStarted());
   if (UNLIKELY(IsVolatile())) {
     object->SetField64Volatile<kTransactionActive>(GetOffset(), new_value);
@@ -103,7 +103,7 @@
 
 template<class MirrorType>
 inline ObjPtr<MirrorType> ArtField::GetObj(ObjPtr<mirror::Object> object) {
-  DCHECK(object != nullptr) << PrettyField(this);
+  DCHECK(object != nullptr) << PrettyField();
   DCHECK(!IsStatic() || (object == GetDeclaringClass()) || !Runtime::Current()->IsStarted());
   if (UNLIKELY(IsVolatile())) {
     return object->GetFieldObjectVolatile<MirrorType>(GetOffset());
@@ -113,18 +113,18 @@
 
 template<bool kTransactionActive>
 inline void ArtField::SetObj(ObjPtr<mirror::Object> object, ObjPtr<mirror::Object> new_value) {
-  DCHECK(object != nullptr) << PrettyField(this);
+  DCHECK(object != nullptr) << PrettyField();
   DCHECK(!IsStatic() || (object == GetDeclaringClass()) || !Runtime::Current()->IsStarted());
   if (UNLIKELY(IsVolatile())) {
-    object->SetFieldObjectVolatile<kTransactionActive>(GetOffset(), new_value.Ptr());
+    object->SetFieldObjectVolatile<kTransactionActive>(GetOffset(), new_value);
   } else {
-    object->SetFieldObject<kTransactionActive>(GetOffset(), new_value.Ptr());
+    object->SetFieldObject<kTransactionActive>(GetOffset(), new_value);
   }
 }
 
 #define FIELD_GET(object, type) \
-  DCHECK_EQ(Primitive::kPrim ## type, GetTypeAsPrimitiveType()) << PrettyField(this); \
-  DCHECK((object) != nullptr) << PrettyField(this); \
+  DCHECK_EQ(Primitive::kPrim ## type, GetTypeAsPrimitiveType()) << PrettyField(); \
+  DCHECK((object) != nullptr) << PrettyField(); \
   DCHECK(!IsStatic() || ((object) == GetDeclaringClass()) || !Runtime::Current()->IsStarted()); \
   if (UNLIKELY(IsVolatile())) { \
     return (object)->GetField ## type ## Volatile(GetOffset()); \
@@ -132,8 +132,8 @@
   return (object)->GetField ## type(GetOffset());
 
 #define FIELD_SET(object, type, value) \
-  DCHECK_EQ(Primitive::kPrim ## type, GetTypeAsPrimitiveType()) << PrettyField(this); \
-  DCHECK((object) != nullptr) << PrettyField(this); \
+  DCHECK_EQ(Primitive::kPrim ## type, GetTypeAsPrimitiveType()) << PrettyField(); \
+  DCHECK((object) != nullptr) << PrettyField(); \
   DCHECK(!IsStatic() || ((object) == GetDeclaringClass()) || !Runtime::Current()->IsStarted()); \
   if (UNLIKELY(IsVolatile())) { \
     (object)->SetField ## type ## Volatile<kTransactionActive>(GetOffset(), value); \
@@ -183,7 +183,7 @@
 inline int32_t ArtField::GetInt(ObjPtr<mirror::Object> object) {
   if (kIsDebugBuild) {
     Primitive::Type type = GetTypeAsPrimitiveType();
-    CHECK(type == Primitive::kPrimInt || type == Primitive::kPrimFloat) << PrettyField(this);
+    CHECK(type == Primitive::kPrimInt || type == Primitive::kPrimFloat) << PrettyField();
   }
   return Get32(object);
 }
@@ -192,7 +192,7 @@
 inline void ArtField::SetInt(ObjPtr<mirror::Object> object, int32_t i) {
   if (kIsDebugBuild) {
     Primitive::Type type = GetTypeAsPrimitiveType();
-    CHECK(type == Primitive::kPrimInt || type == Primitive::kPrimFloat) << PrettyField(this);
+    CHECK(type == Primitive::kPrimInt || type == Primitive::kPrimFloat) << PrettyField();
   }
   Set32<kTransactionActive>(object, i);
 }
@@ -200,7 +200,7 @@
 inline int64_t ArtField::GetLong(ObjPtr<mirror::Object> object) {
   if (kIsDebugBuild) {
     Primitive::Type type = GetTypeAsPrimitiveType();
-    CHECK(type == Primitive::kPrimLong || type == Primitive::kPrimDouble) << PrettyField(this);
+    CHECK(type == Primitive::kPrimLong || type == Primitive::kPrimDouble) << PrettyField();
   }
   return Get64(object);
 }
@@ -209,13 +209,13 @@
 inline void ArtField::SetLong(ObjPtr<mirror::Object> object, int64_t j) {
   if (kIsDebugBuild) {
     Primitive::Type type = GetTypeAsPrimitiveType();
-    CHECK(type == Primitive::kPrimLong || type == Primitive::kPrimDouble) << PrettyField(this);
+    CHECK(type == Primitive::kPrimLong || type == Primitive::kPrimDouble) << PrettyField();
   }
   Set64<kTransactionActive>(object, j);
 }
 
 inline float ArtField::GetFloat(ObjPtr<mirror::Object> object) {
-  DCHECK_EQ(Primitive::kPrimFloat, GetTypeAsPrimitiveType()) << PrettyField(this);
+  DCHECK_EQ(Primitive::kPrimFloat, GetTypeAsPrimitiveType()) << PrettyField();
   JValue bits;
   bits.SetI(Get32(object));
   return bits.GetF();
@@ -223,14 +223,14 @@
 
 template<bool kTransactionActive>
 inline void ArtField::SetFloat(ObjPtr<mirror::Object> object, float f) {
-  DCHECK_EQ(Primitive::kPrimFloat, GetTypeAsPrimitiveType()) << PrettyField(this);
+  DCHECK_EQ(Primitive::kPrimFloat, GetTypeAsPrimitiveType()) << PrettyField();
   JValue bits;
   bits.SetF(f);
   Set32<kTransactionActive>(object, bits.GetI());
 }
 
 inline double ArtField::GetDouble(ObjPtr<mirror::Object> object) {
-  DCHECK_EQ(Primitive::kPrimDouble, GetTypeAsPrimitiveType()) << PrettyField(this);
+  DCHECK_EQ(Primitive::kPrimDouble, GetTypeAsPrimitiveType()) << PrettyField();
   JValue bits;
   bits.SetJ(Get64(object));
   return bits.GetD();
@@ -238,20 +238,20 @@
 
 template<bool kTransactionActive>
 inline void ArtField::SetDouble(ObjPtr<mirror::Object> object, double d) {
-  DCHECK_EQ(Primitive::kPrimDouble, GetTypeAsPrimitiveType()) << PrettyField(this);
+  DCHECK_EQ(Primitive::kPrimDouble, GetTypeAsPrimitiveType()) << PrettyField();
   JValue bits;
   bits.SetD(d);
   Set64<kTransactionActive>(object, bits.GetJ());
 }
 
 inline ObjPtr<mirror::Object> ArtField::GetObject(ObjPtr<mirror::Object> object) {
-  DCHECK_EQ(Primitive::kPrimNot, GetTypeAsPrimitiveType()) << PrettyField(this);
+  DCHECK_EQ(Primitive::kPrimNot, GetTypeAsPrimitiveType()) << PrettyField();
   return GetObj(object);
 }
 
 template<bool kTransactionActive>
 inline void ArtField::SetObject(ObjPtr<mirror::Object> object, ObjPtr<mirror::Object> l) {
-  DCHECK_EQ(Primitive::kPrimNot, GetTypeAsPrimitiveType()) << PrettyField(this);
+  DCHECK_EQ(Primitive::kPrimNot, GetTypeAsPrimitiveType()) << PrettyField();
   SetObj<kTransactionActive>(object, l);
 }
 
diff --git a/runtime/art_field.cc b/runtime/art_field.cc
index 78c62d6..b46b058 100644
--- a/runtime/art_field.cc
+++ b/runtime/art_field.cc
@@ -62,4 +62,25 @@
                                                              hs.NewHandle(dex_cache));
 }
 
+std::string ArtField::PrettyField(ArtField* f, bool with_type) {
+  if (f == nullptr) {
+    return "null";
+  }
+  return f->PrettyField(with_type);
+}
+
+std::string ArtField::PrettyField(bool with_type) {
+  std::string result;
+  if (with_type) {
+    result += PrettyDescriptor(GetTypeDescriptor());
+    result += ' ';
+  }
+  std::string temp;
+  result += PrettyDescriptor(GetDeclaringClass()->GetDescriptor(&temp));
+  result += '.';
+  result += GetName();
+  return result;
+}
+
+
 }  // namespace art
diff --git a/runtime/art_field.h b/runtime/art_field.h
index 8ba383c..7c2f490 100644
--- a/runtime/art_field.h
+++ b/runtime/art_field.h
@@ -201,6 +201,13 @@
     return declaring_class_;
   }
 
+  // Returns a human-readable signature. Something like "a.b.C.f" or
+  // "int a.b.C.f" (depending on the value of 'with_type').
+  static std::string PrettyField(ArtField* f, bool with_type = true)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+  std::string PrettyField(bool with_type = true)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
   // Update the declaring class with the passed in visitor. Does not use read barrier.
   template <typename Visitor>
   ALWAYS_INLINE void UpdateObjects(const Visitor& visitor)
diff --git a/runtime/art_method-inl.h b/runtime/art_method-inl.h
index 73c6cf1..a652178 100644
--- a/runtime/art_method-inl.h
+++ b/runtime/art_method-inl.h
@@ -33,6 +33,7 @@
 #include "mirror/object-inl.h"
 #include "mirror/object_array.h"
 #include "oat.h"
+#include "obj_ptr-inl.h"
 #include "quick/quick_method_frame_info.h"
 #include "read_barrier-inl.h"
 #include "runtime-inl.h"
@@ -55,7 +56,7 @@
     if (!IsRuntimeMethod()) {
       CHECK(result != nullptr) << this;
       CHECK(result->IsIdxLoaded() || result->IsErroneous())
-          << result->GetStatus() << " " << PrettyClass(result);
+          << result->GetStatus() << " " << result->PrettyClass();
     } else {
       CHECK(result == nullptr) << this;
     }
@@ -63,7 +64,7 @@
   return result;
 }
 
-inline void ArtMethod::SetDeclaringClass(mirror::Class* new_declaring_class) {
+inline void ArtMethod::SetDeclaringClass(ObjPtr<mirror::Class> new_declaring_class) {
   declaring_class_ = GcRoot<mirror::Class>(new_declaring_class);
 }
 
@@ -227,9 +228,10 @@
     case kDirect:
       return !IsDirect() || IsStatic();
     case kVirtual: {
-      // We have an error if we are direct or a non-default, non-miranda interface method.
+      // We have an error if we are direct or a non-copied (i.e. not part of a real class) interface
+      // method.
       mirror::Class* methods_class = GetDeclaringClass();
-      return IsDirect() || (methods_class->IsInterface() && !IsDefault() && !IsMiranda());
+      return IsDirect() || (methods_class->IsInterface() && !IsCopied());
     }
     case kSuper:
       // Constructors and static methods are called with invoke-direct.
diff --git a/runtime/art_method.cc b/runtime/art_method.cc
index 937dcee..c550a1b 100644
--- a/runtime/art_method.cc
+++ b/runtime/art_method.cc
@@ -276,7 +276,7 @@
     if (LIKELY(have_quick_code)) {
       if (kLogInvocationStartAndReturn) {
         LOG(INFO) << StringPrintf(
-            "Invoking '%s' quick code=%p static=%d", PrettyMethod(this).c_str(),
+            "Invoking '%s' quick code=%p static=%d", PrettyMethod().c_str(),
             GetEntryPointFromQuickCompiledCode(), static_cast<int>(IsStatic() ? 1 : 0));
       }
 
@@ -287,7 +287,7 @@
             ? nullptr
             : GetOatMethodQuickCode(runtime->GetClassLinker()->GetImagePointerSize());
         CHECK(oat_quick_code == nullptr || oat_quick_code != GetEntryPointFromQuickCompiledCode())
-            << "Don't call compiled code when -Xint " << PrettyMethod(this);
+            << "Don't call compiled code when -Xint " << PrettyMethod();
       }
 
       if (!IsStatic()) {
@@ -302,11 +302,11 @@
         self->DeoptimizeWithDeoptimizationException(result);
       }
       if (kLogInvocationStartAndReturn) {
-        LOG(INFO) << StringPrintf("Returned '%s' quick code=%p", PrettyMethod(this).c_str(),
+        LOG(INFO) << StringPrintf("Returned '%s' quick code=%p", PrettyMethod().c_str(),
                                   GetEntryPointFromQuickCompiledCode());
       }
     } else {
-      LOG(INFO) << "Not invoking '" << PrettyMethod(this) << "' code=null";
+      LOG(INFO) << "Not invoking '" << PrettyMethod() << "' code=null";
       if (result != nullptr) {
         result->SetJ(0);
       }
@@ -318,9 +318,9 @@
 }
 
 void ArtMethod::RegisterNative(const void* native_method, bool is_fast) {
-  CHECK(IsNative()) << PrettyMethod(this);
-  CHECK(!IsFastNative()) << PrettyMethod(this);
-  CHECK(native_method != nullptr) << PrettyMethod(this);
+  CHECK(IsNative()) << PrettyMethod();
+  CHECK(!IsFastNative()) << PrettyMethod();
+  CHECK(native_method != nullptr) << PrettyMethod();
   if (is_fast) {
     SetAccessFlags(GetAccessFlags() | kAccFastNative);
   }
@@ -328,7 +328,7 @@
 }
 
 void ArtMethod::UnregisterNative() {
-  CHECK(IsNative() && !IsFastNative()) << PrettyMethod(this);
+  CHECK(IsNative() && !IsFastNative()) << PrettyMethod();
   // restore stub to lookup native pointer via dlsym
   RegisterNative(GetJniDlsymLookupStub(), false);
 }
@@ -421,7 +421,7 @@
       oat_method_index++;
     }
     CHECK(found_virtual) << "Didn't find oat method index for virtual method: "
-                         << PrettyMethod(method);
+                         << method->PrettyMethod();
   }
   DCHECK_EQ(oat_method_index,
             GetOatMethodIndexFromMethodIndex(*declaring_class->GetDexCache()->GetDexFile(),
@@ -468,7 +468,18 @@
   if (!found || (oat_method.GetQuickCode() != nullptr)) {
     return nullptr;
   }
-  return oat_method.GetVmapTable();
+  if (kIsVdexEnabled) {
+    const OatQuickMethodHeader* header = oat_method.GetOatQuickMethodHeader();
+    // OatMethod without a header: no quickening table.
+    if (header == nullptr) {
+      return nullptr;
+    }
+    // The table is in the .vdex file.
+    const OatFile::OatDexFile* oat_dex_file = GetDexCache()->GetDexFile()->GetOatDexFile();
+    return oat_dex_file->GetOatFile()->DexBegin() + header->vmap_table_offset_;
+  } else {
+    return oat_method.GetVmapTable();
+  }
 }
 
 const OatQuickMethodHeader* ArtMethod::GetOatQuickMethodHeader(uintptr_t pc) {
@@ -482,7 +493,7 @@
 
   Runtime* runtime = Runtime::Current();
   const void* existing_entry_point = GetEntryPointFromQuickCompiledCode();
-  CHECK(existing_entry_point != nullptr) << PrettyMethod(this) << "@" << this;
+  CHECK(existing_entry_point != nullptr) << PrettyMethod() << "@" << this;
   ClassLinker* class_linker = runtime->GetClassLinker();
 
   if (class_linker->IsQuickGenericJniStub(existing_entry_point)) {
@@ -517,7 +528,7 @@
       return method_header;
     } else {
       DCHECK(!code_cache->ContainsPc(reinterpret_cast<const void*>(pc)))
-          << PrettyMethod(this)
+          << PrettyMethod()
           << ", pc=" << std::hex << pc
           << ", entry_point=" << std::hex << reinterpret_cast<uintptr_t>(existing_entry_point)
           << ", copy=" << std::boolalpha << IsCopied()
@@ -549,7 +560,7 @@
   }
   const void* oat_entry_point = oat_method.GetQuickCode();
   if (oat_entry_point == nullptr || class_linker->IsQuickGenericJniStub(oat_entry_point)) {
-    DCHECK(IsNative()) << PrettyMethod(this);
+    DCHECK(IsNative()) << PrettyMethod();
     return nullptr;
   }
 
@@ -561,7 +572,7 @@
   }
 
   DCHECK(method_header->Contains(pc))
-      << PrettyMethod(this)
+      << PrettyMethod()
       << " " << std::hex << pc << " " << oat_entry_point
       << " " << (uintptr_t)(method_header->code_ + method_header->code_size_);
   return method_header;
@@ -637,4 +648,66 @@
   return runtime->GetClassLinker()->GetImagePointerSize() == pointer_size;
 }
 
+std::string ArtMethod::PrettyMethod(ArtMethod* m, bool with_signature) {
+  if (m == nullptr) {
+    return "null";
+  }
+  return m->PrettyMethod(with_signature);
+}
+
+std::string ArtMethod::PrettyMethod(bool with_signature) {
+  ArtMethod* m = this;
+  if (!m->IsRuntimeMethod()) {
+    m = m->GetInterfaceMethodIfProxy(Runtime::Current()->GetClassLinker()->GetImagePointerSize());
+  }
+  std::string result(PrettyDescriptor(m->GetDeclaringClassDescriptor()));
+  result += '.';
+  result += m->GetName();
+  if (UNLIKELY(m->IsFastNative())) {
+    result += "!";
+  }
+  if (with_signature) {
+    const Signature signature = m->GetSignature();
+    std::string sig_as_string(signature.ToString());
+    if (signature == Signature::NoSignature()) {
+      return result + sig_as_string;
+    }
+    result = PrettyReturnType(sig_as_string.c_str()) + " " + result +
+        PrettyArguments(sig_as_string.c_str());
+  }
+  return result;
+}
+
+std::string ArtMethod::JniShortName() {
+  std::string class_name(GetDeclaringClassDescriptor());
+  // Remove the leading 'L' and trailing ';'...
+  CHECK_EQ(class_name[0], 'L') << class_name;
+  CHECK_EQ(class_name[class_name.size() - 1], ';') << class_name;
+  class_name.erase(0, 1);
+  class_name.erase(class_name.size() - 1, 1);
+
+  std::string method_name(GetName());
+
+  std::string short_name;
+  short_name += "Java_";
+  short_name += MangleForJni(class_name);
+  short_name += "_";
+  short_name += MangleForJni(method_name);
+  return short_name;
+}
+
+std::string ArtMethod::JniLongName() {
+  std::string long_name;
+  long_name += JniShortName();
+  long_name += "__";
+
+  std::string signature(GetSignature().ToString());
+  signature.erase(0, 1);
+  signature.erase(signature.begin() + signature.find(')'), signature.end());
+
+  long_name += MangleForJni(signature);
+
+  return long_name;
+}
+
 }  // namespace art
diff --git a/runtime/art_method.h b/runtime/art_method.h
index 0d0bf20..b31999f 100644
--- a/runtime/art_method.h
+++ b/runtime/art_method.h
@@ -28,6 +28,7 @@
 #include "method_reference.h"
 #include "modifiers.h"
 #include "mirror/object.h"
+#include "obj_ptr.h"
 #include "read_barrier_option.h"
 #include "utils.h"
 
@@ -69,7 +70,7 @@
   ALWAYS_INLINE mirror::Class* GetDeclaringClassUnchecked()
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  void SetDeclaringClass(mirror::Class *new_declaring_class)
+  void SetDeclaringClass(ObjPtr<mirror::Class> new_declaring_class)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   bool CASDeclaringClass(mirror::Class* expected_class, mirror::Class* desired_class)
@@ -614,6 +615,20 @@
   // Returns whether the method has any compiled code, JIT or AOT.
   bool HasAnyCompiledCode() REQUIRES_SHARED(Locks::mutator_lock_);
 
+  // Returns a human-readable signature for 'm'. Something like "a.b.C.m" or
+  // "a.b.C.m(II)V" (depending on the value of 'with_signature').
+  static std::string PrettyMethod(ArtMethod* m, bool with_signature = true)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+  std::string PrettyMethod(bool with_signature = true)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+  // Returns the JNI native function name for the non-overloaded method 'm'.
+  std::string JniShortName()
+      REQUIRES_SHARED(Locks::mutator_lock_);
+  // Returns the JNI native function name for the overloaded method 'm'.
+  std::string JniLongName()
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
+
 
   // Update heap objects and non-entrypoint pointers by the passed in visitor for image relocation.
   // Does not use read barrier.
diff --git a/runtime/asm_support.h b/runtime/asm_support.h
index 567791e..5ef1f06 100644
--- a/runtime/asm_support.h
+++ b/runtime/asm_support.h
@@ -98,7 +98,7 @@
 ADD_TEST_EQ(THREAD_LOCAL_END_OFFSET,
             art::Thread::ThreadLocalEndOffset<POINTER_SIZE>().Int32Value())
 // Offset of field Thread::tlsPtr_.thread_local_objects.
-#define THREAD_LOCAL_OBJECTS_OFFSET (THREAD_LOCAL_END_OFFSET + __SIZEOF_POINTER__)
+#define THREAD_LOCAL_OBJECTS_OFFSET (THREAD_LOCAL_END_OFFSET + 2 * __SIZEOF_POINTER__)
 ADD_TEST_EQ(THREAD_LOCAL_OBJECTS_OFFSET,
             art::Thread::ThreadLocalObjectsOffset<POINTER_SIZE>().Int32Value())
 // Offset of field Thread::tlsPtr_.mterp_current_ibase.
@@ -172,6 +172,9 @@
 #define MIRROR_CLASS_COMPONENT_TYPE_OFFSET (4 + MIRROR_OBJECT_HEADER_SIZE)
 ADD_TEST_EQ(MIRROR_CLASS_COMPONENT_TYPE_OFFSET,
             art::mirror::Class::ComponentTypeOffset().Int32Value())
+#define MIRROR_CLASS_IF_TABLE_OFFSET (16 + MIRROR_OBJECT_HEADER_SIZE)
+ADD_TEST_EQ(MIRROR_CLASS_IF_TABLE_OFFSET,
+            art::mirror::Class::IfTableOffset().Int32Value())
 #define MIRROR_CLASS_ACCESS_FLAGS_OFFSET (64 + MIRROR_OBJECT_HEADER_SIZE)
 ADD_TEST_EQ(MIRROR_CLASS_ACCESS_FLAGS_OFFSET,
             art::mirror::Class::AccessFlagsOffset().Int32Value())
diff --git a/runtime/base/hash_set.h b/runtime/base/hash_set.h
index 12d3be7..f24a862 100644
--- a/runtime/base/hash_set.h
+++ b/runtime/base/hash_set.h
@@ -296,7 +296,7 @@
     return const_iterator(this, NumBuckets());
   }
 
-  bool Empty() {
+  bool Empty() const {
     return Size() == 0;
   }
 
diff --git a/runtime/base/iteration_range.h b/runtime/base/iteration_range.h
index 54ab174..9d45707 100644
--- a/runtime/base/iteration_range.h
+++ b/runtime/base/iteration_range.h
@@ -54,6 +54,17 @@
   return IterationRange<Iter>(it, it);
 }
 
+template <typename Container>
+inline auto ReverseRange(Container& c) {
+  typedef typename std::reverse_iterator<decltype(c.begin())> riter;
+  return MakeIterationRange(riter(c.end()), riter(c.begin()));
+}
+
+template <typename T, size_t size>
+inline auto ReverseRange(T (&array)[size]) {
+  return ReverseRange(MakeIterationRange<T*>(array, array+size));
+}
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_BASE_ITERATION_RANGE_H_
diff --git a/runtime/base/logging.cc b/runtime/base/logging.cc
index 08c036e..6b21a56 100644
--- a/runtime/base/logging.cc
+++ b/runtime/base/logging.cc
@@ -21,14 +21,12 @@
 #include <sstream>
 
 #include "base/mutex.h"
-#include "runtime.h"
 #include "thread-inl.h"
 #include "utils.h"
 
 // Headers for LogMessage::LogLine.
 #ifdef ART_TARGET_ANDROID
 #include <android/log.h>
-#include <android/set_abort_message.h>
 #else
 #include <sys/types.h>
 #include <unistd.h>
@@ -57,17 +55,7 @@
                                                         : "art";
 }
 
-NO_RETURN
-static void RuntimeAborter(const char* abort_message) {
-#ifdef __ANDROID__
-  android_set_abort_message(abort_message);
-#else
-  UNUSED(abort_message);
-#endif
-  Runtime::Abort(abort_message);
-}
-
-void InitLogging(char* argv[]) {
+void InitLogging(char* argv[], AbortFunction& abort_function) {
   if (gCmdLine.get() != nullptr) {
     return;
   }
@@ -97,7 +85,8 @@
 #else
 #define INIT_LOGGING_DEFAULT_LOGGER android::base::StderrLogger
 #endif
-  android::base::InitLogging(argv, INIT_LOGGING_DEFAULT_LOGGER, RuntimeAborter);
+  android::base::InitLogging(argv, INIT_LOGGING_DEFAULT_LOGGER,
+                             std::move<AbortFunction>(abort_function));
 #undef INIT_LOGGING_DEFAULT_LOGGER
 }
 
diff --git a/runtime/base/logging.h b/runtime/base/logging.h
index 5f84204..a173ac2 100644
--- a/runtime/base/logging.h
+++ b/runtime/base/logging.h
@@ -29,6 +29,9 @@
 using ::android::base::LogSeverity;
 using ::android::base::ScopedLogSeverity;
 
+// Abort function.
+using AbortFunction = void(const char*);
+
 // The members of this struct are the valid arguments to VLOG and VLOG_IS_ON in code,
 // and the "-verbose:" command line argument.
 struct LogVerbosity {
@@ -71,7 +74,7 @@
 // The tag (or '*' for the global level) comes first, followed by a colon
 // and a letter indicating the minimum priority level we're expected to log.
 // This can be used to reveal or conceal logs with specific tags.
-extern void InitLogging(char* argv[]);
+extern void InitLogging(char* argv[], AbortFunction& default_aborter);
 
 // Returns the command line used to invoke the current tool or null if InitLogging hasn't been
 // performed.
diff --git a/runtime/base/mutex-inl.h b/runtime/base/mutex-inl.h
index 1c32024..92b7c65 100644
--- a/runtime/base/mutex-inl.h
+++ b/runtime/base/mutex-inl.h
@@ -23,7 +23,6 @@
 
 #include "base/stringprintf.h"
 #include "base/value_object.h"
-#include "runtime.h"
 #include "thread.h"
 #include "utils.h"
 
@@ -59,8 +58,7 @@
   // on a thread. Lock checking is disabled to avoid deadlock when checking shutdown lock.
   // TODO: tighten this check.
   if (kDebugLocking) {
-    Runtime* runtime = Runtime::Current();
-    CHECK(runtime == nullptr || !runtime->IsStarted() || runtime->IsShuttingDownLocked() ||
+    CHECK(!Locks::IsSafeToCallAbortRacy() ||
           // Used during thread creation to avoid races with runtime shutdown. Thread::Current not
           // yet established.
           level == kRuntimeShutdownLock ||
diff --git a/runtime/base/mutex.cc b/runtime/base/mutex.cc
index 1183dea..5d92298 100644
--- a/runtime/base/mutex.cc
+++ b/runtime/base/mutex.cc
@@ -25,12 +25,13 @@
 #include "base/systrace.h"
 #include "base/value_object.h"
 #include "mutex-inl.h"
-#include "runtime.h"
 #include "scoped_thread_state_change-inl.h"
 #include "thread-inl.h"
 
 namespace art {
 
+static Atomic<Locks::ClientCallback*> safe_to_call_abort_callback(nullptr);
+
 Mutex* Locks::abort_lock_ = nullptr;
 Mutex* Locks::alloc_tracker_lock_ = nullptr;
 Mutex* Locks::allocated_monitor_ids_lock_ = nullptr;
@@ -41,7 +42,6 @@
 ReaderWriterMutex* Locks::heap_bitmap_lock_ = nullptr;
 Mutex* Locks::instrument_entrypoints_lock_ = nullptr;
 Mutex* Locks::intern_table_lock_ = nullptr;
-Mutex* Locks::interpreter_string_init_map_lock_ = nullptr;
 Mutex* Locks::jni_libraries_lock_ = nullptr;
 Mutex* Locks::logging_lock_ = nullptr;
 Mutex* Locks::mem_maps_lock_ = nullptr;
@@ -64,6 +64,8 @@
 Mutex* Locks::trace_lock_ = nullptr;
 Mutex* Locks::unexpected_signal_lock_ = nullptr;
 Uninterruptible Roles::uninterruptible_;
+ReaderWriterMutex* Locks::jni_globals_lock_ = nullptr;
+Mutex* Locks::jni_weak_globals_lock_ = nullptr;
 
 struct AllMutexData {
   // A guard for all_mutexes_ that's not a mutex (Mutexes must CAS to acquire and busy wait).
@@ -318,30 +320,26 @@
   exclusive_owner_ = 0;
 }
 
-// Helper to ignore the lock requirement.
-static bool IsShuttingDown() NO_THREAD_SAFETY_ANALYSIS {
-  Runtime* runtime = Runtime::Current();
-  return runtime == nullptr || runtime->IsShuttingDownLocked();
+// Helper to allow checking shutdown while locking for thread safety.
+static bool IsSafeToCallAbortSafe() {
+  MutexLock mu(Thread::Current(), *Locks::runtime_shutdown_lock_);
+  return Locks::IsSafeToCallAbortRacy();
 }
 
 Mutex::~Mutex() {
-  bool shutting_down = IsShuttingDown();
+  bool safe_to_call_abort = Locks::IsSafeToCallAbortRacy();
 #if ART_USE_FUTEXES
   if (state_.LoadRelaxed() != 0) {
-    LOG(shutting_down
-            ? ::android::base::WARNING
-            : ::android::base::FATAL) << "destroying mutex with owner: " << exclusive_owner_;
+    LOG(safe_to_call_abort ? FATAL : WARNING)
+        << "destroying mutex with owner: " << exclusive_owner_;
   } else {
     if (exclusive_owner_ != 0) {
-      LOG(shutting_down
-              ? ::android::base::WARNING
-              : ::android::base::FATAL) << "unexpectedly found an owner on unlocked mutex "
-                                           << name_;
+      LOG(safe_to_call_abort ? FATAL : WARNING)
+          << "unexpectedly found an owner on unlocked mutex " << name_;
     }
     if (num_contenders_.LoadSequentiallyConsistent() != 0) {
-      LOG(shutting_down
-              ? ::android::base::WARNING
-              : ::android::base::FATAL) << "unexpectedly found a contender on mutex " << name_;
+      LOG(safe_to_call_abort ? FATAL : WARNING)
+          << "unexpectedly found a contender on mutex " << name_;
     }
   }
 #else
@@ -350,11 +348,8 @@
   int rc = pthread_mutex_destroy(&mutex_);
   if (rc != 0) {
     errno = rc;
-    // TODO: should we just not log at all if shutting down? this could be the logging mutex!
-    MutexLock mu(Thread::Current(), *Locks::runtime_shutdown_lock_);
-    PLOG(shutting_down
-             ? ::android::base::WARNING
-             : ::android::base::FATAL) << "pthread_mutex_destroy failed for " << name_;
+    PLOG(safe_to_call_abort ? FATAL : WARNING)
+        << "pthread_mutex_destroy failed for " << name_;
   }
 #endif
 }
@@ -542,11 +537,8 @@
   int rc = pthread_rwlock_destroy(&rwlock_);
   if (rc != 0) {
     errno = rc;
-    // TODO: should we just not log at all if shutting down? this could be the logging mutex!
-    MutexLock mu(Thread::Current(), *Locks::runtime_shutdown_lock_);
-    Runtime* runtime = Runtime::Current();
-    bool shutting_down = runtime == nullptr || runtime->IsShuttingDownLocked();
-    PLOG(shutting_down ? WARNING : FATAL) << "pthread_rwlock_destroy failed for " << name_;
+    bool is_safe_to_call_abort = IsSafeToCallAbortSafe();
+    PLOG(is_safe_to_call_abort ? FATAL : WARNING) << "pthread_rwlock_destroy failed for " << name_;
   }
 #endif
 }
@@ -770,11 +762,8 @@
 ConditionVariable::~ConditionVariable() {
 #if ART_USE_FUTEXES
   if (num_waiters_!= 0) {
-    Runtime* runtime = Runtime::Current();
-    bool shutting_down = runtime == nullptr || runtime->IsShuttingDown(Thread::Current());
-    LOG(shutting_down
-           ? ::android::base::WARNING
-           : ::android::base::FATAL)
+    bool is_safe_to_call_abort = IsSafeToCallAbortSafe();
+    LOG(is_safe_to_call_abort ? FATAL : WARNING)
         << "ConditionVariable::~ConditionVariable for " << name_
         << " called with " << num_waiters_ << " waiters.";
   }
@@ -784,12 +773,8 @@
   int rc = pthread_cond_destroy(&cond_);
   if (rc != 0) {
     errno = rc;
-    MutexLock mu(Thread::Current(), *Locks::runtime_shutdown_lock_);
-    Runtime* runtime = Runtime::Current();
-    bool shutting_down = (runtime == nullptr) || runtime->IsShuttingDownLocked();
-    PLOG(shutting_down
-             ? ::android::base::WARNING
-             : ::android::base::FATAL) << "pthread_cond_destroy failed for " << name_;
+    bool is_safe_to_call_abort = IsSafeToCallAbortSafe();
+    PLOG(is_safe_to_call_abort ? FATAL : WARNING) << "pthread_cond_destroy failed for " << name_;
   }
 #endif
 }
@@ -1088,6 +1073,15 @@
     DCHECK(reference_queue_soft_references_lock_ == nullptr);
     reference_queue_soft_references_lock_ = new Mutex("ReferenceQueue soft references lock", current_lock_level);
 
+    UPDATE_CURRENT_LOCK_LEVEL(kJniGlobalsLock);
+    DCHECK(jni_globals_lock_ == nullptr);
+    jni_globals_lock_ =
+        new ReaderWriterMutex("JNI global reference table lock", current_lock_level);
+
+    UPDATE_CURRENT_LOCK_LEVEL(kJniWeakGlobalsLock);
+    DCHECK(jni_weak_globals_lock_ == nullptr);
+    jni_weak_globals_lock_ = new Mutex("JNI weak global reference table lock", current_lock_level);
+
     UPDATE_CURRENT_LOCK_LEVEL(kAbortLock);
     DCHECK(abort_lock_ == nullptr);
     abort_lock_ = new Mutex("abort lock", current_lock_level, true);
@@ -1118,4 +1112,14 @@
   thread_exit_cond_ = new ConditionVariable("thread exit condition variable", *thread_list_lock_);
 }
 
+void Locks::SetClientCallback(ClientCallback* safe_to_call_abort_cb) {
+  safe_to_call_abort_callback.StoreRelease(safe_to_call_abort_cb);
+}
+
+// Helper to allow checking shutdown while ignoring locking requirements.
+bool Locks::IsSafeToCallAbortRacy() {
+  Locks::ClientCallback* safe_to_call_abort_cb = safe_to_call_abort_callback.LoadAcquire();
+  return safe_to_call_abort_cb != nullptr && safe_to_call_abort_cb();
+}
+
 }  // namespace art
diff --git a/runtime/base/mutex.h b/runtime/base/mutex.h
index b3ff6c2..74b786c 100644
--- a/runtime/base/mutex.h
+++ b/runtime/base/mutex.h
@@ -68,6 +68,7 @@
   kMarkSweepMarkStackLock,
   kTransactionLogLock,
   kJniWeakGlobalsLock,
+  kJniGlobalsLock,
   kReferenceQueueSoftReferencesLock,
   kReferenceQueuePhantomReferencesLock,
   kReferenceQueueFinalizerReferencesLock,
@@ -78,7 +79,6 @@
   kAllocSpaceLock,
   kBumpPointerSpaceBlockLock,
   kArenaPoolLock,
-  kDexFileToMethodInlinerMapLock,
   kInternTableLock,
   kOatFileSecondaryLookupLock,
   kHostDlOpenHandlesLock,
@@ -91,12 +91,10 @@
   kDefaultMutexLevel,
   kDexLock,
   kMarkSweepLargeObjectLock,
-  kPinTableLock,
   kJdwpObjectRegistryLock,
   kModifyLdtLock,
   kAllocatedThreadIdsLock,
   kMonitorPoolLock,
-  kMethodVerifiersLock,
   kClassLinkerClassesLock,  // TODO rename.
   kJitCodeCacheLock,
   kBreakpointLock,
@@ -557,6 +555,18 @@
  public:
   static void Init();
   static void InitConditions() NO_THREAD_SAFETY_ANALYSIS;  // Condition variables.
+
+  // Destroying various lock types can emit errors that vary depending upon
+  // whether the client (art::Runtime) is currently active.  Allow the client
+  // to set a callback that is used to check when it is acceptable to call
+  // Abort.  The default behavior is that the client *is not* able to call
+  // Abort if no callback is established.
+  using ClientCallback = bool();
+  static void SetClientCallback(ClientCallback* is_safe_to_call_abort_cb) NO_THREAD_SAFETY_ANALYSIS;
+  // Checks for whether it is safe to call Abort() without using locks.
+  static bool IsSafeToCallAbortRacy() NO_THREAD_SAFETY_ANALYSIS;
+
+
   // Guards allocation entrypoint instrumenting.
   static Mutex* instrument_entrypoints_lock_;
 
@@ -617,12 +627,9 @@
   // TODO: improve name, perhaps instrumentation_update_lock_.
   static Mutex* deoptimization_lock_ ACQUIRED_AFTER(alloc_tracker_lock_);
 
-  // Guards String initializer register map in interpreter.
-  static Mutex* interpreter_string_init_map_lock_ ACQUIRED_AFTER(deoptimization_lock_);
-
   // The thread_list_lock_ guards ThreadList::list_. It is also commonly held to stop threads
   // attaching and detaching.
-  static Mutex* thread_list_lock_ ACQUIRED_AFTER(interpreter_string_init_map_lock_);
+  static Mutex* thread_list_lock_ ACQUIRED_AFTER(deoptimization_lock_);
 
   // Signaled when threads terminate. Used to determine when all non-daemons have terminated.
   static ConditionVariable* thread_exit_cond_ GUARDED_BY(Locks::thread_list_lock_);
@@ -678,8 +685,14 @@
   // Guards soft references queue.
   static Mutex* reference_queue_soft_references_lock_ ACQUIRED_AFTER(reference_queue_phantom_references_lock_);
 
+  // Guard accesses to the JNI Global Reference table.
+  static ReaderWriterMutex* jni_globals_lock_ ACQUIRED_AFTER(reference_queue_soft_references_lock_);
+
+  // Guard accesses to the JNI Weak Global Reference table.
+  static Mutex* jni_weak_globals_lock_ ACQUIRED_AFTER(jni_globals_lock_);
+
   // Have an exclusive aborting thread.
-  static Mutex* abort_lock_ ACQUIRED_AFTER(reference_queue_soft_references_lock_);
+  static Mutex* abort_lock_ ACQUIRED_AFTER(jni_weak_globals_lock_);
 
   // Allow mutual exclusion when manipulating Thread::suspend_count_.
   // TODO: Does the trade-off of a per-thread lock make sense?
diff --git a/runtime/base/time_utils.h b/runtime/base/time_utils.h
index 55d2764..383b52f 100644
--- a/runtime/base/time_utils.h
+++ b/runtime/base/time_utils.h
@@ -73,9 +73,11 @@
 }
 
 #if defined(__APPLE__)
-// No clocks to specify on OS/X, fake value to pass to routines that require a clock.
+#ifndef CLOCK_REALTIME
+// No clocks to specify on OS/X < 10.12, fake value to pass to routines that require a clock.
 #define CLOCK_REALTIME 0xebadf00d
 #endif
+#endif
 
 // Sleep for the given number of nanoseconds, a bad way to handle contention.
 void NanoSleep(uint64_t ns);
diff --git a/runtime/base/unix_file/fd_file.cc b/runtime/base/unix_file/fd_file.cc
index 4498198..ff2dd1b 100644
--- a/runtime/base/unix_file/fd_file.cc
+++ b/runtime/base/unix_file/fd_file.cc
@@ -339,22 +339,59 @@
   return true;
 }
 
-void FdFile::Erase() {
+bool FdFile::Unlink() {
+  if (file_path_.empty()) {
+    return false;
+  }
+
+  // Try to figure out whether this file is still referring to the one on disk.
+  bool is_current = false;
+  {
+    struct stat this_stat, current_stat;
+    int cur_fd = TEMP_FAILURE_RETRY(open(file_path_.c_str(), O_RDONLY));
+    if (cur_fd > 0) {
+      // File still exists.
+      if (fstat(fd_, &this_stat) == 0 && fstat(cur_fd, &current_stat) == 0) {
+        is_current = (this_stat.st_dev == current_stat.st_dev) &&
+                     (this_stat.st_ino == current_stat.st_ino);
+      }
+      close(cur_fd);
+    }
+  }
+
+  if (is_current) {
+    unlink(file_path_.c_str());
+  }
+
+  return is_current;
+}
+
+bool FdFile::Erase(bool unlink) {
   DCHECK(!read_only_mode_);
-  TEMP_FAILURE_RETRY(SetLength(0));
-  TEMP_FAILURE_RETRY(Flush());
-  TEMP_FAILURE_RETRY(Close());
+
+  bool ret_result = true;
+  if (unlink) {
+    ret_result = Unlink();
+  }
+
+  int result;
+  result = SetLength(0);
+  result = Flush();
+  result = Close();
+  // Ignore the errors.
+
+  return ret_result;
 }
 
 int FdFile::FlushCloseOrErase() {
   DCHECK(!read_only_mode_);
-  int flush_result = TEMP_FAILURE_RETRY(Flush());
+  int flush_result = Flush();
   if (flush_result != 0) {
     LOG(ERROR) << "CloseOrErase failed while flushing a file.";
     Erase();
     return flush_result;
   }
-  int close_result = TEMP_FAILURE_RETRY(Close());
+  int close_result = Close();
   if (close_result != 0) {
     LOG(ERROR) << "CloseOrErase failed while closing a file.";
     Erase();
@@ -365,11 +402,11 @@
 
 int FdFile::FlushClose() {
   DCHECK(!read_only_mode_);
-  int flush_result = TEMP_FAILURE_RETRY(Flush());
+  int flush_result = Flush();
   if (flush_result != 0) {
     LOG(ERROR) << "FlushClose failed while flushing a file.";
   }
-  int close_result = TEMP_FAILURE_RETRY(Close());
+  int close_result = Close();
   if (close_result != 0) {
     LOG(ERROR) << "FlushClose failed while closing a file.";
   }
diff --git a/runtime/base/unix_file/fd_file.h b/runtime/base/unix_file/fd_file.h
index d896ee9..eb85c4f 100644
--- a/runtime/base/unix_file/fd_file.h
+++ b/runtime/base/unix_file/fd_file.h
@@ -97,7 +97,14 @@
   int Flush() OVERRIDE WARN_UNUSED;
 
   // Short for SetLength(0); Flush(); Close();
-  void Erase();
+  // If the file was opened with a path name and unlink = true, also calls Unlink() on the path.
+  // Note that it is the the caller's responsibility to avoid races.
+  bool Erase(bool unlink = false);
+
+  // Call unlink() if the file was opened with a path, and if open() with the name shows that
+  // the file descriptor of this file is still up-to-date. This is still racy, though, and it
+  // is up to the caller to ensure correctness in a multi-process setup.
+  bool Unlink();
 
   // Try to Flush(), then try to Close(); If either fails, call Erase().
   int FlushCloseOrErase() WARN_UNUSED;
diff --git a/runtime/base/unix_file/fd_file_test.cc b/runtime/base/unix_file/fd_file_test.cc
index 99ef6f7..7657a38 100644
--- a/runtime/base/unix_file/fd_file_test.cc
+++ b/runtime/base/unix_file/fd_file_test.cc
@@ -186,4 +186,24 @@
   ASSERT_EQ(file2.Close(), 0);
 }
 
+TEST_F(FdFileTest, EraseWithPathUnlinks) {
+  // New scratch file, zero-length.
+  art::ScratchFile tmp;
+  std::string filename = tmp.GetFilename();
+  tmp.Close();  // This is required because of the unlink race between the scratch file and the
+                // FdFile, which leads to close-guard breakage.
+  FdFile file(filename, O_RDWR, false);
+  ASSERT_TRUE(file.IsOpened());
+  EXPECT_GE(file.Fd(), 0);
+  uint8_t buffer[16] = { 0 };
+  EXPECT_TRUE(file.WriteFully(&buffer, sizeof(buffer)));
+  EXPECT_EQ(file.Flush(), 0);
+
+  EXPECT_TRUE(file.Erase(true));
+
+  EXPECT_FALSE(file.IsOpened());
+
+  EXPECT_FALSE(art::OS::FileExists(filename.c_str())) << filename;
+}
+
 }  // namespace unix_file
diff --git a/runtime/base/variant_map_test.cc b/runtime/base/variant_map_test.cc
index ccb22eb..93336e0 100644
--- a/runtime/base/variant_map_test.cc
+++ b/runtime/base/variant_map_test.cc
@@ -107,8 +107,8 @@
   fmFilled.Set(FruitMap::Orange, 555.0);
   EXPECT_EQ(size_t(2), fmFilled.Size());
 
-  // Test copy constructor
-  FruitMap fmEmptyCopy(fmEmpty);
+  // Test copy constructor (NOLINT as a reference is suggested, instead)
+  FruitMap fmEmptyCopy(fmEmpty);  // NOLINT
   EXPECT_EQ(size_t(0), fmEmptyCopy.Size());
 
   // Test copy constructor
diff --git a/runtime/check_jni.cc b/runtime/check_jni.cc
index 4dc7b31..6c27bc6 100644
--- a/runtime/check_jni.cc
+++ b/runtime/check_jni.cc
@@ -274,22 +274,22 @@
       AbortF("field operation on NULL object: %p", java_object);
       return false;
     }
-    if (!Runtime::Current()->GetHeap()->IsValidObjectAddress(o)) {
+    if (!Runtime::Current()->GetHeap()->IsValidObjectAddress(o.Ptr())) {
       Runtime::Current()->GetHeap()->DumpSpaces(LOG_STREAM(ERROR));
       AbortF("field operation on invalid %s: %p",
-             ToStr<IndirectRefKind>(GetIndirectRefKind(java_object)).c_str(),
+             GetIndirectRefKindString(IndirectReferenceTable::GetIndirectRefKind(java_object)),
              java_object);
       return false;
     }
 
-    ArtField* f = CheckFieldID(soa, fid);
+    ArtField* f = CheckFieldID(fid);
     if (f == nullptr) {
       return false;
     }
     mirror::Class* c = o->GetClass();
     if (c->FindInstanceField(f->GetName(), f->GetTypeDescriptor()) == nullptr) {
       AbortF("jfieldID %s not valid for an object of class %s",
-             PrettyField(f).c_str(), PrettyTypeOf(o).c_str());
+             f->PrettyField().c_str(), o->PrettyTypeOf().c_str());
       return false;
     }
     return true;
@@ -313,22 +313,22 @@
   bool CheckMethodAndSig(ScopedObjectAccess& soa, jobject jobj, jclass jc,
                          jmethodID mid, Primitive::Type type, InvokeType invoke)
       REQUIRES_SHARED(Locks::mutator_lock_) {
-    ArtMethod* m = CheckMethodID(soa, mid);
+    ArtMethod* m = CheckMethodID(mid);
     if (m == nullptr) {
       return false;
     }
     if (type != Primitive::GetType(m->GetShorty()[0])) {
-      AbortF("the return type of %s does not match %s", function_name_, PrettyMethod(m).c_str());
+      AbortF("the return type of %s does not match %s", function_name_, m->PrettyMethod().c_str());
       return false;
     }
     bool is_static = (invoke == kStatic);
     if (is_static != m->IsStatic()) {
       if (is_static) {
         AbortF("calling non-static method %s with %s",
-               PrettyMethod(m).c_str(), function_name_);
+               m->PrettyMethod().c_str(), function_name_);
       } else {
         AbortF("calling static method %s with %s",
-               PrettyMethod(m).c_str(), function_name_);
+               m->PrettyMethod().c_str(), function_name_);
       }
       return false;
     }
@@ -336,17 +336,18 @@
       ObjPtr<mirror::Class> c = soa.Decode<mirror::Class>(jc);
       if (!m->GetDeclaringClass()->IsAssignableFrom(c)) {
         AbortF("can't call %s %s with class %s", invoke == kStatic ? "static" : "nonvirtual",
-            PrettyMethod(m).c_str(), PrettyClass(c).c_str());
+            m->PrettyMethod().c_str(), mirror::Class::PrettyClass(c).c_str());
         return false;
       }
     }
     if (invoke != kStatic) {
       ObjPtr<mirror::Object> o = soa.Decode<mirror::Object>(jobj);
       if (o == nullptr) {
-        AbortF("can't call %s on null object", PrettyMethod(m).c_str());
+        AbortF("can't call %s on null object", m->PrettyMethod().c_str());
         return false;
       } else if (!o->InstanceOf(m->GetDeclaringClass())) {
-        AbortF("can't call %s on instance of %s", PrettyMethod(m).c_str(), PrettyTypeOf(o).c_str());
+        AbortF("can't call %s on instance of %s", m->PrettyMethod().c_str(),
+               o->PrettyTypeOf().c_str());
         return false;
       }
     }
@@ -361,12 +362,13 @@
   bool CheckStaticFieldID(ScopedObjectAccess& soa, jclass java_class, jfieldID fid)
       REQUIRES_SHARED(Locks::mutator_lock_) {
     ObjPtr<mirror::Class> c = soa.Decode<mirror::Class>(java_class);
-    ArtField* f = CheckFieldID(soa, fid);
+    ArtField* f = CheckFieldID(fid);
     if (f == nullptr) {
       return false;
     }
     if (c != f->GetDeclaringClass()) {
-      AbortF("static jfieldID %p not valid for class %s", fid, PrettyClass(c).c_str());
+      AbortF("static jfieldID %p not valid for class %s", fid,
+             mirror::Class::PrettyClass(c).c_str());
       return false;
     }
     return true;
@@ -383,13 +385,14 @@
    */
   bool CheckStaticMethod(ScopedObjectAccess& soa, jclass java_class, jmethodID mid)
       REQUIRES_SHARED(Locks::mutator_lock_) {
-    ArtMethod* m = CheckMethodID(soa, mid);
+    ArtMethod* m = CheckMethodID(mid);
     if (m == nullptr) {
       return false;
     }
     ObjPtr<mirror::Class> c = soa.Decode<mirror::Class>(java_class);
     if (!m->GetDeclaringClass()->IsAssignableFrom(c)) {
-      AbortF("can't call static %s on class %s", PrettyMethod(m).c_str(), PrettyClass(c).c_str());
+      AbortF("can't call static %s on class %s", m->PrettyMethod().c_str(),
+             mirror::Class::PrettyClass(c).c_str());
       return false;
     }
     return true;
@@ -404,16 +407,17 @@
    */
   bool CheckVirtualMethod(ScopedObjectAccess& soa, jobject java_object, jmethodID mid)
       REQUIRES_SHARED(Locks::mutator_lock_) {
-    ArtMethod* m = CheckMethodID(soa, mid);
+    ArtMethod* m = CheckMethodID(mid);
     if (m == nullptr) {
       return false;
     }
     ObjPtr<mirror::Object> o = soa.Decode<mirror::Object>(java_object);
     if (o == nullptr) {
-      AbortF("can't call %s on null object", PrettyMethod(m).c_str());
+      AbortF("can't call %s on null object", m->PrettyMethod().c_str());
       return false;
     } else if (!o->InstanceOf(m->GetDeclaringClass())) {
-      AbortF("can't call %s on instance of %s", PrettyMethod(m).c_str(), PrettyTypeOf(o).c_str());
+      AbortF("can't call %s on instance of %s", m->PrettyMethod().c_str(),
+             o->PrettyTypeOf().c_str());
       return false;
     }
     return true;
@@ -481,7 +485,7 @@
         LOG(INFO) << "JNI: call to " << function_name_ << "(" << msg << ")";
       } else if (entry) {
         if (has_method_) {
-          std::string methodName(PrettyMethod(traceMethod, false));
+          std::string methodName(ArtMethod::PrettyMethod(traceMethod, false));
           LOG(INFO) << "JNI: " << methodName << " -> " << function_name_ << "(" << msg << ")";
           indent_ = methodName.size() + 1;
         } else {
@@ -532,7 +536,7 @@
           Thread* self = Thread::Current();
           ScopedObjectAccess soa(self);
           ArtMethod* traceMethod = self->GetCurrentMethod(nullptr);
-          std::string methodName(PrettyMethod(traceMethod, false));
+          std::string methodName(ArtMethod::PrettyMethod(traceMethod, false));
           LOG(INFO) << "JNI: " << methodName << " -> " << function_name_ << "(" << msg << ")";
           indent_ = methodName.size() + 1;
         } else {
@@ -567,21 +571,20 @@
         soa.Decode<mirror::Class>(WellKnownClasses::java_lang_reflect_Constructor) != c) {
       AbortF("expected java.lang.reflect.Method or "
           "java.lang.reflect.Constructor but got object of type %s: %p",
-          PrettyTypeOf(method).c_str(), jmethod);
+          method->PrettyTypeOf().c_str(), jmethod);
       return false;
     }
     return true;
   }
 
-  bool CheckConstructor(ScopedObjectAccess& soa, jmethodID mid)
-      REQUIRES_SHARED(Locks::mutator_lock_) {
-    ArtMethod* method = soa.DecodeMethod(mid);
+  bool CheckConstructor(jmethodID mid) REQUIRES_SHARED(Locks::mutator_lock_) {
+    ArtMethod* method = jni::DecodeArtMethod(mid);
     if (method == nullptr) {
       AbortF("expected non-null constructor");
       return false;
     }
     if (!method->IsConstructor() || method->IsStatic()) {
-      AbortF("expected a constructor but %s: %p", PrettyMethod(method).c_str(), mid);
+      AbortF("expected a constructor but %s: %p", method->PrettyMethod().c_str(), mid);
       return false;
     }
     return true;
@@ -597,7 +600,7 @@
     mirror::Class* c = field->GetClass();
     if (soa.Decode<mirror::Class>(WellKnownClasses::java_lang_reflect_Field) != c) {
       AbortF("expected java.lang.reflect.Field but got object of type %s: %p",
-             PrettyTypeOf(field).c_str(), jfield);
+             field->PrettyTypeOf().c_str(), jfield);
       return false;
     }
     return true;
@@ -608,7 +611,7 @@
     ObjPtr<mirror::Object> obj = soa.Decode<mirror::Object>(jobj);
     if (!obj->GetClass()->IsThrowableClass()) {
       AbortF("expected java.lang.Throwable but got object of type "
-             "%s: %p", PrettyTypeOf(obj).c_str(), obj.Ptr());
+             "%s: %p", obj->PrettyTypeOf().c_str(), obj.Ptr());
       return false;
     }
     return true;
@@ -619,7 +622,7 @@
     ObjPtr<mirror::Class> c = soa.Decode<mirror::Class>(jc);
     if (!c->IsThrowableClass()) {
       AbortF("expected java.lang.Throwable class but got object of "
-             "type %s: %p", PrettyDescriptor(c).c_str(), c.Ptr());
+             "type %s: %p", c->PrettyDescriptor().c_str(), c.Ptr());
       return false;
     }
     return true;
@@ -628,17 +631,17 @@
   bool CheckReferenceKind(IndirectRefKind expected_kind, Thread* self, jobject obj) {
     IndirectRefKind found_kind;
     if (expected_kind == kLocal) {
-      found_kind = GetIndirectRefKind(obj);
+      found_kind = IndirectReferenceTable::GetIndirectRefKind(obj);
       if (found_kind == kHandleScopeOrInvalid && self->HandleScopeContains(obj)) {
         found_kind = kLocal;
       }
     } else {
-      found_kind = GetIndirectRefKind(obj);
+      found_kind = IndirectReferenceTable::GetIndirectRefKind(obj);
     }
     if (obj != nullptr && found_kind != expected_kind) {
       AbortF("expected reference of kind %s but found %s: %p",
-             ToStr<IndirectRefKind>(expected_kind).c_str(),
-             ToStr<IndirectRefKind>(GetIndirectRefKind(obj)).c_str(),
+             GetIndirectRefKindString(expected_kind),
+             GetIndirectRefKindString(IndirectReferenceTable::GetIndirectRefKind(obj)),
              obj);
       return false;
     }
@@ -649,7 +652,7 @@
       REQUIRES_SHARED(Locks::mutator_lock_) {
     ObjPtr<mirror::Class> c = soa.Decode<mirror::Class>(jc);
     if (!c->IsInstantiableNonArray()) {
-      AbortF("can't make objects of type %s: %p", PrettyDescriptor(c).c_str(), c.Ptr());
+      AbortF("can't make objects of type %s: %p", c->PrettyDescriptor().c_str(), c.Ptr());
       return false;
     }
     return true;
@@ -663,7 +666,7 @@
     ObjPtr<mirror::Array> a = soa.Decode<mirror::Array>(array);
     if (a->GetClass()->GetComponentType()->GetPrimitiveType() != type) {
       AbortF("incompatible array type %s expected %s[]: %p",
-             PrettyDescriptor(a->GetClass()).c_str(), PrettyDescriptor(type).c_str(), array);
+             a->GetClass()->PrettyDescriptor().c_str(), PrettyDescriptor(type).c_str(), array);
       return false;
     }
     return true;
@@ -678,16 +681,17 @@
     if (!is_static && !CheckInstanceFieldID(soa, obj, fid)) {
       return false;
     }
-    ArtField* field = soa.DecodeField(fid);
+    ArtField* field = jni::DecodeArtField(fid);
     DCHECK(field != nullptr);  // Already checked by Check.
     if (is_static != field->IsStatic()) {
       AbortF("attempt to access %s field %s: %p",
-             field->IsStatic() ? "static" : "non-static", PrettyField(field).c_str(), fid);
+             field->IsStatic() ? "static" : "non-static", field->PrettyField().c_str(), fid);
       return false;
     }
     if (type != field->GetTypeAsPrimitiveType()) {
       AbortF("attempt to access field %s of type %s with the wrong type %s: %p",
-             PrettyField(field).c_str(), PrettyDescriptor(field->GetTypeDescriptor()).c_str(),
+             field->PrettyField().c_str(),
+             PrettyDescriptor(field->GetTypeDescriptor()).c_str(),
              PrettyDescriptor(type).c_str(), fid);
       return false;
     }
@@ -695,20 +699,20 @@
       ObjPtr<mirror::Object> o = soa.Decode<mirror::Object>(obj);
       if (o == nullptr || !o->IsClass()) {
         AbortF("attempt to access static field %s with a class argument of type %s: %p",
-               PrettyField(field).c_str(), PrettyTypeOf(o).c_str(), fid);
+               field->PrettyField().c_str(), o->PrettyTypeOf().c_str(), fid);
         return false;
       }
       ObjPtr<mirror::Class> c = o->AsClass();
       if (c != field->GetDeclaringClass()) {
         AbortF("attempt to access static field %s with an incompatible class argument of %s: %p",
-               PrettyField(field).c_str(), PrettyDescriptor(c).c_str(), fid);
+               field->PrettyField().c_str(), mirror::Class::PrettyDescriptor(c).c_str(), fid);
         return false;
       }
     } else {
       ObjPtr<mirror::Object> o = soa.Decode<mirror::Object>(obj);
       if (o == nullptr || !field->GetDeclaringClass()->IsAssignableFrom(o->GetClass())) {
         AbortF("attempt to access field %s from an object argument of type %s: %p",
-               PrettyField(field).c_str(), PrettyTypeOf(o).c_str(), fid);
+               field->PrettyField().c_str(), o->PrettyTypeOf().c_str(), fid);
         return false;
       }
     }
@@ -768,25 +772,29 @@
       // Either java_object is invalid or is a cleared weak.
       IndirectRef ref = reinterpret_cast<IndirectRef>(java_object);
       bool okay;
-      if (GetIndirectRefKind(ref) != kWeakGlobal) {
+      if (IndirectReferenceTable::GetIndirectRefKind(ref) != kWeakGlobal) {
         okay = false;
       } else {
         obj = soa.Vm()->DecodeWeakGlobal(soa.Self(), ref);
-        okay = Runtime::Current()->IsClearedJniWeakGlobal(obj.Ptr());
+        okay = Runtime::Current()->IsClearedJniWeakGlobal(obj);
       }
       if (!okay) {
         AbortF("%s is an invalid %s: %p (%p)",
-               what, ToStr<IndirectRefKind>(GetIndirectRefKind(java_object)).c_str(),
-               java_object, obj.Ptr());
+               what,
+               GetIndirectRefKindString(IndirectReferenceTable::GetIndirectRefKind(java_object)),
+               java_object,
+               obj.Ptr());
         return false;
       }
     }
 
-    if (!Runtime::Current()->GetHeap()->IsValidObjectAddress(obj)) {
+    if (!Runtime::Current()->GetHeap()->IsValidObjectAddress(obj.Ptr())) {
       Runtime::Current()->GetHeap()->DumpSpaces(LOG_STREAM(ERROR));
       AbortF("%s is an invalid %s: %p (%p)",
-             what, ToStr<IndirectRefKind>(GetIndirectRefKind(java_object)).c_str(),
-             java_object, obj.Ptr());
+             what,
+             GetIndirectRefKindString(IndirectReferenceTable::GetIndirectRefKind(java_object)),
+             java_object,
+             obj.Ptr());
       return false;
     }
 
@@ -808,7 +816,7 @@
       break;
     }
     if (!okay) {
-      AbortF("%s has wrong type: %s", what, PrettyTypeOf(obj).c_str());
+      AbortF("%s has wrong type: %s", what, mirror::Object::PrettyTypeOf(obj).c_str());
       return false;
     }
 
@@ -835,9 +843,9 @@
       case 'c':  // jclass
         return CheckInstance(soa, kClass, arg.c, false);
       case 'f':  // jfieldID
-        return CheckFieldID(soa, arg.f) != nullptr;
+        return CheckFieldID(arg.f) != nullptr;
       case 'm':  // jmethodID
-        return CheckMethodID(soa, arg.m) != nullptr;
+        return CheckMethodID(arg.m) != nullptr;
       case 'r':  // release int
         return CheckReleaseMode(arg.r);
       case 's':  // jstring
@@ -859,7 +867,7 @@
       REQUIRES_SHARED(Locks::mutator_lock_) {
     CHECK(args_p != nullptr);
     VarArgs args(args_p->Clone());
-    ArtMethod* m = CheckMethodID(soa, args.GetMethodID());
+    ArtMethod* m = CheckMethodID(args.GetMethodID());
     if (m == nullptr) {
       return false;
     }
@@ -939,12 +947,12 @@
         ObjPtr<mirror::Class> c = soa.Decode<mirror::Class>(jc);
         if (c == nullptr) {
           *msg += "NULL";
-        } else if (!Runtime::Current()->GetHeap()->IsValidObjectAddress(c)) {
+        } else if (!Runtime::Current()->GetHeap()->IsValidObjectAddress(c.Ptr())) {
           StringAppendF(msg, "INVALID POINTER:%p", jc);
         } else if (!c->IsClass()) {
-          *msg += "INVALID NON-CLASS OBJECT OF TYPE:" + PrettyTypeOf(c);
+          *msg += "INVALID NON-CLASS OBJECT OF TYPE:" + c->PrettyTypeOf();
         } else {
-          *msg += PrettyClass(c);
+          *msg += c->PrettyClass();
           if (!entry) {
             StringAppendF(msg, " (%p)", jc);
           }
@@ -953,8 +961,8 @@
       }
       case 'f': {  // jfieldID
         jfieldID fid = arg.f;
-        ArtField* f = soa.DecodeField(fid);
-        *msg += PrettyField(f);
+        ArtField* f = jni::DecodeArtField(fid);
+        *msg += ArtField::PrettyField(f);
         if (!entry) {
           StringAppendF(msg, " (%p)", fid);
         }
@@ -962,8 +970,8 @@
       }
       case 'm': {  // jmethodID
         jmethodID mid = arg.m;
-        ArtMethod* m = soa.DecodeMethod(mid);
-        *msg += PrettyMethod(m);
+        ArtMethod* m = jni::DecodeArtMethod(mid);
+        *msg += ArtMethod::PrettyMethod(m);
         if (!entry) {
           StringAppendF(msg, " (%p)", mid);
         }
@@ -972,7 +980,7 @@
       case '.': {
         const VarArgs* va = arg.va;
         VarArgs args(va->Clone());
-        ArtMethod* m = soa.DecodeMethod(args.GetMethodID());
+        ArtMethod* m = jni::DecodeArtMethod(args.GetMethodID());
         uint32_t len;
         const char* shorty = m->GetShorty(&len);
         CHECK_GE(len, 1u);
@@ -1108,14 +1116,15 @@
     }
 
     ObjPtr<mirror::Array> a = soa.Decode<mirror::Array>(java_array);
-    if (UNLIKELY(!Runtime::Current()->GetHeap()->IsValidObjectAddress(a))) {
+    if (UNLIKELY(!Runtime::Current()->GetHeap()->IsValidObjectAddress(a.Ptr()))) {
       Runtime::Current()->GetHeap()->DumpSpaces(LOG_STREAM(ERROR));
       AbortF("jarray is an invalid %s: %p (%p)",
-             ToStr<IndirectRefKind>(GetIndirectRefKind(java_array)).c_str(),
-             java_array, a.Ptr());
+             GetIndirectRefKindString(IndirectReferenceTable::GetIndirectRefKind(java_array)),
+             java_array,
+             a.Ptr());
       return false;
     } else if (!a->IsArrayInstance()) {
-      AbortF("jarray argument has non-array type: %s", PrettyTypeOf(a).c_str());
+      AbortF("jarray argument has non-array type: %s", a->PrettyTypeOf().c_str());
       return false;
     }
     return true;
@@ -1137,15 +1146,14 @@
     return true;
   }
 
-  ArtField* CheckFieldID(ScopedObjectAccess& soa, jfieldID fid)
-      REQUIRES_SHARED(Locks::mutator_lock_) {
+  ArtField* CheckFieldID(jfieldID fid) REQUIRES_SHARED(Locks::mutator_lock_) {
     if (fid == nullptr) {
       AbortF("jfieldID was NULL");
       return nullptr;
     }
-    ArtField* f = soa.DecodeField(fid);
+    ArtField* f = jni::DecodeArtField(fid);
     // TODO: Better check here.
-    if (!Runtime::Current()->GetHeap()->IsValidObjectAddress(f->GetDeclaringClass())) {
+    if (!Runtime::Current()->GetHeap()->IsValidObjectAddress(f->GetDeclaringClass().Ptr())) {
       Runtime::Current()->GetHeap()->DumpSpaces(LOG_STREAM(ERROR));
       AbortF("invalid jfieldID: %p", fid);
       return nullptr;
@@ -1153,13 +1161,12 @@
     return f;
   }
 
-  ArtMethod* CheckMethodID(ScopedObjectAccess& soa, jmethodID mid)
-      REQUIRES_SHARED(Locks::mutator_lock_) {
+  ArtMethod* CheckMethodID(jmethodID mid) REQUIRES_SHARED(Locks::mutator_lock_) {
     if (mid == nullptr) {
       AbortF("jmethodID was NULL");
       return nullptr;
     }
-    ArtMethod* m = soa.DecodeMethod(mid);
+    ArtMethod* m = jni::DecodeArtMethod(mid);
     // TODO: Better check here.
     if (!Runtime::Current()->GetHeap()->IsValidObjectAddress(m->GetDeclaringClass())) {
       Runtime::Current()->GetHeap()->DumpSpaces(LOG_STREAM(ERROR));
@@ -1995,7 +2002,7 @@
     VarArgs rest(mid, vargs);
     JniValueType args[4] = {{.E = env}, {.c = c}, {.m = mid}, {.va = &rest}};
     if (sc.Check(soa, true, "Ecm.", args) && sc.CheckInstantiableNonArray(soa, c) &&
-        sc.CheckConstructor(soa, mid)) {
+        sc.CheckConstructor(mid)) {
       JniValueType result;
       result.L = baseEnv(env)->NewObjectV(env, c, mid, vargs);
       if (sc.Check(soa, false, "L", &result)) {
@@ -2019,7 +2026,7 @@
     VarArgs rest(mid, vargs);
     JniValueType args[4] = {{.E = env}, {.c = c}, {.m = mid}, {.va = &rest}};
     if (sc.Check(soa, true, "Ecm.", args) && sc.CheckInstantiableNonArray(soa, c) &&
-        sc.CheckConstructor(soa, mid)) {
+        sc.CheckConstructor(mid)) {
       JniValueType result;
       result.L = baseEnv(env)->NewObjectA(env, c, mid, vargs);
       if (sc.Check(soa, false, "L", &result)) {
diff --git a/runtime/check_reference_map_visitor.h b/runtime/check_reference_map_visitor.h
index ab712f9..93fdaa6 100644
--- a/runtime/check_reference_map_visitor.h
+++ b/runtime/check_reference_map_visitor.h
@@ -41,10 +41,10 @@
       return true;
     }
 
-    LOG(INFO) << "At " << PrettyMethod(m, false);
+    LOG(INFO) << "At " << m->PrettyMethod(false);
 
     if (m->IsCalleeSaveMethod()) {
-      LOG(WARNING) << "no PC for " << PrettyMethod(m);
+      LOG(WARNING) << "no PC for " << m->PrettyMethod();
       return true;
     }
 
diff --git a/runtime/class_linker-inl.h b/runtime/class_linker-inl.h
index 378da57..7359243 100644
--- a/runtime/class_linker-inl.h
+++ b/runtime/class_linker-inl.h
@@ -21,11 +21,13 @@
 #include "class_linker.h"
 #include "gc_root-inl.h"
 #include "gc/heap-inl.h"
+#include "obj_ptr-inl.h"
 #include "mirror/class_loader.h"
 #include "mirror/dex_cache-inl.h"
 #include "mirror/iftable.h"
 #include "mirror/object_array.h"
 #include "handle_scope-inl.h"
+#include "scoped_thread_state_change-inl.h"
 
 #include <atomic>
 
@@ -35,12 +37,13 @@
   return FindClass(self, descriptor, ScopedNullHandle<mirror::ClassLoader>());
 }
 
-inline mirror::Class* ClassLinker::FindArrayClass(Thread* self, mirror::Class** element_class) {
+inline mirror::Class* ClassLinker::FindArrayClass(Thread* self,
+                                                  ObjPtr<mirror::Class>* element_class) {
   for (size_t i = 0; i < kFindArrayCacheSize; ++i) {
     // Read the cached array class once to avoid races with other threads setting it.
-    mirror::Class* array_class = find_array_class_cache_[i].Read();
+    ObjPtr<mirror::Class> array_class = find_array_class_cache_[i].Read();
     if (array_class != nullptr && array_class->GetComponentType() == *element_class) {
-      return array_class;
+      return array_class.Ptr();
     }
   }
   std::string descriptor = "[";
@@ -48,8 +51,8 @@
   descriptor += (*element_class)->GetDescriptor(&temp);
   StackHandleScope<2> hs(Thread::Current());
   Handle<mirror::ClassLoader> class_loader(hs.NewHandle((*element_class)->GetClassLoader()));
-  HandleWrapper<mirror::Class> h_element_class(hs.NewHandleWrapper(element_class));
-  mirror::Class* array_class = FindClass(self, descriptor.c_str(), class_loader);
+  HandleWrapperObjPtr<mirror::Class> h_element_class(hs.NewHandleWrapper(element_class));
+  ObjPtr<mirror::Class> array_class = FindClass(self, descriptor.c_str(), class_loader);
   if (array_class != nullptr) {
     // Benign races in storing array class and incrementing index.
     size_t victim_index = find_array_class_cache_next_victim_;
@@ -59,15 +62,15 @@
     // We should have a NoClassDefFoundError.
     self->AssertPendingException();
   }
-  return array_class;
+  return array_class.Ptr();
 }
 
 inline mirror::String* ClassLinker::ResolveString(uint32_t string_idx, ArtMethod* referrer) {
   Thread::PoisonObjectPointersIfDebug();
-  mirror::Class* declaring_class = referrer->GetDeclaringClass();
+  ObjPtr<mirror::Class> declaring_class = referrer->GetDeclaringClass();
   // MethodVerifier refuses methods with string_idx out of bounds.
-  DCHECK_LT(string_idx, declaring_class->GetDexFile().NumStringIds());;
-  mirror::String* string =
+  DCHECK_LT(string_idx, declaring_class->GetDexFile().NumStringIds());
+  ObjPtr<mirror::String> string =
         mirror::StringDexCachePair::Lookup(declaring_class->GetDexCacheStrings(),
                                            string_idx,
                                            mirror::DexCache::kDexCacheStringCacheSize).Read();
@@ -80,14 +83,15 @@
       DCHECK_EQ(dex_cache->GetResolvedString(string_idx), string);
     }
   }
-  return string;
+  return string.Ptr();
 }
 
 inline mirror::Class* ClassLinker::ResolveType(uint16_t type_idx, ArtMethod* referrer) {
   Thread::PoisonObjectPointersIfDebug();
-  mirror::Class* resolved_type = referrer->GetDexCacheResolvedType(type_idx, image_pointer_size_);
+  ObjPtr<mirror::Class> resolved_type =
+      referrer->GetDexCacheResolvedType(type_idx, image_pointer_size_);
   if (UNLIKELY(resolved_type == nullptr)) {
-    mirror::Class* declaring_class = referrer->GetDeclaringClass();
+    ObjPtr<mirror::Class> declaring_class = referrer->GetDeclaringClass();
     StackHandleScope<2> hs(Thread::Current());
     Handle<mirror::DexCache> dex_cache(hs.NewHandle(declaring_class->GetDexCache()));
     Handle<mirror::ClassLoader> class_loader(hs.NewHandle(declaring_class->GetClassLoader()));
@@ -96,14 +100,14 @@
     // Note: We cannot check here to see whether we added the type to the cache. The type
     //       might be an erroneous class, which results in it being hidden from us.
   }
-  return resolved_type;
+  return resolved_type.Ptr();
 }
 
 inline mirror::Class* ClassLinker::ResolveType(uint16_t type_idx, ArtField* referrer) {
   Thread::PoisonObjectPointersIfDebug();
   ObjPtr<mirror::Class> declaring_class = referrer->GetDeclaringClass();
-  mirror::DexCache* dex_cache_ptr = declaring_class->GetDexCache();
-  mirror::Class* resolved_type = dex_cache_ptr->GetResolvedType(type_idx);
+  ObjPtr<mirror::DexCache> dex_cache_ptr = declaring_class->GetDexCache();
+  ObjPtr<mirror::Class> resolved_type = dex_cache_ptr->GetResolvedType(type_idx);
   if (UNLIKELY(resolved_type == nullptr)) {
     StackHandleScope<2> hs(Thread::Current());
     Handle<mirror::DexCache> dex_cache(hs.NewHandle(dex_cache_ptr));
@@ -113,7 +117,7 @@
     // Note: We cannot check here to see whether we added the type to the cache. The type
     //       might be an erroneous class, which results in it being hidden from us.
   }
-  return resolved_type;
+  return resolved_type.Ptr();
 }
 
 inline ArtMethod* ClassLinker::GetResolvedMethod(uint32_t method_idx, ArtMethod* referrer) {
@@ -138,11 +142,11 @@
   // contains.
   const DexFile* dex_file = dex_cache->GetDexFile();
   const DexFile::MethodId& method = dex_file->GetMethodId(method_idx);
-  mirror::Class* resolved_type = dex_cache->GetResolvedType(method.class_idx_);
+  ObjPtr<mirror::Class> resolved_type = dex_cache->GetResolvedType(method.class_idx_);
   if (UNLIKELY(resolved_type == nullptr)) {
     resolved_type = ResolveType(*dex_file, method.class_idx_, dex_cache, class_loader);
   }
-  return resolved_type;
+  return resolved_type.Ptr();
 }
 
 template <ClassLinker::ResolveMode kResolveMode>
@@ -153,7 +157,7 @@
   ArtMethod* resolved_method = GetResolvedMethod(method_idx, referrer);
   Thread::PoisonObjectPointersIfDebug();
   if (UNLIKELY(resolved_method == nullptr)) {
-    mirror::Class* declaring_class = referrer->GetDeclaringClass();
+    ObjPtr<mirror::Class> declaring_class = referrer->GetDeclaringClass();
     StackHandleScope<2> hs(self);
     Handle<mirror::DexCache> h_dex_cache(hs.NewHandle(declaring_class->GetDexCache()));
     Handle<mirror::ClassLoader> h_class_loader(hs.NewHandle(declaring_class->GetClassLoader()));
@@ -170,20 +174,22 @@
   return resolved_method;
 }
 
-inline ArtField* ClassLinker::GetResolvedField(uint32_t field_idx, mirror::DexCache* dex_cache) {
+inline ArtField* ClassLinker::GetResolvedField(uint32_t field_idx,
+                                               ObjPtr<mirror::DexCache> dex_cache) {
   return dex_cache->GetResolvedField(field_idx, image_pointer_size_);
 }
 
-inline ArtField* ClassLinker::GetResolvedField(
-    uint32_t field_idx, mirror::Class* field_declaring_class) {
-  return GetResolvedField(field_idx, field_declaring_class->GetDexCache());
+inline ArtField* ClassLinker::GetResolvedField(uint32_t field_idx,
+                                               ObjPtr<mirror::Class> field_declaring_class) {
+  return GetResolvedField(field_idx, MakeObjPtr(field_declaring_class->GetDexCache()));
 }
 
-inline ArtField* ClassLinker::ResolveField(uint32_t field_idx, ArtMethod* referrer,
+inline ArtField* ClassLinker::ResolveField(uint32_t field_idx,
+                                           ArtMethod* referrer,
                                            bool is_static) {
-  mirror::Class* declaring_class = referrer->GetDeclaringClass();
-  ArtField* resolved_field = GetResolvedField(field_idx, declaring_class);
   Thread::PoisonObjectPointersIfDebug();
+  ObjPtr<mirror::Class> declaring_class = referrer->GetDeclaringClass();
+  ArtField* resolved_field = GetResolvedField(field_idx, declaring_class);
   if (UNLIKELY(resolved_field == nullptr)) {
     StackHandleScope<2> hs(Thread::Current());
     Handle<mirror::DexCache> dex_cache(hs.NewHandle(declaring_class->GetDexCache()));
@@ -199,7 +205,7 @@
 inline mirror::Object* ClassLinker::AllocObject(Thread* self) {
   return GetClassRoot(kJavaLangObject)->Alloc<true, false>(
       self,
-      Runtime::Current()->GetHeap()->GetCurrentAllocator());
+      Runtime::Current()->GetHeap()->GetCurrentAllocator()).Ptr();
 }
 
 template <class T>
@@ -226,17 +232,17 @@
                              ifcount * mirror::IfTable::kMax));
 }
 
-inline mirror::Class* ClassLinker::GetClassRoot(ClassRoot class_root)
-    REQUIRES_SHARED(Locks::mutator_lock_) {
+inline mirror::Class* ClassLinker::GetClassRoot(ClassRoot class_root) {
   DCHECK(!class_roots_.IsNull());
   mirror::ObjectArray<mirror::Class>* class_roots = class_roots_.Read();
-  mirror::Class* klass = class_roots->Get(class_root);
+  ObjPtr<mirror::Class> klass = class_roots->Get(class_root);
   DCHECK(klass != nullptr);
-  return klass;
+  return klass.Ptr();
 }
 
 template<ReadBarrierOption kReadBarrierOption>
-ArtMethod* ClassLinker::FindMethodForProxy(mirror::Class* proxy_class, ArtMethod* proxy_method) {
+ArtMethod* ClassLinker::FindMethodForProxy(ObjPtr<mirror::Class> proxy_class,
+                                           ArtMethod* proxy_method) {
   DCHECK(proxy_class->IsProxyClass());
   DCHECK(proxy_method->IsProxyMethod<kReadBarrierOption>());
   {
@@ -247,8 +253,8 @@
       if (!self->IsJWeakCleared(data.weak_root) &&
           proxy_method->HasSameDexCacheResolvedTypes(data.resolved_types,
                                                      image_pointer_size_)) {
-        mirror::DexCache* dex_cache = down_cast<mirror::DexCache*>(
-            self->DecodeJObject(data.weak_root));
+        ObjPtr<mirror::DexCache> dex_cache =
+            ObjPtr<mirror::DexCache>::DownCast(self->DecodeJObject(data.weak_root));
         if (dex_cache != nullptr) {
           ArtMethod* resolved_method = dex_cache->GetResolvedMethod(
               proxy_method->GetDexMethodIndex(), image_pointer_size_);
@@ -258,8 +264,8 @@
       }
     }
   }
-  LOG(FATAL) << "Didn't find dex cache for " << PrettyClass(proxy_class) << " "
-      << PrettyMethod(proxy_method);
+  LOG(FATAL) << "Didn't find dex cache for " << proxy_class->PrettyClass() << " "
+      << proxy_method->PrettyMethod();
   UNREACHABLE();
 }
 
diff --git a/runtime/class_linker.cc b/runtime/class_linker.cc
index 7aa28d3..65e46c2 100644
--- a/runtime/class_linker.cc
+++ b/runtime/class_linker.cc
@@ -63,13 +63,16 @@
 #include "jit/jit.h"
 #include "jit/jit_code_cache.h"
 #include "jit/offline_profiling_info.h"
+#include "jni_internal.h"
 #include "leb128.h"
 #include "linear_alloc.h"
 #include "mirror/class.h"
 #include "mirror/class-inl.h"
+#include "mirror/class_ext.h"
 #include "mirror/class_loader.h"
 #include "mirror/dex_cache.h"
 #include "mirror/dex_cache-inl.h"
+#include "mirror/emulated_stack_frame.h"
 #include "mirror/field.h"
 #include "mirror/iftable-inl.h"
 #include "mirror/method.h"
@@ -121,7 +124,7 @@
   StackHandleScope<1> hs(self);
   Handle<mirror::ClassLoader> class_loader(hs.NewHandle(method != nullptr ?
       method->GetDeclaringClass()->GetClassLoader() : nullptr));
-  mirror::Class* exception_class = class_linker->FindClass(self, descriptor, class_loader);
+  ObjPtr<mirror::Class> exception_class = class_linker->FindClass(self, descriptor, class_loader);
 
   if (exception_class == nullptr) {
     // No exc class ~ no <init>-with-string.
@@ -135,10 +138,22 @@
   return exception_init_method != nullptr;
 }
 
-// Helper for ThrowEarlierClassFailure. Throws the stored error.
-static void HandleEarlierVerifyError(Thread* self, ClassLinker* class_linker, mirror::Class* c)
+static mirror::Object* GetVerifyError(ObjPtr<mirror::Class> c)
     REQUIRES_SHARED(Locks::mutator_lock_) {
-  mirror::Object* obj = c->GetVerifyError();
+  ObjPtr<mirror::ClassExt> ext(c->GetExtData());
+  if (ext == nullptr) {
+    return nullptr;
+  } else {
+    return ext->GetVerifyError();
+  }
+}
+
+// Helper for ThrowEarlierClassFailure. Throws the stored error.
+static void HandleEarlierVerifyError(Thread* self,
+                                     ClassLinker* class_linker,
+                                     ObjPtr<mirror::Class> c)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  ObjPtr<mirror::Object> obj = GetVerifyError(c);
   DCHECK(obj != nullptr);
   self->AssertNoPendingException();
   if (obj->IsClass()) {
@@ -149,22 +164,22 @@
     const char* descriptor = obj->AsClass()->GetDescriptor(&temp);
 
     if (HasInitWithString(self, class_linker, descriptor)) {
-      self->ThrowNewException(descriptor, PrettyDescriptor(c).c_str());
+      self->ThrowNewException(descriptor, c->PrettyDescriptor().c_str());
     } else {
       self->ThrowNewException(descriptor, nullptr);
     }
   } else {
     // Previous error has been stored as an instance. Just rethrow.
-    mirror::Class* throwable_class =
+    ObjPtr<mirror::Class> throwable_class =
         self->DecodeJObject(WellKnownClasses::java_lang_Throwable)->AsClass();
-    mirror::Class* error_class = obj->GetClass();
+    ObjPtr<mirror::Class> error_class = obj->GetClass();
     CHECK(throwable_class->IsAssignableFrom(error_class));
     self->SetException(obj->AsThrowable());
   }
   self->AssertPendingException();
 }
 
-void ClassLinker::ThrowEarlierClassFailure(mirror::Class* c, bool wrap_in_no_class_def) {
+void ClassLinker::ThrowEarlierClassFailure(ObjPtr<mirror::Class> c, bool wrap_in_no_class_def) {
   // The class failed to initialize on a previous attempt, so we want to throw
   // a NoClassDefFoundError (v2 2.17.5).  The exception to this rule is if we
   // failed in verification, in which case v2 5.4.1 says we need to re-throw
@@ -172,34 +187,38 @@
   Runtime* const runtime = Runtime::Current();
   if (!runtime->IsAotCompiler()) {  // Give info if this occurs at runtime.
     std::string extra;
-    if (c->GetVerifyError() != nullptr) {
-      mirror::Object* verify_error = c->GetVerifyError();
+    if (GetVerifyError(c) != nullptr) {
+      ObjPtr<mirror::Object> verify_error = GetVerifyError(c);
       if (verify_error->IsClass()) {
-        extra = PrettyDescriptor(verify_error->AsClass());
+        extra = mirror::Class::PrettyDescriptor(verify_error->AsClass());
       } else {
         extra = verify_error->AsThrowable()->Dump();
       }
     }
-    LOG(INFO) << "Rejecting re-init on previously-failed class " << PrettyClass(c) << ": " << extra;
+    LOG(INFO) << "Rejecting re-init on previously-failed class " << c->PrettyClass()
+              << ": " << extra;
   }
 
-  CHECK(c->IsErroneous()) << PrettyClass(c) << " " << c->GetStatus();
+  CHECK(c->IsErroneous()) << c->PrettyClass() << " " << c->GetStatus();
   Thread* self = Thread::Current();
   if (runtime->IsAotCompiler()) {
     // At compile time, accurate errors and NCDFE are disabled to speed compilation.
-    mirror::Throwable* pre_allocated = runtime->GetPreAllocatedNoClassDefFoundError();
+    ObjPtr<mirror::Throwable> pre_allocated = runtime->GetPreAllocatedNoClassDefFoundError();
     self->SetException(pre_allocated);
   } else {
-    if (c->GetVerifyError() != nullptr) {
+    if (GetVerifyError(c) != nullptr) {
       // Rethrow stored error.
       HandleEarlierVerifyError(self, this, c);
     }
-    if (c->GetVerifyError() == nullptr || wrap_in_no_class_def) {
+    // TODO This might be wrong if we hit an OOME while allocating the ClassExt. In that case we
+    // might have meant to go down the earlier if statement with the original error but it got
+    // swallowed by the OOM so we end up here.
+    if (GetVerifyError(c) == nullptr || wrap_in_no_class_def) {
       // If there isn't a recorded earlier error, or this is a repeat throw from initialization,
       // the top-level exception must be a NoClassDefFoundError. The potentially already pending
       // exception will be a cause.
       self->ThrowNewWrappedException("Ljava/lang/NoClassDefFoundError;",
-                                     PrettyDescriptor(c).c_str());
+                                     c->PrettyDescriptor().c_str());
     }
   }
 }
@@ -294,7 +313,7 @@
       *field_offset = MemberOffset(RoundUp(field_offset->Uint32Value(), n));
       AddFieldGap(old_offset.Uint32Value(), field_offset->Uint32Value(), gaps);
     }
-    CHECK(type != Primitive::kPrimNot) << PrettyField(field);  // should be primitive types
+    CHECK(type != Primitive::kPrimNot) << field->PrettyField();  // should be primitive types
     grouped_and_sorted_fields->pop_front();
     if (!gaps->empty() && gaps->top().size >= n) {
       FieldGap gap = gaps->top();
@@ -316,7 +335,6 @@
 ClassLinker::ClassLinker(InternTable* intern_table)
     // dex_lock_ is recursive as it may be used in stack dumping.
     : dex_lock_("ClassLinker dex lock", kDexLock),
-      dex_cache_boot_image_class_lookup_required_(false),
       failed_dex_cache_class_lookups_(0),
       class_roots_(nullptr),
       array_iftable_(nullptr),
@@ -336,7 +354,7 @@
 }
 
 void ClassLinker::CheckSystemClass(Thread* self, Handle<mirror::Class> c1, const char* descriptor) {
-  mirror::Class* c2 = FindSystemClass(self, descriptor);
+  ObjPtr<mirror::Class> c2 = FindSystemClass(self, descriptor);
   if (c2 == nullptr) {
     LOG(FATAL) << "Could not find class " << descriptor;
     UNREACHABLE();
@@ -377,13 +395,13 @@
   CHECK(java_lang_Class.Get() != nullptr);
   mirror::Class::SetClassClass(java_lang_Class.Get());
   java_lang_Class->SetClass(java_lang_Class.Get());
-  if (kUseBakerOrBrooksReadBarrier) {
-    java_lang_Class->AssertReadBarrierPointer();
+  if (kUseBakerReadBarrier) {
+    java_lang_Class->AssertReadBarrierState();
   }
   java_lang_Class->SetClassSize(class_class_size);
   java_lang_Class->SetPrimitiveType(Primitive::kPrimNot);
   heap->DecrementDisableMovingGC(self);
-  // AllocClass(mirror::Class*) can now be used
+  // AllocClass(ObjPtr<mirror::Class>) can now be used
 
   // Class[] is used for reflection support.
   auto class_array_class_size = mirror::ObjectArray<mirror::Class>::ClassSize(image_pointer_size_);
@@ -457,6 +475,9 @@
   SetClassRoot(kJavaLangString, java_lang_String.Get());
   SetClassRoot(kJavaLangRefReference, java_lang_ref_Reference.Get());
 
+  // Fill in the empty iftable. Needs to be done after the kObjectArrayClass root is set.
+  java_lang_Object->SetIfTable(AllocIfTable(self, 0));
+
   // Setup the primitive type classes.
   SetClassRoot(kPrimitiveBoolean, CreatePrimitiveClass(self, Primitive::kPrimBoolean));
   SetClassRoot(kPrimitiveByte, CreatePrimitiveClass(self, Primitive::kPrimByte));
@@ -494,6 +515,14 @@
   java_lang_DexCache->SetObjectSize(mirror::DexCache::InstanceSize());
   mirror::Class::SetStatus(java_lang_DexCache, mirror::Class::kStatusResolved, self);
 
+
+  // Setup dalvik.system.ClassExt
+  Handle<mirror::Class> dalvik_system_ClassExt(hs.NewHandle(
+      AllocClass(self, java_lang_Class.Get(), mirror::ClassExt::ClassSize(image_pointer_size_))));
+  SetClassRoot(kDalvikSystemClassExt, dalvik_system_ClassExt.Get());
+  mirror::ClassExt::SetClass(dalvik_system_ClassExt.Get());
+  mirror::Class::SetStatus(dalvik_system_ClassExt, mirror::Class::kStatusResolved, self);
+
   // Set up array classes for string, field, method
   Handle<mirror::Class> object_array_string(hs.NewHandle(
       AllocClass(self, java_lang_Class.Get(),
@@ -539,7 +568,7 @@
     quick_to_interpreter_bridge_trampoline_ = GetQuickToInterpreterBridge();
   }
 
-  // Object, String and DexCache need to be rerun through FindSystemClass to finish init
+  // Object, String, ClassExt and DexCache need to be rerun through FindSystemClass to finish init
   mirror::Class::SetStatus(java_lang_Object, mirror::Class::kStatusNotReady, self);
   CheckSystemClass(self, java_lang_Object, "Ljava/lang/Object;");
   CHECK_EQ(java_lang_Object->GetObjectSize(), mirror::Object::InstanceSize());
@@ -548,6 +577,9 @@
   mirror::Class::SetStatus(java_lang_DexCache, mirror::Class::kStatusNotReady, self);
   CheckSystemClass(self, java_lang_DexCache, "Ljava/lang/DexCache;");
   CHECK_EQ(java_lang_DexCache->GetObjectSize(), mirror::DexCache::InstanceSize());
+  mirror::Class::SetStatus(dalvik_system_ClassExt, mirror::Class::kStatusNotReady, self);
+  CheckSystemClass(self, dalvik_system_ClassExt, "Ldalvik/system/ClassExt;");
+  CHECK_EQ(dalvik_system_ClassExt->GetObjectSize(), mirror::ClassExt::InstanceSize());
 
   // Setup the primitive array type classes - can't be done until Object has a vtable.
   SetClassRoot(kBooleanArrayClass, FindSystemClass(self, "[Z"));
@@ -650,6 +682,11 @@
   SetClassRoot(kJavaLangInvokeMethodHandleImpl, class_root);
   mirror::MethodHandleImpl::SetClass(class_root);
 
+  class_root = FindSystemClass(self, "Ldalvik/system/EmulatedStackFrame;");
+  CHECK(class_root != nullptr);
+  SetClassRoot(kDalvikSystemEmulatedStackFrame, class_root);
+  mirror::EmulatedStackFrame::SetClass(class_root);
+
   // java.lang.ref classes need to be specially flagged, but otherwise are normal classes
   // finish initializing Reference class
   mirror::Class::SetStatus(java_lang_ref_Reference, mirror::Class::kStatusNotReady, self);
@@ -696,7 +733,7 @@
     uint16_t void_type_idx = dex_file.GetIndexForTypeId(*void_type_id);
     // Now we resolve void type so the dex cache contains it. We use java.lang.Object class
     // as referrer so the used dex cache is core's one.
-    mirror::Class* resolved_type = ResolveType(dex_file, void_type_idx, java_lang_Object.Get());
+    ObjPtr<mirror::Class> resolved_type = ResolveType(dex_file, void_type_idx, java_lang_Object.Get());
     CHECK_EQ(resolved_type, GetClassRoot(kPrimitiveVoid));
     self->AssertNoPendingException();
   }
@@ -718,9 +755,10 @@
   // Note: we hard code the field indexes here rather than using FindInstanceField
   // as the types of the field can't be resolved prior to the runtime being
   // fully initialized
-  mirror::Class* java_lang_ref_Reference = GetClassRoot(kJavaLangRefReference);
-  mirror::Class* java_lang_ref_FinalizerReference =
-      FindSystemClass(self, "Ljava/lang/ref/FinalizerReference;");
+  StackHandleScope<2> hs(self);
+  Handle<mirror::Class> java_lang_ref_Reference = hs.NewHandle(GetClassRoot(kJavaLangRefReference));
+  Handle<mirror::Class> java_lang_ref_FinalizerReference =
+      hs.NewHandle(FindSystemClass(self, "Ljava/lang/ref/FinalizerReference;"));
 
   ArtField* pendingNext = java_lang_ref_Reference->GetInstanceField(0);
   CHECK_STREQ(pendingNext->GetName(), "pendingNext");
@@ -745,7 +783,7 @@
   // ensure all class_roots_ are initialized
   for (size_t i = 0; i < kClassRootsMax; i++) {
     ClassRoot class_root = static_cast<ClassRoot>(i);
-    mirror::Class* klass = GetClassRoot(class_root);
+    ObjPtr<mirror::Class> klass = GetClassRoot(class_root);
     CHECK(klass != nullptr);
     DCHECK(klass->IsArrayClass() || klass->IsPrimitive() || klass->GetDexCache() != nullptr);
     // note SetClassRoot does additional validation.
@@ -764,7 +802,7 @@
 void ClassLinker::RunRootClinits() {
   Thread* self = Thread::Current();
   for (size_t i = 0; i < ClassLinker::kClassRootsMax; ++i) {
-    mirror::Class* c = GetClassRoot(ClassRoot(i));
+    ObjPtr<mirror::Class> c = GetClassRoot(ClassRoot(i));
     if (!c->IsArrayClass() && !c->IsPrimitive()) {
       StackHandleScope<1> hs(self);
       Handle<mirror::Class> h_class(hs.NewHandle(GetClassRoot(ClassRoot(i))));
@@ -775,16 +813,16 @@
 }
 
 static void SanityCheckArtMethod(ArtMethod* m,
-                                 mirror::Class* expected_class,
+                                 ObjPtr<mirror::Class> expected_class,
                                  const std::vector<gc::space::ImageSpace*>& spaces)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   if (m->IsRuntimeMethod()) {
-    mirror::Class* declaring_class = m->GetDeclaringClassUnchecked();
-    CHECK(declaring_class == nullptr) << declaring_class << " " << PrettyMethod(m);
+    ObjPtr<mirror::Class> declaring_class = m->GetDeclaringClassUnchecked();
+    CHECK(declaring_class == nullptr) << declaring_class << " " << m->PrettyMethod();
   } else if (m->IsCopied()) {
-    CHECK(m->GetDeclaringClass() != nullptr) << PrettyMethod(m);
+    CHECK(m->GetDeclaringClass() != nullptr) << m->PrettyMethod();
   } else if (expected_class != nullptr) {
-    CHECK_EQ(m->GetDeclaringClassUnchecked(), expected_class) << PrettyMethod(m);
+    CHECK_EQ(m->GetDeclaringClassUnchecked(), expected_class) << m->PrettyMethod();
   }
   if (!spaces.empty()) {
     bool contains = false;
@@ -802,8 +840,8 @@
   }
 }
 
-static void SanityCheckArtMethodPointerArray(mirror::PointerArray* arr,
-                                             mirror::Class* expected_class,
+static void SanityCheckArtMethodPointerArray(ObjPtr<mirror::PointerArray> arr,
+                                             ObjPtr<mirror::Class> expected_class,
                                              PointerSize pointer_size,
                                              const std::vector<gc::space::ImageSpace*>& spaces)
     REQUIRES_SHARED(Locks::mutator_lock_) {
@@ -881,13 +919,11 @@
         SanityCheckArtMethod(klass->GetEmbeddedVTableEntry(i, pointer_size), nullptr, image_spaces);
       }
     }
-    auto* iftable = klass->GetIfTable();
-    if (iftable != nullptr) {
-      for (int32_t i = 0; i < klass->GetIfTableCount(); ++i) {
-        if (iftable->GetMethodArrayCount(i) > 0) {
-          SanityCheckArtMethodPointerArray(
-              iftable->GetMethodArray(i), nullptr, pointer_size, image_spaces);
-        }
+    mirror::IfTable* iftable = klass->GetIfTable();
+    for (int32_t i = 0; i < klass->GetIfTableCount(); ++i) {
+      if (iftable->GetMethodArrayCount(i) > 0) {
+        SanityCheckArtMethodPointerArray(
+            iftable->GetMethodArray(i), nullptr, pointer_size, image_spaces);
       }
     }
   }
@@ -927,7 +963,7 @@
 
 static void CheckTrampolines(mirror::Object* obj, void* arg) NO_THREAD_SAFETY_ANALYSIS {
   if (obj->IsClass()) {
-    mirror::Class* klass = obj->AsClass();
+    ObjPtr<mirror::Class> klass = obj->AsClass();
     TrampolineCheckData* d = reinterpret_cast<TrampolineCheckData*>(arg);
     for (ArtMethod& m : klass->GetMethods(d->pointer_size)) {
       const void* entrypoint = m.GetEntryPointFromQuickCompiledCodePtrSize(d->pointer_size);
@@ -969,7 +1005,6 @@
       return false;
     }
   }
-  dex_cache_boot_image_class_lookup_required_ = true;
   std::vector<const OatFile*> oat_files =
       runtime->GetOatFileManager().RegisterImageOatFiles(spaces);
   DCHECK(!oat_files.empty());
@@ -1012,7 +1047,7 @@
         spaces[i]->GetLiveBitmap()->Walk(CheckTrampolines, &data);
         if (data.error) {
           ArtMethod* m = data.m;
-          LOG(ERROR) << "Found a broken ArtMethod: " << PrettyMethod(m);
+          LOG(ERROR) << "Found a broken ArtMethod: " << ArtMethod::PrettyMethod(m);
           *error_msg = "Found an ArtMethod with a bad entrypoint";
           return false;
         }
@@ -1029,7 +1064,7 @@
   // as being Strings or not
   mirror::String::SetClass(GetClassRoot(kJavaLangString));
 
-  mirror::Class* java_lang_Object = GetClassRoot(kJavaLangObject);
+  ObjPtr<mirror::Class> java_lang_Object = GetClassRoot(kJavaLangObject);
   java_lang_Object->SetObjectSize(sizeof(mirror::Object));
   // Allocate in non-movable so that it's possible to check if a JNI weak global ref has been
   // cleared without triggering the read barrier and unintentionally mark the sentinel alive.
@@ -1059,6 +1094,8 @@
   mirror::ShortArray::SetArrayClass(GetClassRoot(kShortArrayClass));
   mirror::Throwable::SetClass(GetClassRoot(kJavaLangThrowable));
   mirror::StackTraceElement::SetClass(GetClassRoot(kJavaLangStackTraceElement));
+  mirror::EmulatedStackFrame::SetClass(GetClassRoot(kDalvikSystemEmulatedStackFrame));
+  mirror::ClassExt::SetClass(GetClassRoot(kDalvikSystemClassExt));
 
   for (gc::space::ImageSpace* image_space : spaces) {
     // Boot class loader, use a null handle.
@@ -1083,23 +1120,22 @@
 }
 
 bool ClassLinker::IsBootClassLoader(ScopedObjectAccessAlreadyRunnable& soa,
-                                    mirror::ClassLoader* class_loader) {
+                                    ObjPtr<mirror::ClassLoader> class_loader) {
   return class_loader == nullptr ||
        soa.Decode<mirror::Class>(WellKnownClasses::java_lang_BootClassLoader) ==
            class_loader->GetClass();
 }
 
-static mirror::String* GetDexPathListElementName(ScopedObjectAccessUnchecked& soa,
-                                                 ObjPtr<mirror::Object> element)
+static mirror::String* GetDexPathListElementName(ObjPtr<mirror::Object> element)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   ArtField* const dex_file_field =
-      soa.DecodeField(WellKnownClasses::dalvik_system_DexPathList__Element_dexFile);
+      jni::DecodeArtField(WellKnownClasses::dalvik_system_DexPathList__Element_dexFile);
   ArtField* const dex_file_name_field =
-      soa.DecodeField(WellKnownClasses::dalvik_system_DexFile_fileName);
+      jni::DecodeArtField(WellKnownClasses::dalvik_system_DexFile_fileName);
   DCHECK(dex_file_field != nullptr);
   DCHECK(dex_file_name_field != nullptr);
   DCHECK(element != nullptr);
-  CHECK_EQ(dex_file_field->GetDeclaringClass(), element->GetClass()) << PrettyTypeOf(element);
+  CHECK_EQ(dex_file_field->GetDeclaringClass(), element->GetClass()) << element->PrettyTypeOf();
   ObjPtr<mirror::Object> dex_file = dex_file_field->GetObject(element);
   if (dex_file == nullptr) {
     return nullptr;
@@ -1111,7 +1147,7 @@
   return nullptr;
 }
 
-static bool FlattenPathClassLoader(mirror::ClassLoader* class_loader,
+static bool FlattenPathClassLoader(ObjPtr<mirror::ClassLoader> class_loader,
                                    std::list<mirror::String*>* out_dex_file_names,
                                    std::string* error_msg)
     REQUIRES_SHARED(Locks::mutator_lock_) {
@@ -1119,15 +1155,16 @@
   DCHECK(error_msg != nullptr);
   ScopedObjectAccessUnchecked soa(Thread::Current());
   ArtField* const dex_path_list_field =
-      soa.DecodeField(WellKnownClasses::dalvik_system_PathClassLoader_pathList);
+      jni::DecodeArtField(WellKnownClasses::dalvik_system_BaseDexClassLoader_pathList);
   ArtField* const dex_elements_field =
-      soa.DecodeField(WellKnownClasses::dalvik_system_DexPathList_dexElements);
+      jni::DecodeArtField(WellKnownClasses::dalvik_system_DexPathList_dexElements);
   CHECK(dex_path_list_field != nullptr);
   CHECK(dex_elements_field != nullptr);
   while (!ClassLinker::IsBootClassLoader(soa, class_loader)) {
     if (soa.Decode<mirror::Class>(WellKnownClasses::dalvik_system_PathClassLoader) !=
         class_loader->GetClass()) {
-      *error_msg = StringPrintf("Unknown class loader type %s", PrettyTypeOf(class_loader).c_str());
+      *error_msg = StringPrintf("Unknown class loader type %s",
+                                class_loader->PrettyTypeOf().c_str());
       // Unsupported class loader.
       return false;
     }
@@ -1147,7 +1184,7 @@
             *error_msg = StringPrintf("Null dex element at index %d", i);
             return false;
           }
-          ObjPtr<mirror::String> const name = GetDexPathListElementName(soa, element);
+          ObjPtr<mirror::String> const name = GetDexPathListElementName(element);
           if (name == nullptr) {
             *error_msg = StringPrintf("Null name for dex element at index %d", i);
             return false;
@@ -1179,8 +1216,7 @@
           << resolved_types << " is not in image starting at "
           << reinterpret_cast<void*>(header_.GetImageBegin());
       if (!is_copied || in_image_space) {
-        // Go through the array so that we don't need to do a slow map lookup.
-        method->SetDexCacheResolvedTypes(*reinterpret_cast<GcRoot<mirror::Class>**>(resolved_types),
+        method->SetDexCacheResolvedTypes(method->GetDexCache()->GetResolvedTypes(),
                                          kRuntimePointerSize);
       }
     }
@@ -1196,8 +1232,7 @@
           << resolved_methods << " is not in image starting at "
           << reinterpret_cast<void*>(header_.GetImageBegin());
       if (!is_copied || in_image_space) {
-        // Go through the array so that we don't need to do a slow map lookup.
-        method->SetDexCacheResolvedMethods(*reinterpret_cast<ArtMethod***>(resolved_methods),
+        method->SetDexCacheResolvedMethods(method->GetDexCache()->GetResolvedMethods(),
                                            kRuntimePointerSize);
       }
     }
@@ -1213,9 +1248,9 @@
 
   virtual void Visit(ArtMethod* method)
       REQUIRES_SHARED(Locks::mutator_lock_, Locks::classlinker_classes_lock_) {
-    mirror::Class* klass = method->GetDeclaringClass();
+    ObjPtr<mirror::Class> klass = method->GetDeclaringClass();
     if (klass != nullptr && !Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(klass)) {
-      CHECK_EQ(table_->LookupByDescriptor(klass), klass) << PrettyClass(klass);
+      CHECK_EQ(table_->LookupByDescriptor(klass), klass) << mirror::Class::PrettyClass(klass);
     }
   }
 
@@ -1230,9 +1265,9 @@
 
   virtual void Visit(ArtMethod* method)
       REQUIRES_SHARED(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
-    mirror::Class* klass = method->GetDeclaringClassUnchecked();
+    ObjPtr<mirror::Class> klass = method->GetDeclaringClassUnchecked();
     if (klass != nullptr) {
-      CHECK(live_bitmap_->Test(klass)) << "Image method has unmarked declaring class";
+      CHECK(live_bitmap_->Test(klass.Ptr())) << "Image method has unmarked declaring class";
     }
   }
 
@@ -1240,6 +1275,20 @@
   gc::accounting::HeapBitmap* const live_bitmap_;
 };
 
+// Copies data from one array to another array at the same position
+// if pred returns false. If there is a page of continuous data in
+// the src array for which pred consistently returns true then
+// corresponding page in the dst array will not be touched.
+// This should reduce number of allocated physical pages.
+template <class T, class NullPred>
+static void CopyNonNull(const T* src, size_t count, T* dst, const NullPred& pred) {
+  for (size_t i = 0; i < count; ++i) {
+    if (!pred(src[i])) {
+      dst[i] = src[i];
+    }
+  }
+}
+
 bool ClassLinker::UpdateAppImageClassLoadersAndDexCaches(
     gc::space::ImageSpace* space,
     Handle<mirror::ClassLoader> class_loader,
@@ -1256,13 +1305,12 @@
     // Add image classes into the class table for the class loader, and fixup the dex caches and
     // class loader fields.
     WriterMutexLock mu(self, *Locks::classlinker_classes_lock_);
-    ClassTable* table = InsertClassTableForClassLoader(class_loader.Get());
     // Dex cache array fixup is all or nothing, we must reject app images that have mixed since we
     // rely on clobering the dex cache arrays in the image to forward to bss.
     size_t num_dex_caches_with_bss_arrays = 0;
     const size_t num_dex_caches = dex_caches->GetLength();
     for (size_t i = 0; i < num_dex_caches; i++) {
-      mirror::DexCache* const dex_cache = dex_caches->Get(i);
+      ObjPtr<mirror::DexCache> const dex_cache = dex_caches->Get(i);
       const DexFile* const dex_file = dex_cache->GetDexFile();
       const OatFile::OatDexFile* oat_dex_file = dex_file->GetOatDexFile();
       if (oat_dex_file != nullptr && oat_dex_file->GetDexCacheArrays() != nullptr) {
@@ -1283,7 +1331,7 @@
     }
     // Only add the classes to the class loader after the points where we can return false.
     for (size_t i = 0; i < num_dex_caches; i++) {
-      mirror::DexCache* const dex_cache = dex_caches->Get(i);
+      ObjPtr<mirror::DexCache> dex_cache = dex_caches->Get(i);
       const DexFile* const dex_file = dex_cache->GetDexFile();
       const OatFile::OatDexFile* oat_dex_file = dex_file->GetOatDexFile();
       if (oat_dex_file != nullptr && oat_dex_file->GetDexCacheArrays() != nullptr) {
@@ -1297,12 +1345,9 @@
         const size_t num_types = dex_file->NumTypeIds();
         const size_t num_methods = dex_file->NumMethodIds();
         const size_t num_fields = dex_file->NumFieldIds();
-        size_t num_method_types = 0;
-        if (Runtime::Current()->IsMethodHandlesEnabled()) {
-          num_method_types = mirror::DexCache::kDexCacheMethodTypeCacheSize;
-          if (dex_file->NumProtoIds() < num_method_types) {
-            num_method_types = dex_file->NumProtoIds();
-          }
+        size_t num_method_types = mirror::DexCache::kDexCacheMethodTypeCacheSize;
+        if (dex_file->NumProtoIds() < num_method_types) {
+          num_method_types = dex_file->NumProtoIds();
         }
 
         CHECK_EQ(num_strings, dex_cache->NumStrings());
@@ -1332,11 +1377,12 @@
           for (size_t j = 0; kIsDebugBuild && j < num_types; ++j) {
             DCHECK(types[j].IsNull());
           }
-          std::copy_n(image_resolved_types, num_types, types);
-          // Store a pointer to the new location for fast ArtMethod patching without requiring map.
-          // This leaves random garbage at the start of the dex cache array, but nobody should ever
-          // read from it again.
-          *reinterpret_cast<GcRoot<mirror::Class>**>(image_resolved_types) = types;
+          CopyNonNull(image_resolved_types,
+                      num_types,
+                      types,
+                      [](const GcRoot<mirror::Class>& elem) {
+                          return elem.IsNull();
+                      });
           dex_cache->SetResolvedTypes(types);
         }
         if (num_methods != 0u) {
@@ -1346,9 +1392,12 @@
           for (size_t j = 0; kIsDebugBuild && j < num_methods; ++j) {
             DCHECK(methods[j] == nullptr);
           }
-          std::copy_n(image_resolved_methods, num_methods, methods);
-          // Store a pointer to the new location for fast ArtMethod patching without requiring map.
-          *reinterpret_cast<ArtMethod***>(image_resolved_methods) = methods;
+          CopyNonNull(image_resolved_methods,
+                      num_methods,
+                      methods,
+                      [] (const ArtMethod* method) {
+                          return method == nullptr;
+                      });
           dex_cache->SetResolvedMethods(methods);
         }
         if (num_fields != 0u) {
@@ -1357,7 +1406,12 @@
           for (size_t j = 0; kIsDebugBuild && j < num_fields; ++j) {
             DCHECK(fields[j] == nullptr);
           }
-          std::copy_n(dex_cache->GetResolvedFields(), num_fields, fields);
+          CopyNonNull(dex_cache->GetResolvedFields(),
+                      num_fields,
+                      fields,
+                      [] (const ArtField* field) {
+                          return field == nullptr;
+                      });
           dex_cache->SetResolvedFields(fields);
         }
         if (num_method_types != 0u) {
@@ -1377,6 +1431,7 @@
                 std::memory_order_relaxed);
           }
 
+          mirror::MethodTypeDexCachePair::Initialize(method_types);
           dex_cache->SetResolvedMethodTypes(method_types);
         }
       }
@@ -1385,110 +1440,53 @@
         // Make sure to do this after we update the arrays since we store the resolved types array
         // in DexCacheData in RegisterDexFileLocked. We need the array pointer to be the one in the
         // BSS.
-        mirror::DexCache* existing_dex_cache = FindDexCacheLocked(self,
-                                                                  *dex_file,
-                                                                  /*allow_failure*/true);
+        ObjPtr<mirror::DexCache> existing_dex_cache = FindDexCacheLocked(self,
+                                                                         *dex_file,
+                                                                         /*allow_failure*/true);
         CHECK(existing_dex_cache == nullptr);
         StackHandleScope<1> hs3(self);
-        RegisterDexFileLocked(*dex_file, hs3.NewHandle(dex_cache));
-      }
-      GcRoot<mirror::Class>* const types = dex_cache->GetResolvedTypes();
-      const size_t num_types = dex_cache->NumResolvedTypes();
-      if (new_class_set == nullptr) {
-        for (int32_t j = 0; j < static_cast<int32_t>(num_types); j++) {
-          // The image space is not yet added to the heap, avoid read barriers.
-          mirror::Class* klass = types[j].Read();
-          // There may also be boot image classes,
-          if (space->HasAddress(klass)) {
-            DCHECK_NE(klass->GetStatus(), mirror::Class::kStatusError);
-            // Update the class loader from the one in the image class loader to the one that loaded
-            // the app image.
-            klass->SetClassLoader(class_loader.Get());
-            // The resolved type could be from another dex cache, go through the dex cache just in
-            // case. May be null for array classes.
-            if (klass->GetDexCacheStrings() != nullptr) {
-              DCHECK(!klass->IsArrayClass());
-              klass->SetDexCacheStrings(klass->GetDexCache()->GetStrings());
-            }
-            // If there are multiple dex caches, there may be the same class multiple times
-            // in different dex caches. Check for this since inserting will add duplicates
-            // otherwise.
-            if (num_dex_caches > 1) {
-              mirror::Class* existing = table->LookupByDescriptor(klass);
-              if (existing != nullptr) {
-                DCHECK_EQ(existing, klass) << PrettyClass(klass);
-              } else {
-                table->Insert(klass);
-              }
-            } else {
-              table->Insert(klass);
-            }
-            // Double checked VLOG to avoid overhead.
-            if (VLOG_IS_ON(image)) {
-              VLOG(image) << PrettyClass(klass) << " " << klass->GetStatus();
-              if (!klass->IsArrayClass()) {
-                VLOG(image) << "From " << klass->GetDexCache()->GetDexFile()->GetBaseLocation();
-              }
-              VLOG(image) << "Direct methods";
-              for (ArtMethod& m : klass->GetDirectMethods(kRuntimePointerSize)) {
-                VLOG(image) << PrettyMethod(&m);
-              }
-              VLOG(image) << "Virtual methods";
-              for (ArtMethod& m : klass->GetVirtualMethods(kRuntimePointerSize)) {
-                VLOG(image) << PrettyMethod(&m);
-              }
-            }
-          } else {
-            DCHECK(klass == nullptr || heap->ObjectIsInBootImageSpace(klass))
-                << klass << " " << PrettyClass(klass);
-          }
+        Handle<mirror::DexCache> h_dex_cache = hs3.NewHandle(dex_cache);
+        RegisterDexFileLocked(*dex_file, h_dex_cache);
+        if (kIsDebugBuild) {
+          dex_cache.Assign(h_dex_cache.Get());  // Update dex_cache, used below in debug build.
         }
       }
       if (kIsDebugBuild) {
+        CHECK(new_class_set != nullptr);
+        GcRoot<mirror::Class>* const types = dex_cache->GetResolvedTypes();
+        const size_t num_types = dex_cache->NumResolvedTypes();
         for (int32_t j = 0; j < static_cast<int32_t>(num_types); j++) {
           // The image space is not yet added to the heap, avoid read barriers.
-          mirror::Class* klass = types[j].Read();
-          if (space->HasAddress(klass)) {
+          ObjPtr<mirror::Class> klass = types[j].Read();
+          if (space->HasAddress(klass.Ptr())) {
             DCHECK_NE(klass->GetStatus(), mirror::Class::kStatusError);
-            if (kIsDebugBuild) {
-              if (new_class_set != nullptr) {
-                auto it = new_class_set->Find(GcRoot<mirror::Class>(klass));
-                DCHECK(it != new_class_set->end());
-                DCHECK_EQ(it->Read(), klass);
-                mirror::Class* super_class = klass->GetSuperClass();
-                if (super_class != nullptr && !heap->ObjectIsInBootImageSpace(super_class)) {
-                  auto it2 = new_class_set->Find(GcRoot<mirror::Class>(super_class));
-                  DCHECK(it2 != new_class_set->end());
-                  DCHECK_EQ(it2->Read(), super_class);
-                }
-              } else {
-                DCHECK_EQ(table->LookupByDescriptor(klass), klass);
-                mirror::Class* super_class = klass->GetSuperClass();
-                if (super_class != nullptr && !heap->ObjectIsInBootImageSpace(super_class)) {
-                  CHECK_EQ(table->LookupByDescriptor(super_class), super_class);
-                }
+            auto it = new_class_set->Find(GcRoot<mirror::Class>(klass));
+            DCHECK(it != new_class_set->end());
+            DCHECK_EQ(it->Read(), klass);
+            ObjPtr<mirror::Class> super_class = klass->GetSuperClass();
+            if (super_class != nullptr && !heap->ObjectIsInBootImageSpace(super_class)) {
+              auto it2 = new_class_set->Find(GcRoot<mirror::Class>(super_class));
+              DCHECK(it2 != new_class_set->end());
+              DCHECK_EQ(it2->Read(), super_class);
+            }
+            for (ArtMethod& m : klass->GetDirectMethods(kRuntimePointerSize)) {
+              const void* code = m.GetEntryPointFromQuickCompiledCode();
+              const void* oat_code = m.IsInvokable() ? GetQuickOatCodeFor(&m) : code;
+              if (!IsQuickResolutionStub(code) &&
+                  !IsQuickGenericJniStub(code) &&
+                  !IsQuickToInterpreterBridge(code) &&
+                  !m.IsNative()) {
+                DCHECK_EQ(code, oat_code) << m.PrettyMethod();
               }
             }
-            if (kIsDebugBuild) {
-              for (ArtMethod& m : klass->GetDirectMethods(kRuntimePointerSize)) {
-                const void* code = m.GetEntryPointFromQuickCompiledCode();
-                const void* oat_code = m.IsInvokable() ? GetQuickOatCodeFor(&m) : code;
-                if (!IsQuickResolutionStub(code) &&
-                    !IsQuickGenericJniStub(code) &&
-                    !IsQuickToInterpreterBridge(code) &&
-                    !m.IsNative()) {
-                  DCHECK_EQ(code, oat_code) << PrettyMethod(&m);
-                }
-              }
-              for (ArtMethod& m : klass->GetVirtualMethods(kRuntimePointerSize)) {
-                const void* code = m.GetEntryPointFromQuickCompiledCode();
-                const void* oat_code = m.IsInvokable() ? GetQuickOatCodeFor(&m) : code;
-                if (!IsQuickResolutionStub(code) &&
-                    !IsQuickGenericJniStub(code) &&
-                    !IsQuickToInterpreterBridge(code) &&
-                    !m.IsNative()) {
-                  DCHECK_EQ(code, oat_code) << PrettyMethod(&m);
-                }
+            for (ArtMethod& m : klass->GetVirtualMethods(kRuntimePointerSize)) {
+              const void* code = m.GetEntryPointFromQuickCompiledCode();
+              const void* oat_code = m.IsInvokable() ? GetQuickOatCodeFor(&m) : code;
+              if (!IsQuickResolutionStub(code) &&
+                  !IsQuickGenericJniStub(code) &&
+                  !IsQuickToInterpreterBridge(code) &&
+                  !m.IsNative()) {
+                DCHECK_EQ(code, oat_code) << m.PrettyMethod();
               }
             }
           }
@@ -1516,20 +1514,20 @@
 class UpdateClassLoaderAndResolvedStringsVisitor {
  public:
   UpdateClassLoaderAndResolvedStringsVisitor(gc::space::ImageSpace* space,
-                                             mirror::ClassLoader* class_loader,
+                                             ObjPtr<mirror::ClassLoader> class_loader,
                                              bool forward_strings)
       : space_(space),
         class_loader_(class_loader),
         forward_strings_(forward_strings) {}
 
-  bool operator()(mirror::Class* klass) const REQUIRES_SHARED(Locks::mutator_lock_) {
+  bool operator()(ObjPtr<mirror::Class> klass) const REQUIRES_SHARED(Locks::mutator_lock_) {
     if (forward_strings_) {
       mirror::StringDexCacheType* strings = klass->GetDexCacheStrings();
       if (strings != nullptr) {
         DCHECK(
             space_->GetImageHeader().GetImageSection(ImageHeader::kSectionDexCacheArrays).Contains(
                 reinterpret_cast<uint8_t*>(strings) - space_->Begin()))
-            << "String dex cache array for " << PrettyClass(klass) << " is not in app image";
+            << "String dex cache array for " << klass->PrettyClass() << " is not in app image";
         // Dex caches have already been updated, so take the strings pointer from there.
         mirror::StringDexCacheType* new_strings = klass->GetDexCache()->GetStrings();
         DCHECK_NE(strings, new_strings);
@@ -1542,7 +1540,7 @@
   }
 
   gc::space::ImageSpace* const space_;
-  mirror::ClassLoader* const class_loader_;
+  ObjPtr<mirror::ClassLoader> const class_loader_;
   const bool forward_strings_;
 };
 
@@ -1581,13 +1579,13 @@
                                     std::string* error_msg) {
   ScopedAssertNoThreadSuspension nts(__FUNCTION__);
   const ImageHeader& header = space->GetImageHeader();
-  mirror::Object* dex_caches_object = header.GetImageRoot(ImageHeader::kDexCaches);
+  ObjPtr<mirror::Object> dex_caches_object = header.GetImageRoot(ImageHeader::kDexCaches);
   DCHECK(dex_caches_object != nullptr);
   mirror::ObjectArray<mirror::DexCache>* dex_caches =
       dex_caches_object->AsObjectArray<mirror::DexCache>();
   const OatFile* oat_file = space->GetOatFile();
   for (int32_t i = 0; i < dex_caches->GetLength(); i++) {
-    mirror::DexCache* dex_cache = dex_caches->Get(i);
+    ObjPtr<mirror::DexCache> dex_cache = dex_caches->Get(i);
     std::string dex_file_location(dex_cache->GetLocation()->ToModifiedUtf8());
     std::unique_ptr<const DexFile> dex_file = OpenOatDexFile(oat_file,
                                                              dex_file_location.c_str(),
@@ -1613,7 +1611,7 @@
   const uint64_t start_time = NanoTime();
   const bool app_image = class_loader.Get() != nullptr;
   const ImageHeader& header = space->GetImageHeader();
-  mirror::Object* dex_caches_object = header.GetImageRoot(ImageHeader::kDexCaches);
+  ObjPtr<mirror::Object> dex_caches_object = header.GetImageRoot(ImageHeader::kDexCaches);
   DCHECK(dex_caches_object != nullptr);
   Runtime* const runtime = Runtime::Current();
   gc::Heap* const heap = runtime->GetHeap();
@@ -1682,11 +1680,11 @@
       // TODO: Store image class loaders as image roots.
       GcRoot<mirror::Class>* const types = h_dex_cache->GetResolvedTypes();
       for (int32_t j = 0, num_types = h_dex_cache->NumResolvedTypes(); j < num_types; j++) {
-        mirror::Class* klass = types[j].Read();
+        ObjPtr<mirror::Class> klass = types[j].Read();
         if (klass != nullptr) {
           DCHECK_NE(klass->GetStatus(), mirror::Class::kStatusError);
-          mirror::ClassLoader* image_class_loader = klass->GetClassLoader();
-          image_class_loaders.insert(image_class_loader);
+          ObjPtr<mirror::ClassLoader> image_class_loader = klass->GetClassLoader();
+          image_class_loaders.insert(image_class_loader.Ptr());
         }
       }
     } else {
@@ -1712,7 +1710,7 @@
     // for PathClassLoader does this by looping through the array of dex files. To ensure they
     // resolve the same way, simply flatten the hierarchy in the way the resolution order would be,
     // and check that the dex file names are the same.
-    for (mirror::ClassLoader* image_class_loader : image_class_loaders) {
+    for (ObjPtr<mirror::ClassLoader> image_class_loader : image_class_loaders) {
       if (IsBootClassLoader(soa, image_class_loader)) {
         // The dex cache can reference types from the boot class loader.
         continue;
@@ -1733,10 +1731,10 @@
       // Add the temporary dex path list elements at the end.
       auto elements = soa.Decode<mirror::ObjectArray<mirror::Object>>(dex_elements);
       for (size_t i = 0, num_elems = elements->GetLength(); i < num_elems; ++i) {
-        mirror::Object* element = elements->GetWithoutChecks(i);
+        ObjPtr<mirror::Object> element = elements->GetWithoutChecks(i);
         if (element != nullptr) {
           // If we are somewhere in the middle of the array, there may be nulls at the end.
-          loader_dex_file_names.push_back(GetDexPathListElementName(soa, element));
+          loader_dex_file_names.push_back(GetDexPathListElementName(element));
         }
       }
       // Ignore the number of image dex files since we are adding those to the class loader anyways.
@@ -1754,11 +1752,11 @@
       }
       if (!equal) {
         VLOG(image) << "Image dex files " << image_dex_file_names.size();
-        for (mirror::String* name : image_dex_file_names) {
+        for (ObjPtr<mirror::String> name : image_dex_file_names) {
           VLOG(image) << name->ToModifiedUtf8();
         }
         VLOG(image) << "Loader dex files " << loader_dex_file_names.size();
-        for (mirror::String* name : loader_dex_file_names) {
+        for (ObjPtr<mirror::String> name : loader_dex_file_names) {
           VLOG(image) << name->ToModifiedUtf8();
         }
         *error_msg = "Rejecting application image due to class loader mismatch";
@@ -1805,9 +1803,6 @@
     temp_set = ClassTable::ClassSet(space->Begin() + class_table_section.Offset(),
                                     /*make copy*/false,
                                     &read_count);
-    if (!app_image) {
-      dex_cache_boot_image_class_lookup_required_ = false;
-    }
     VLOG(image) << "Adding class table classes took " << PrettyDuration(NanoTime() - start_time2);
   }
   if (app_image) {
@@ -1815,7 +1810,7 @@
     if (!UpdateAppImageClassLoadersAndDexCaches(space,
                                                 class_loader,
                                                 dex_caches,
-                                                added_class_table ? &temp_set : nullptr,
+                                                &temp_set,
                                                 /*out*/&forward_dex_cache_arrays,
                                                 /*out*/error_msg)) {
       return false;
@@ -1825,10 +1820,8 @@
     UpdateClassLoaderAndResolvedStringsVisitor visitor(space,
                                                        class_loader.Get(),
                                                        forward_dex_cache_arrays);
-    if (added_class_table) {
-      for (GcRoot<mirror::Class>& root : temp_set) {
-        visitor(root.Read());
-      }
+    for (GcRoot<mirror::Class>& root : temp_set) {
+      visitor(root.Read());
     }
     // forward_dex_cache_arrays is true iff we copied all of the dex cache arrays into the .bss.
     // In this case, madvise away the dex cache arrays section of the image to reduce RAM usage and
@@ -1846,6 +1839,12 @@
                     << reinterpret_cast<const void*>(section_end);
       }
     }
+    if (!oat_file->GetBssGcRoots().empty()) {
+      // Insert oat file to class table for visiting .bss GC roots.
+      class_table->InsertOatFile(oat_file);
+    }
+  } else {
+    DCHECK(oat_file->GetBssGcRoots().empty());
   }
   if (added_class_table) {
     WriterMutexLock mu(self, *Locks::classlinker_classes_lock_);
@@ -1861,7 +1860,7 @@
   return true;
 }
 
-bool ClassLinker::ClassInClassTable(mirror::Class* klass) {
+bool ClassLinker::ClassInClassTable(ObjPtr<mirror::Class> klass) {
   ClassTable* const class_table = ClassTableForClassLoader(klass->GetClassLoader());
   return class_table != nullptr && class_table->Contains(klass);
 }
@@ -1893,7 +1892,7 @@
     boot_class_table_.VisitRoots(buffered_visitor);
 
     // If tracing is enabled, then mark all the class loaders to prevent unloading.
-    if (tracing_enabled) {
+    if ((flags & kVisitRootFlagClassLoader) != 0 || tracing_enabled) {
       for (const ClassLoaderData& data : class_loaders_) {
         GcRoot<mirror::Object> root(GcRoot<mirror::Object>(self->DecodeJObject(data.weak_root)));
         root.VisitRoot(visitor, RootInfo(kRootVMInternal));
@@ -1901,9 +1900,9 @@
     }
   } else if ((flags & kVisitRootFlagNewRoots) != 0) {
     for (auto& root : new_class_roots_) {
-      mirror::Class* old_ref = root.Read<kWithoutReadBarrier>();
+      ObjPtr<mirror::Class> old_ref = root.Read<kWithoutReadBarrier>();
       root.VisitRoot(visitor, RootInfo(kRootStickyClass));
-      mirror::Class* new_ref = root.Read<kWithoutReadBarrier>();
+      ObjPtr<mirror::Class> new_ref = root.Read<kWithoutReadBarrier>();
       // Concurrent moving GC marked new roots through the to-space invariant.
       CHECK_EQ(new_ref, old_ref);
     }
@@ -1939,7 +1938,7 @@
       : visitor_(visitor),
         done_(false) {}
 
-  void Visit(mirror::ClassLoader* class_loader)
+  void Visit(ObjPtr<mirror::ClassLoader> class_loader)
       REQUIRES_SHARED(Locks::classlinker_classes_lock_, Locks::mutator_lock_) OVERRIDE {
     ClassTable* const class_table = class_loader->GetClassTable();
     if (!done_ && class_table != nullptr && !class_table->Visit(*visitor_)) {
@@ -1962,9 +1961,6 @@
 }
 
 void ClassLinker::VisitClasses(ClassVisitor* visitor) {
-  if (dex_cache_boot_image_class_lookup_required_) {
-    AddBootImageClassesToClassTable();
-  }
   Thread* const self = Thread::Current();
   ReaderMutexLock mu(self, *Locks::classlinker_classes_lock_);
   // Not safe to have thread suspension when we are holding a lock.
@@ -1978,11 +1974,11 @@
 
 class GetClassesInToVector : public ClassVisitor {
  public:
-  bool operator()(mirror::Class* klass) OVERRIDE {
+  bool operator()(ObjPtr<mirror::Class> klass) OVERRIDE {
     classes_.push_back(klass);
     return true;
   }
-  std::vector<mirror::Class*> classes_;
+  std::vector<ObjPtr<mirror::Class>> classes_;
 };
 
 class GetClassInToObjectArray : public ClassVisitor {
@@ -1990,7 +1986,7 @@
   explicit GetClassInToObjectArray(mirror::ObjectArray<mirror::Class>* arr)
       : arr_(arr), index_(0) {}
 
-  bool operator()(mirror::Class* klass) OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) {
+  bool operator()(ObjPtr<mirror::Class> klass) OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) {
     ++index_;
     if (index_ <= arr_->GetLength()) {
       arr_->Set(index_ - 1, klass);
@@ -2015,7 +2011,7 @@
     ScopedAssertNoThreadSuspension nts(__FUNCTION__);
     GetClassesInToVector accumulator;
     VisitClasses(&accumulator);
-    for (mirror::Class* klass : accumulator.classes_) {
+    for (ObjPtr<mirror::Class> klass : accumulator.classes_) {
       if (!visitor->operator()(klass)) {
         return;
       }
@@ -2033,8 +2029,8 @@
         // Add 100 in case new classes get loaded when we are filling in the object array.
         class_table_size = NumZygoteClasses() + NumNonZygoteClasses() + 100;
       }
-      mirror::Class* class_type = mirror::Class::GetJavaLangClass();
-      mirror::Class* array_of_class = FindArrayClass(self, &class_type);
+      ObjPtr<mirror::Class> class_type = mirror::Class::GetJavaLangClass();
+      ObjPtr<mirror::Class> array_of_class = FindArrayClass(self, &class_type);
       classes.Assign(
           mirror::ObjectArray<mirror::Class>::Alloc(self, array_of_class, class_table_size));
       CHECK(classes.Get() != nullptr);  // OOME.
@@ -2048,7 +2044,7 @@
       // If the class table shrank during creation of the clases array we expect null elements. If
       // the class table grew then the loop repeats. If classes are created after the loop has
       // finished then we don't visit.
-      mirror::Class* klass = classes->Get(i);
+      ObjPtr<mirror::Class> klass = classes->Get(i);
       if (klass != nullptr && !visitor->operator()(klass)) {
         return;
       }
@@ -2078,6 +2074,7 @@
   mirror::ShortArray::ResetArrayClass();
   mirror::MethodType::ResetClass();
   mirror::MethodHandleImpl::ResetClass();
+  mirror::EmulatedStackFrame::ResetClass();
   Thread* const self = Thread::Current();
   for (const ClassLoaderData& data : class_loaders_) {
     DeleteClassLoader(self, data);
@@ -2108,8 +2105,8 @@
 }
 
 void ClassLinker::InitializeDexCache(Thread* self,
-                                     mirror::DexCache* dex_cache,
-                                     mirror::String* location,
+                                     ObjPtr<mirror::DexCache> dex_cache,
+                                     ObjPtr<mirror::String> location,
                                      const DexFile& dex_file,
                                      LinearAlloc* linear_alloc) {
   ScopedAssertNoThreadSuspension sants(__FUNCTION__);
@@ -2147,21 +2144,18 @@
   //
   // If this needs to be mitigated in a production system running this code,
   // DexCache::kDexCacheMethodTypeCacheSize can be set to zero.
-  const bool is_method_handles_enabled = Runtime::Current()->IsMethodHandlesEnabled();
   mirror::MethodTypeDexCacheType* method_types = nullptr;
   size_t num_method_types = 0;
 
-  if (is_method_handles_enabled) {
-    if (dex_file.NumProtoIds() < mirror::DexCache::kDexCacheMethodTypeCacheSize) {
-      num_method_types = dex_file.NumProtoIds();
-    } else {
-      num_method_types = mirror::DexCache::kDexCacheMethodTypeCacheSize;
-    }
+  if (dex_file.NumProtoIds() < mirror::DexCache::kDexCacheMethodTypeCacheSize) {
+    num_method_types = dex_file.NumProtoIds();
+  } else {
+    num_method_types = mirror::DexCache::kDexCacheMethodTypeCacheSize;
+  }
 
-    if (num_method_types > 0) {
-      method_types = reinterpret_cast<mirror::MethodTypeDexCacheType*>(
-          raw_arrays + layout.MethodTypesOffset());
-    }
+  if (num_method_types > 0) {
+    method_types = reinterpret_cast<mirror::MethodTypeDexCacheType*>(
+        raw_arrays + layout.MethodTypesOffset());
   }
 
   DCHECK_ALIGNED(raw_arrays, alignof(mirror::StringDexCacheType)) <<
@@ -2213,18 +2207,18 @@
                   image_pointer_size_);
 }
 
-mirror::DexCache* ClassLinker::AllocDexCache(mirror::String** out_location,
+mirror::DexCache* ClassLinker::AllocDexCache(ObjPtr<mirror::String>* out_location,
                                              Thread* self,
                                              const DexFile& dex_file) {
   StackHandleScope<1> hs(self);
   DCHECK(out_location != nullptr);
-  auto dex_cache(hs.NewHandle(down_cast<mirror::DexCache*>(
+  auto dex_cache(hs.NewHandle(ObjPtr<mirror::DexCache>::DownCast(
       GetClassRoot(kJavaLangDexCache)->AllocObject(self))));
   if (dex_cache.Get() == nullptr) {
     self->AssertPendingOOMException();
     return nullptr;
   }
-  mirror::String* location = intern_table_->InternStrong(dex_file.GetLocation().c_str());
+  ObjPtr<mirror::String> location = intern_table_->InternStrong(dex_file.GetLocation().c_str());
   if (location == nullptr) {
     self->AssertPendingOOMException();
     return nullptr;
@@ -2236,22 +2230,23 @@
 mirror::DexCache* ClassLinker::AllocAndInitializeDexCache(Thread* self,
                                                           const DexFile& dex_file,
                                                           LinearAlloc* linear_alloc) {
-  mirror::String* location = nullptr;
-  mirror::DexCache* dex_cache = AllocDexCache(&location, self, dex_file);
+  ObjPtr<mirror::String> location = nullptr;
+  ObjPtr<mirror::DexCache> dex_cache = AllocDexCache(&location, self, dex_file);
   if (dex_cache != nullptr) {
     WriterMutexLock mu(self, dex_lock_);
     DCHECK(location != nullptr);
     InitializeDexCache(self, dex_cache, location, dex_file, linear_alloc);
   }
-  return dex_cache;
+  return dex_cache.Ptr();
 }
 
-mirror::Class* ClassLinker::AllocClass(Thread* self, mirror::Class* java_lang_Class,
+mirror::Class* ClassLinker::AllocClass(Thread* self,
+                                       ObjPtr<mirror::Class> java_lang_Class,
                                        uint32_t class_size) {
   DCHECK_GE(class_size, sizeof(mirror::Class));
   gc::Heap* heap = Runtime::Current()->GetHeap();
   mirror::Class::InitializeClassVisitor visitor(class_size);
-  mirror::Object* k = kMovingClasses ?
+  ObjPtr<mirror::Object> k = kMovingClasses ?
       heap->AllocObject<true>(self, java_lang_Class, class_size, visitor) :
       heap->AllocNonMovableObject<true>(self, java_lang_Class, class_size, visitor);
   if (UNLIKELY(k == nullptr)) {
@@ -2274,9 +2269,13 @@
 
 mirror::Class* ClassLinker::EnsureResolved(Thread* self,
                                            const char* descriptor,
-                                           mirror::Class* klass) {
+                                           ObjPtr<mirror::Class> klass) {
   DCHECK(klass != nullptr);
-  Thread::PoisonObjectPointersIfDebug();
+  if (kIsDebugBuild) {
+    StackHandleScope<1> hs(self);
+    HandleWrapperObjPtr<mirror::Class> h = hs.NewHandleWrapper(&klass);
+    Thread::PoisonObjectPointersIfDebug();
+  }
 
   // For temporary classes we must wait for them to be retired.
   if (init_done_ && klass->IsTemp()) {
@@ -2298,8 +2297,7 @@
     }
     CHECK(h_class->IsRetired());
     // Get the updated class from class table.
-    klass = LookupClass(self, descriptor, ComputeModifiedUtf8Hash(descriptor),
-                        h_class.Get()->GetClassLoader());
+    klass = LookupClass(self, descriptor, h_class.Get()->GetClassLoader());
   }
 
   // Wait for the class if it has not already been linked.
@@ -2310,7 +2308,7 @@
   static const size_t kSleepDurationUS = 1000;  // 1 ms.
   while (!klass->IsResolved() && !klass->IsErroneous()) {
     StackHandleScope<1> hs(self);
-    HandleWrapper<mirror::Class> h_class(hs.NewHandleWrapper(&klass));
+    HandleWrapperObjPtr<mirror::Class> h_class(hs.NewHandleWrapper(&klass));
     {
       ObjectTryLock<mirror::Class> lock(self, h_class);
       // Can not use a monitor wait here since it may block when returning and deadlock if another
@@ -2341,9 +2339,9 @@
     return nullptr;
   }
   // Return the loaded class.  No exceptions should be pending.
-  CHECK(klass->IsResolved()) << PrettyClass(klass);
+  CHECK(klass->IsResolved()) << klass->PrettyClass();
   self->AssertNoPendingException();
-  return klass;
+  return klass.Ptr();
 }
 
 typedef std::pair<const DexFile*, const DexFile::ClassDef*> ClassPathEntry;
@@ -2360,18 +2358,18 @@
   return ClassPathEntry(nullptr, nullptr);
 }
 
-bool ClassLinker::FindClassInPathClassLoader(ScopedObjectAccessAlreadyRunnable& soa,
-                                             Thread* self,
-                                             const char* descriptor,
-                                             size_t hash,
-                                             Handle<mirror::ClassLoader> class_loader,
-                                             mirror::Class** result) {
+bool ClassLinker::FindClassInBaseDexClassLoader(ScopedObjectAccessAlreadyRunnable& soa,
+                                                Thread* self,
+                                                const char* descriptor,
+                                                size_t hash,
+                                                Handle<mirror::ClassLoader> class_loader,
+                                                ObjPtr<mirror::Class>* result) {
   // Termination case: boot class-loader.
   if (IsBootClassLoader(soa, class_loader.Get())) {
     // The boot class loader, search the boot class path.
     ClassPathEntry pair = FindInClassPath(descriptor, hash, boot_class_path_);
     if (pair.second != nullptr) {
-      mirror::Class* klass = LookupClass(self, descriptor, hash, nullptr);
+      ObjPtr<mirror::Class> klass = LookupClass(self, descriptor, hash, nullptr);
       if (klass != nullptr) {
         *result = EnsureResolved(self, descriptor, klass);
       } else {
@@ -2395,14 +2393,24 @@
   // Unsupported class-loader?
   if (soa.Decode<mirror::Class>(WellKnownClasses::dalvik_system_PathClassLoader) !=
       class_loader->GetClass()) {
-    *result = nullptr;
-    return false;
+    // PathClassLoader is the most common case, so it's the one we check first. For secondary dex
+    // files, we also check DexClassLoader here.
+    if (soa.Decode<mirror::Class>(WellKnownClasses::dalvik_system_DexClassLoader) !=
+        class_loader->GetClass()) {
+      *result = nullptr;
+      return false;
+    }
   }
 
   // Handles as RegisterDexFile may allocate dex caches (and cause thread suspension).
   StackHandleScope<4> hs(self);
   Handle<mirror::ClassLoader> h_parent(hs.NewHandle(class_loader->GetParent()));
-  bool recursive_result = FindClassInPathClassLoader(soa, self, descriptor, hash, h_parent, result);
+  bool recursive_result = FindClassInBaseDexClassLoader(soa,
+                                                        self,
+                                                        descriptor,
+                                                        hash,
+                                                        h_parent,
+                                                        result);
 
   if (!recursive_result) {
     // Something wrong up the chain.
@@ -2418,16 +2426,17 @@
   // Handle as if this is the child PathClassLoader.
   // The class loader is a PathClassLoader which inherits from BaseDexClassLoader.
   // We need to get the DexPathList and loop through it.
-  ArtField* const cookie_field = soa.DecodeField(WellKnownClasses::dalvik_system_DexFile_cookie);
+  ArtField* const cookie_field =
+      jni::DecodeArtField(WellKnownClasses::dalvik_system_DexFile_cookie);
   ArtField* const dex_file_field =
-      soa.DecodeField(WellKnownClasses::dalvik_system_DexPathList__Element_dexFile);
+      jni::DecodeArtField(WellKnownClasses::dalvik_system_DexPathList__Element_dexFile);
   ObjPtr<mirror::Object> dex_path_list =
-      soa.DecodeField(WellKnownClasses::dalvik_system_PathClassLoader_pathList)->
-      GetObject(class_loader.Get());
+      jni::DecodeArtField(WellKnownClasses::dalvik_system_BaseDexClassLoader_pathList)->
+          GetObject(class_loader.Get());
   if (dex_path_list != nullptr && dex_file_field != nullptr && cookie_field != nullptr) {
     // DexPathList has an array dexElements of Elements[] which each contain a dex file.
     ObjPtr<mirror::Object> dex_elements_obj =
-        soa.DecodeField(WellKnownClasses::dalvik_system_DexPathList_dexElements)->
+        jni::DecodeArtField(WellKnownClasses::dalvik_system_DexPathList_dexElements)->
         GetObject(dex_path_list);
     // Loop through each dalvik.system.DexPathList$Element's dalvik.system.DexFile and look
     // at the mCookie which is a DexFile vector.
@@ -2456,7 +2465,7 @@
             const DexFile::ClassDef* dex_class_def =
                 OatDexFile::FindClassDef(*cp_dex_file, descriptor, hash);
             if (dex_class_def != nullptr) {
-              mirror::Class* klass = DefineClass(self,
+              ObjPtr<mirror::Class> klass = DefineClass(self,
                                                  descriptor,
                                                  hash,
                                                  class_loader,
@@ -2496,7 +2505,7 @@
   }
   const size_t hash = ComputeModifiedUtf8Hash(descriptor);
   // Find the class in the loaded classes table.
-  mirror::Class* klass = LookupClass(self, descriptor, hash, class_loader.Get());
+  ObjPtr<mirror::Class> klass = LookupClass(self, descriptor, hash, class_loader.Get());
   if (klass != nullptr) {
     return EnsureResolved(self, descriptor, klass);
   }
@@ -2517,21 +2526,21 @@
       // The boot class loader is searched ahead of the application class loader, failures are
       // expected and will be wrapped in a ClassNotFoundException. Use the pre-allocated error to
       // trigger the chaining with a proper stack trace.
-      mirror::Throwable* pre_allocated = Runtime::Current()->GetPreAllocatedNoClassDefFoundError();
+      ObjPtr<mirror::Throwable> pre_allocated = Runtime::Current()->GetPreAllocatedNoClassDefFoundError();
       self->SetException(pre_allocated);
       return nullptr;
     }
   } else {
     ScopedObjectAccessUnchecked soa(self);
-    mirror::Class* cp_klass;
-    if (FindClassInPathClassLoader(soa, self, descriptor, hash, class_loader, &cp_klass)) {
+    ObjPtr<mirror::Class> cp_klass;
+    if (FindClassInBaseDexClassLoader(soa, self, descriptor, hash, class_loader, &cp_klass)) {
       // The chain was understood. So the value in cp_klass is either the class we were looking
       // for, or not found.
       if (cp_klass != nullptr) {
-        return cp_klass;
+        return cp_klass.Ptr();
       }
-      // TODO: We handle the boot classpath loader in FindClassInPathClassLoader. Try to unify this
-      //       and the branch above. TODO: throw the right exception here.
+      // TODO: We handle the boot classpath loader in FindClassInBaseDexClassLoader. Try to unify
+      //       this and the branch above. TODO: throw the right exception here.
 
       // We'll let the Java-side rediscover all this and throw the exception with the right stack
       // trace.
@@ -2539,7 +2548,7 @@
 
     if (Runtime::Current()->IsAotCompiler()) {
       // Oops, compile-time, can't run actual class-loader code.
-      mirror::Throwable* pre_allocated = Runtime::Current()->GetPreAllocatedNoClassDefFoundError();
+      ObjPtr<mirror::Throwable> pre_allocated = Runtime::Current()->GetPreAllocatedNoClassDefFoundError();
       self->SetException(pre_allocated);
       return nullptr;
     }
@@ -2599,6 +2608,8 @@
       klass.Assign(GetClassRoot(kJavaLangRefReference));
     } else if (strcmp(descriptor, "Ljava/lang/DexCache;") == 0) {
       klass.Assign(GetClassRoot(kJavaLangDexCache));
+    } else if (strcmp(descriptor, "Ldalvik/system/ClassExt;") == 0) {
+      klass.Assign(GetClassRoot(kDalvikSystemClassExt));
     }
   }
 
@@ -2613,7 +2624,7 @@
     self->AssertPendingOOMException();
     return nullptr;
   }
-  mirror::DexCache* dex_cache = RegisterDexFile(dex_file, class_loader.Get());
+  ObjPtr<mirror::DexCache> dex_cache = RegisterDexFile(dex_file, class_loader.Get());
   if (dex_cache == nullptr) {
     self->AssertPendingOOMException();
     return nullptr;
@@ -2632,7 +2643,7 @@
   klass->SetClinitThreadId(self->GetTid());
 
   // Add the newly loaded class to the loaded classes table.
-  mirror::Class* existing = InsertClass(descriptor, klass.Get(), hash);
+  ObjPtr<mirror::Class> existing = InsertClass(descriptor, klass.Get(), hash);
   if (existing != nullptr) {
     // We failed to insert because we raced with another thread. Calling EnsureResolved may cause
     // this thread to block.
@@ -2778,7 +2789,7 @@
 
 // Special case to get oat code without overwriting a trampoline.
 const void* ClassLinker::GetQuickOatCodeFor(ArtMethod* method) {
-  CHECK(method->IsInvokable()) << PrettyMethod(method);
+  CHECK(method->IsInvokable()) << method->PrettyMethod();
   if (method->IsProxyMethod()) {
     return GetQuickProxyInvokeHandler();
   }
@@ -2818,6 +2829,13 @@
     return true;
   }
 
+  if (runtime->IsFullyDeoptable()) {
+    // We need to be able to deoptimize at any time so we should always just ignore precompiled
+    // code and go to the interpreter assuming we don't already have jitted code.
+    jit::Jit* jit = Runtime::Current()->GetJit();
+    return (jit == nullptr) || !jit->GetCodeCache()->ContainsPc(quick_code);
+  }
+
   if (runtime->IsNativeDebuggable()) {
     DCHECK(runtime->UseJitCompilation() && runtime->GetJit()->JitAtFirstUse());
     // If we are doing native debugging, ignore application's AOT code,
@@ -2836,8 +2854,8 @@
   return false;
 }
 
-void ClassLinker::FixupStaticTrampolines(mirror::Class* klass) {
-  DCHECK(klass->IsInitialized()) << PrettyDescriptor(klass);
+void ClassLinker::FixupStaticTrampolines(ObjPtr<mirror::Class> klass) {
+  DCHECK(klass->IsInitialized()) << klass->PrettyDescriptor();
   if (klass->NumDirectMethods() == 0) {
     return;  // No direct methods => no static methods.
   }
@@ -2853,7 +2871,7 @@
   CHECK(dex_class_def != nullptr);
   const uint8_t* class_data = dex_file.GetClassData(*dex_class_def);
   // There should always be class data if there were direct methods.
-  CHECK(class_data != nullptr) << PrettyDescriptor(klass);
+  CHECK(class_data != nullptr) << klass->PrettyDescriptor();
   ClassDataItemIterator it(dex_file, class_data);
   // Skip fields
   while (it.HasNextStaticField()) {
@@ -2957,7 +2975,7 @@
 void ClassLinker::SetupClass(const DexFile& dex_file,
                              const DexFile::ClassDef& dex_class_def,
                              Handle<mirror::Class> klass,
-                             mirror::ClassLoader* class_loader) {
+                             ObjPtr<mirror::ClassLoader> class_loader) {
   CHECK(klass.Get() != nullptr);
   CHECK(klass->GetDexCache() != nullptr);
   CHECK_EQ(mirror::Class::kStatusNotReady, klass->GetStatus());
@@ -3023,7 +3041,7 @@
   return ret;
 }
 
-LinearAlloc* ClassLinker::GetAllocatorForClassLoader(mirror::ClassLoader* class_loader) {
+LinearAlloc* ClassLinker::GetAllocatorForClassLoader(ObjPtr<mirror::ClassLoader> class_loader) {
   if (class_loader == nullptr) {
     return Runtime::Current()->GetLinearAlloc();
   }
@@ -3032,7 +3050,7 @@
   return allocator;
 }
 
-LinearAlloc* ClassLinker::GetOrCreateAllocatorForClassLoader(mirror::ClassLoader* class_loader) {
+LinearAlloc* ClassLinker::GetOrCreateAllocatorForClassLoader(ObjPtr<mirror::ClassLoader> class_loader) {
   if (class_loader == nullptr) {
     return Runtime::Current()->GetLinearAlloc();
   }
@@ -3092,7 +3110,7 @@
     }
     if (UNLIKELY(num_sfields != it.NumStaticFields()) ||
         UNLIKELY(num_ifields != it.NumInstanceFields())) {
-      LOG(WARNING) << "Duplicate fields in class " << PrettyDescriptor(klass.Get())
+      LOG(WARNING) << "Duplicate fields in class " << klass->PrettyDescriptor()
           << " (unique static fields: " << num_sfields << "/" << it.NumStaticFields()
           << ", unique instance fields: " << num_ifields << "/" << it.NumInstanceFields() << ")";
       // NOTE: Not shrinking the over-allocated sfields/ifields, just setting size.
@@ -3208,7 +3226,7 @@
     } else {
       if (UNLIKELY((access_flags & kAccConstructor) == 0)) {
         LOG(WARNING) << method_name << " didn't have expected constructor access flag in class "
-            << PrettyDescriptor(klass.Get()) << " in dex file " << dex_file.GetLocation();
+            << klass->PrettyDescriptor() << " in dex file " << dex_file.GetLocation();
         access_flags |= kAccConstructor;
       }
     }
@@ -3276,13 +3294,13 @@
 }
 
 mirror::DexCache* ClassLinker::RegisterDexFile(const DexFile& dex_file,
-                                               mirror::ClassLoader* class_loader) {
+                                               ObjPtr<mirror::ClassLoader> class_loader) {
   Thread* self = Thread::Current();
   {
     ReaderMutexLock mu(self, dex_lock_);
-    mirror::DexCache* dex_cache = FindDexCacheLocked(self, dex_file, true);
+    ObjPtr<mirror::DexCache> dex_cache = FindDexCacheLocked(self, dex_file, true);
     if (dex_cache != nullptr) {
-      return dex_cache;
+      return dex_cache.Ptr();
     }
   }
   LinearAlloc* const linear_alloc = GetOrCreateAllocatorForClassLoader(class_loader);
@@ -3296,16 +3314,20 @@
   // suspend all threads and another thread may need the dex_lock_ to
   // get to a suspend point.
   StackHandleScope<2> hs(self);
-  mirror::String* location;
+  ObjPtr<mirror::String> location;
   Handle<mirror::DexCache> h_dex_cache(hs.NewHandle(AllocDexCache(/*out*/&location,
                                                                   self,
                                                                   dex_file)));
   Handle<mirror::String> h_location(hs.NewHandle(location));
   {
     WriterMutexLock mu(self, dex_lock_);
-    mirror::DexCache* dex_cache = FindDexCacheLocked(self, dex_file, true);
+    ObjPtr<mirror::DexCache> dex_cache = FindDexCacheLocked(self, dex_file, true);
     if (dex_cache != nullptr) {
-      return dex_cache;
+      // Another thread managed to initialize the dex cache faster, so use that DexCache.
+      // If this thread encountered OOME, ignore it.
+      DCHECK_EQ(h_dex_cache.Get() == nullptr, self->IsExceptionPending());
+      self->ClearException();
+      return dex_cache.Ptr();
     }
     if (h_dex_cache.Get() == nullptr) {
       self->AssertPendingOOMException();
@@ -3341,13 +3363,12 @@
   for (const DexCacheData& data : dex_caches_) {
     // Avoid decoding (and read barriers) other unrelated dex caches.
     if (data.dex_file == &dex_file) {
-      mirror::DexCache* dex_cache =
-          down_cast<mirror::DexCache*>(self->DecodeJObject(data.weak_root));
+      ObjPtr<mirror::DexCache> dex_cache =
+          ObjPtr<mirror::DexCache>::DownCast(self->DecodeJObject(data.weak_root));
       if (dex_cache != nullptr) {
-        return dex_cache;
-      } else {
-        break;
+        return dex_cache.Ptr();
       }
+      break;
     }
   }
   if (allow_failure) {
@@ -3356,7 +3377,8 @@
   std::string location(dex_file.GetLocation());
   // Failure, dump diagnostic and abort.
   for (const DexCacheData& data : dex_caches_) {
-    mirror::DexCache* dex_cache = down_cast<mirror::DexCache*>(self->DecodeJObject(data.weak_root));
+    ObjPtr<mirror::DexCache> dex_cache =
+        ObjPtr<mirror::DexCache>::DownCast(self->DecodeJObject(data.weak_root));
     if (dex_cache != nullptr) {
       LOG(ERROR) << "Registered dex file " << dex_cache->GetDexFile()->GetLocation();
     }
@@ -3370,7 +3392,7 @@
   ReaderMutexLock mu(self, dex_lock_);
   for (const DexCacheData& data : dex_caches_) {
     if (!self->IsJWeakCleared(data.weak_root)) {
-      mirror::DexCache* dex_cache = down_cast<mirror::DexCache*>(
+      ObjPtr<mirror::DexCache> dex_cache = ObjPtr<mirror::DexCache>::DownCast(
           self->DecodeJObject(data.weak_root));
       if (dex_cache != nullptr) {
         dex_cache->Fixup(resolution_method, image_pointer_size_);
@@ -3380,7 +3402,8 @@
 }
 
 mirror::Class* ClassLinker::CreatePrimitiveClass(Thread* self, Primitive::Type type) {
-  mirror::Class* klass = AllocClass(self, mirror::Class::PrimitiveClassSize(image_pointer_size_));
+  ObjPtr<mirror::Class> klass =
+      AllocClass(self, mirror::Class::PrimitiveClassSize(image_pointer_size_));
   if (UNLIKELY(klass == nullptr)) {
     self->AssertPendingOOMException();
     return nullptr;
@@ -3388,7 +3411,7 @@
   return InitializePrimitiveClass(klass, type);
 }
 
-mirror::Class* ClassLinker::InitializePrimitiveClass(mirror::Class* primitive_class,
+mirror::Class* ClassLinker::InitializePrimitiveClass(ObjPtr<mirror::Class> primitive_class,
                                                      Primitive::Type type) {
   CHECK(primitive_class != nullptr);
   // Must hold lock on object when initializing.
@@ -3398,10 +3421,12 @@
   ObjectLock<mirror::Class> lock(self, h_class);
   h_class->SetAccessFlags(kAccPublic | kAccFinal | kAccAbstract);
   h_class->SetPrimitiveType(type);
+  h_class->SetIfTable(GetClassRoot(kJavaLangObject)->GetIfTable());
   mirror::Class::SetStatus(h_class, mirror::Class::kStatusInitialized, self);
   const char* descriptor = Primitive::Descriptor(type);
-  mirror::Class* existing = InsertClass(descriptor, h_class.Get(),
-                                        ComputeModifiedUtf8Hash(descriptor));
+  ObjPtr<mirror::Class> existing = InsertClass(descriptor,
+                                               h_class.Get(),
+                                               ComputeModifiedUtf8Hash(descriptor));
   CHECK(existing == nullptr) << "InitPrimitiveClass(" << type << ") failed";
   return h_class.Get();
 }
@@ -3460,9 +3485,9 @@
   // class to the hash table --- necessary because of possible races with
   // other threads.)
   if (class_loader.Get() != component_type->GetClassLoader()) {
-    mirror::Class* new_class = LookupClass(self, descriptor, hash, component_type->GetClassLoader());
+    ObjPtr<mirror::Class> new_class = LookupClass(self, descriptor, hash, component_type->GetClassLoader());
     if (new_class != nullptr) {
-      return new_class;
+      return new_class.Ptr();
     }
   }
 
@@ -3501,7 +3526,7 @@
   }
   ObjectLock<mirror::Class> lock(self, new_class);  // Must hold lock on object when initializing.
   DCHECK(new_class->GetComponentType() != nullptr);
-  mirror::Class* java_lang_Object = GetClassRoot(kJavaLangObject);
+  ObjPtr<mirror::Class> java_lang_Object = GetClassRoot(kJavaLangObject);
   new_class->SetSuperClass(java_lang_Object);
   new_class->SetVTable(java_lang_Object->GetVTable());
   new_class->SetPrimitiveType(Primitive::kPrimNot);
@@ -3532,7 +3557,7 @@
   // Use the single, global copies of "interfaces" and "iftable"
   // (remember not to free them for arrays).
   {
-    mirror::IfTable* array_iftable = array_iftable_.Read();
+    ObjPtr<mirror::IfTable> array_iftable = array_iftable_.Read();
     CHECK(array_iftable != nullptr);
     new_class->SetIfTable(array_iftable);
   }
@@ -3548,7 +3573,7 @@
 
   new_class->SetAccessFlags(access_flags);
 
-  mirror::Class* existing = InsertClass(descriptor, new_class.Get(), hash);
+  ObjPtr<mirror::Class> existing = InsertClass(descriptor, new_class.Get(), hash);
   if (existing == nullptr) {
     jit::Jit::NewTypeLoadedIfUsingJit(new_class.Get());
     return new_class.Get();
@@ -3559,7 +3584,7 @@
   //
   // (Yes, this happens.)
 
-  return existing;
+  return existing.Ptr();
 }
 
 mirror::Class* ClassLinker::FindPrimitiveClass(char type) {
@@ -3590,9 +3615,9 @@
   return nullptr;
 }
 
-mirror::Class* ClassLinker::InsertClass(const char* descriptor, mirror::Class* klass, size_t hash) {
+mirror::Class* ClassLinker::InsertClass(const char* descriptor, ObjPtr<mirror::Class> klass, size_t hash) {
   if (VLOG_IS_ON(class_linker)) {
-    mirror::DexCache* dex_cache = klass->GetDexCache();
+    ObjPtr<mirror::DexCache> dex_cache = klass->GetDexCache();
     std::string source;
     if (dex_cache != nullptr) {
       source += " from ";
@@ -3602,22 +3627,11 @@
   }
   {
     WriterMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
-    mirror::ClassLoader* const class_loader = klass->GetClassLoader();
+    ObjPtr<mirror::ClassLoader> const class_loader = klass->GetClassLoader();
     ClassTable* const class_table = InsertClassTableForClassLoader(class_loader);
-    mirror::Class* existing = class_table->Lookup(descriptor, hash);
+    ObjPtr<mirror::Class> existing = class_table->Lookup(descriptor, hash);
     if (existing != nullptr) {
-      return existing;
-    }
-    if (kIsDebugBuild &&
-        !klass->IsTemp() &&
-        class_loader == nullptr &&
-        dex_cache_boot_image_class_lookup_required_) {
-      // Check a class loaded with the system class loader matches one in the image if the class
-      // is in the image.
-      existing = LookupClassFromBootImage(descriptor);
-      if (existing != nullptr) {
-        CHECK_EQ(klass, existing);
-      }
+      return existing.Ptr();
     }
     VerifyObject(klass);
     class_table->InsertWithHash(klass, hash);
@@ -3639,7 +3653,7 @@
 }
 
 // TODO This should really be in mirror::Class.
-void ClassLinker::UpdateClassMethods(mirror::Class* klass,
+void ClassLinker::UpdateClassMethods(ObjPtr<mirror::Class> klass,
                                      LengthPrefixedArray<ArtMethod>* new_methods) {
   klass->SetMethodsPtrUnchecked(new_methods,
                                 klass->NumDirectMethods(),
@@ -3648,7 +3662,7 @@
   Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(klass);
 }
 
-bool ClassLinker::RemoveClass(const char* descriptor, mirror::ClassLoader* class_loader) {
+bool ClassLinker::RemoveClass(const char* descriptor, ObjPtr<mirror::ClassLoader> class_loader) {
   WriterMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
   ClassTable* const class_table = ClassTableForClassLoader(class_loader);
   return class_table != nullptr && class_table->Remove(descriptor);
@@ -3657,98 +3671,23 @@
 mirror::Class* ClassLinker::LookupClass(Thread* self,
                                         const char* descriptor,
                                         size_t hash,
-                                        mirror::ClassLoader* class_loader) {
-  {
-    ReaderMutexLock mu(self, *Locks::classlinker_classes_lock_);
-    ClassTable* const class_table = ClassTableForClassLoader(class_loader);
-    if (class_table != nullptr) {
-      mirror::Class* result = class_table->Lookup(descriptor, hash);
-      if (result != nullptr) {
-        return result;
-      }
+                                        ObjPtr<mirror::ClassLoader> class_loader) {
+  ReaderMutexLock mu(self, *Locks::classlinker_classes_lock_);
+  ClassTable* const class_table = ClassTableForClassLoader(class_loader);
+  if (class_table != nullptr) {
+    ObjPtr<mirror::Class> result = class_table->Lookup(descriptor, hash);
+    if (result != nullptr) {
+      return result.Ptr();
     }
   }
-  if (class_loader != nullptr || !dex_cache_boot_image_class_lookup_required_) {
-    return nullptr;
-  }
-  // Lookup failed but need to search dex_caches_.
-  mirror::Class* result = LookupClassFromBootImage(descriptor);
-  if (result != nullptr) {
-    result = InsertClass(descriptor, result, hash);
-  } else {
-    // Searching the image dex files/caches failed, we don't want to get into this situation
-    // often as map searches are faster, so after kMaxFailedDexCacheLookups move all image
-    // classes into the class table.
-    constexpr uint32_t kMaxFailedDexCacheLookups = 1000;
-    if (++failed_dex_cache_class_lookups_ > kMaxFailedDexCacheLookups) {
-      AddBootImageClassesToClassTable();
-    }
-  }
-  return result;
-}
-
-static std::vector<mirror::ObjectArray<mirror::DexCache>*> GetImageDexCaches(
-    std::vector<gc::space::ImageSpace*> image_spaces) REQUIRES_SHARED(Locks::mutator_lock_) {
-  CHECK(!image_spaces.empty());
-  std::vector<mirror::ObjectArray<mirror::DexCache>*> dex_caches_vector;
-  for (gc::space::ImageSpace* image_space : image_spaces) {
-    mirror::Object* root = image_space->GetImageHeader().GetImageRoot(ImageHeader::kDexCaches);
-    DCHECK(root != nullptr);
-    dex_caches_vector.push_back(root->AsObjectArray<mirror::DexCache>());
-  }
-  return dex_caches_vector;
-}
-
-void ClassLinker::AddBootImageClassesToClassTable() {
-  if (dex_cache_boot_image_class_lookup_required_) {
-    AddImageClassesToClassTable(Runtime::Current()->GetHeap()->GetBootImageSpaces(),
-                                /*class_loader*/nullptr);
-    dex_cache_boot_image_class_lookup_required_ = false;
-  }
-}
-
-void ClassLinker::AddImageClassesToClassTable(std::vector<gc::space::ImageSpace*> image_spaces,
-                                              mirror::ClassLoader* class_loader) {
-  Thread* self = Thread::Current();
-  WriterMutexLock mu(self, *Locks::classlinker_classes_lock_);
-  ScopedAssertNoThreadSuspension ants("Moving image classes to class table");
-
-  ClassTable* const class_table = InsertClassTableForClassLoader(class_loader);
-
-  std::string temp;
-  std::vector<mirror::ObjectArray<mirror::DexCache>*> dex_caches_vector =
-      GetImageDexCaches(image_spaces);
-  for (mirror::ObjectArray<mirror::DexCache>* dex_caches : dex_caches_vector) {
-    for (int32_t i = 0; i < dex_caches->GetLength(); i++) {
-      mirror::DexCache* dex_cache = dex_caches->Get(i);
-      GcRoot<mirror::Class>* types = dex_cache->GetResolvedTypes();
-      for (int32_t j = 0, num_types = dex_cache->NumResolvedTypes(); j < num_types; j++) {
-        mirror::Class* klass = types[j].Read();
-        if (klass != nullptr) {
-          DCHECK_EQ(klass->GetClassLoader(), class_loader);
-          const char* descriptor = klass->GetDescriptor(&temp);
-          size_t hash = ComputeModifiedUtf8Hash(descriptor);
-          mirror::Class* existing = class_table->Lookup(descriptor, hash);
-          if (existing != nullptr) {
-            CHECK_EQ(existing, klass) << PrettyClassAndClassLoader(existing) << " != "
-                << PrettyClassAndClassLoader(klass);
-          } else {
-            class_table->Insert(klass);
-            if (log_new_class_table_roots_) {
-              new_class_roots_.push_back(GcRoot<mirror::Class>(klass));
-            }
-          }
-        }
-      }
-    }
-  }
+  return nullptr;
 }
 
 class MoveClassTableToPreZygoteVisitor : public ClassLoaderVisitor {
  public:
   explicit MoveClassTableToPreZygoteVisitor() {}
 
-  void Visit(mirror::ClassLoader* class_loader)
+  void Visit(ObjPtr<mirror::ClassLoader> class_loader)
       REQUIRES(Locks::classlinker_classes_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_) OVERRIDE {
     ClassTable* const class_table = class_loader->GetClassTable();
@@ -3765,40 +3704,20 @@
   VisitClassLoaders(&visitor);
 }
 
-mirror::Class* ClassLinker::LookupClassFromBootImage(const char* descriptor) {
-  ScopedAssertNoThreadSuspension ants("Image class lookup");
-  std::vector<mirror::ObjectArray<mirror::DexCache>*> dex_caches_vector =
-      GetImageDexCaches(Runtime::Current()->GetHeap()->GetBootImageSpaces());
-  for (mirror::ObjectArray<mirror::DexCache>* dex_caches : dex_caches_vector) {
-    for (int32_t i = 0; i < dex_caches->GetLength(); ++i) {
-      mirror::DexCache* dex_cache = dex_caches->Get(i);
-      const DexFile* dex_file = dex_cache->GetDexFile();
-      // Try binary searching the type index by descriptor.
-      const DexFile::TypeId* type_id = dex_file->FindTypeId(descriptor);
-      if (type_id != nullptr) {
-        uint16_t type_idx = dex_file->GetIndexForTypeId(*type_id);
-        mirror::Class* klass = dex_cache->GetResolvedType(type_idx);
-        if (klass != nullptr) {
-          return klass;
-        }
-      }
-    }
-  }
-  return nullptr;
-}
-
 // Look up classes by hash and descriptor and put all matching ones in the result array.
 class LookupClassesVisitor : public ClassLoaderVisitor {
  public:
-  LookupClassesVisitor(const char* descriptor, size_t hash, std::vector<mirror::Class*>* result)
+  LookupClassesVisitor(const char* descriptor,
+                       size_t hash,
+                       std::vector<ObjPtr<mirror::Class>>* result)
      : descriptor_(descriptor),
        hash_(hash),
        result_(result) {}
 
-  void Visit(mirror::ClassLoader* class_loader)
+  void Visit(ObjPtr<mirror::ClassLoader> class_loader)
       REQUIRES_SHARED(Locks::classlinker_classes_lock_, Locks::mutator_lock_) OVERRIDE {
     ClassTable* const class_table = class_loader->GetClassTable();
-    mirror::Class* klass = class_table->Lookup(descriptor_, hash_);
+    ObjPtr<mirror::Class> klass = class_table->Lookup(descriptor_, hash_);
     if (klass != nullptr) {
       result_->push_back(klass);
     }
@@ -3807,18 +3726,16 @@
  private:
   const char* const descriptor_;
   const size_t hash_;
-  std::vector<mirror::Class*>* const result_;
+  std::vector<ObjPtr<mirror::Class>>* const result_;
 };
 
-void ClassLinker::LookupClasses(const char* descriptor, std::vector<mirror::Class*>& result) {
+void ClassLinker::LookupClasses(const char* descriptor,
+                                std::vector<ObjPtr<mirror::Class>>& result) {
   result.clear();
-  if (dex_cache_boot_image_class_lookup_required_) {
-    AddBootImageClassesToClassTable();
-  }
   Thread* const self = Thread::Current();
   ReaderMutexLock mu(self, *Locks::classlinker_classes_lock_);
   const size_t hash = ComputeModifiedUtf8Hash(descriptor);
-  mirror::Class* klass = boot_class_table_.Lookup(descriptor, hash);
+  ObjPtr<mirror::Class> klass = boot_class_table_.Lookup(descriptor, hash);
   if (klass != nullptr) {
     result.push_back(klass);
   }
@@ -3843,8 +3760,8 @@
   // If we got this far then we have a hard failure.
   std::string error_msg =
       StringPrintf("Rejecting class %s that attempts to sub-type erroneous class %s",
-                   PrettyDescriptor(klass.Get()).c_str(),
-                   PrettyDescriptor(supertype.Get()).c_str());
+                   klass->PrettyDescriptor().c_str(),
+                   supertype->PrettyDescriptor().c_str());
   LOG(WARNING) << error_msg  << " in " << klass->GetDexCache()->GetLocation()->ToModifiedUtf8();
   StackHandleScope<1> hs(self);
   Handle<mirror::Throwable> cause(hs.NewHandle(self->GetException()));
@@ -3867,9 +3784,8 @@
   return false;
 }
 
-void ClassLinker::VerifyClass(Thread* self,
-                              Handle<mirror::Class> klass,
-                              verifier::HardFailLogMode log_level) {
+verifier::MethodVerifier::FailureKind ClassLinker::VerifyClass(
+    Thread* self, Handle<mirror::Class> klass, verifier::HardFailLogMode log_level) {
   {
     // TODO: assert that the monitor on the Class is held
     ObjectLock<mirror::Class> lock(self, klass);
@@ -3880,8 +3796,9 @@
         old_status == mirror::Class::kStatusVerifyingAtRuntime) {
       lock.WaitIgnoringInterrupts();
       CHECK(klass->IsErroneous() || (klass->GetStatus() > old_status))
-          << "Class '" << PrettyClass(klass.Get()) << "' performed an illegal verification state "
-          << "transition from " << old_status << " to " << klass->GetStatus();
+          << "Class '" << klass->PrettyClass()
+          << "' performed an illegal verification state transition from " << old_status
+          << " to " << klass->GetStatus();
       old_status = klass->GetStatus();
     }
 
@@ -3889,23 +3806,23 @@
     // this class as a parent to another.
     if (klass->IsErroneous()) {
       ThrowEarlierClassFailure(klass.Get());
-      return;
+      return verifier::MethodVerifier::kHardFailure;
     }
 
     // Don't attempt to re-verify if already sufficiently verified.
     if (klass->IsVerified()) {
       EnsureSkipAccessChecksMethods(klass);
-      return;
+      return verifier::MethodVerifier::kNoFailure;
     }
     if (klass->IsCompileTimeVerified() && Runtime::Current()->IsAotCompiler()) {
-      return;
+      return verifier::MethodVerifier::kNoFailure;
     }
 
     if (klass->GetStatus() == mirror::Class::kStatusResolved) {
       mirror::Class::SetStatus(klass, mirror::Class::kStatusVerifying, self);
     } else {
       CHECK_EQ(klass->GetStatus(), mirror::Class::kStatusRetryVerificationAtRuntime)
-            << PrettyClass(klass.Get());
+          << klass->PrettyClass();
       CHECK(!Runtime::Current()->IsAotCompiler());
       mirror::Class::SetStatus(klass, mirror::Class::kStatusVerifyingAtRuntime, self);
     }
@@ -3914,7 +3831,7 @@
     if (!Runtime::Current()->IsVerificationEnabled()) {
       mirror::Class::SetStatus(klass, mirror::Class::kStatusVerified, self);
       EnsureSkipAccessChecksMethods(klass);
-      return;
+      return verifier::MethodVerifier::kNoFailure;
     }
   }
 
@@ -3924,7 +3841,7 @@
   // If we have a superclass and we get a hard verification failure we can return immediately.
   if (supertype.Get() != nullptr && !AttemptSupertypeVerification(self, klass, supertype)) {
     CHECK(self->IsExceptionPending()) << "Verification error should be pending.";
-    return;
+    return verifier::MethodVerifier::kHardFailure;
   }
 
   // Verify all default super-interfaces.
@@ -3951,7 +3868,7 @@
       } else if (UNLIKELY(!AttemptSupertypeVerification(self, klass, iface))) {
         // We had a hard failure while verifying this interface. Just return immediately.
         CHECK(self->IsExceptionPending()) << "Verification error should be pending.";
-        return;
+        return verifier::MethodVerifier::kHardFailure;
       } else if (UNLIKELY(!iface->IsVerified())) {
         // We softly failed to verify the iface. Stop checking and clean up.
         // Put the iface into the supertype handle so we know what caused us to fail.
@@ -3977,8 +3894,8 @@
   //     oat_file_class_status == mirror::Class::kStatusError => !preverified
   DCHECK(!(oat_file_class_status == mirror::Class::kStatusError) || !preverified);
 
-  verifier::MethodVerifier::FailureKind verifier_failure = verifier::MethodVerifier::kNoFailure;
   std::string error_msg;
+  verifier::MethodVerifier::FailureKind verifier_failure = verifier::MethodVerifier::kNoFailure;
   if (!preverified) {
     Runtime* runtime = Runtime::Current();
     verifier_failure = verifier::MethodVerifier::VerifyClass(self,
@@ -3994,9 +3911,10 @@
 
   if (preverified || verifier_failure != verifier::MethodVerifier::kHardFailure) {
     if (!preverified && verifier_failure != verifier::MethodVerifier::kNoFailure) {
-      VLOG(class_linker) << "Soft verification failure in class " << PrettyDescriptor(klass.Get())
-          << " in " << klass->GetDexCache()->GetLocation()->ToModifiedUtf8()
-          << " because: " << error_msg;
+      VLOG(class_linker) << "Soft verification failure in class "
+                         << klass->PrettyDescriptor()
+                         << " in " << klass->GetDexCache()->GetLocation()->ToModifiedUtf8()
+                         << " because: " << error_msg;
     }
     self->AssertNoPendingException();
     // Make sure all classes referenced by catch blocks are resolved.
@@ -4027,7 +3945,7 @@
       }
     }
   } else {
-    VLOG(verifier) << "Verification failed on class " << PrettyDescriptor(klass.Get())
+    VLOG(verifier) << "Verification failed on class " << klass->PrettyDescriptor()
                   << " in " << klass->GetDexCache()->GetLocation()->ToModifiedUtf8()
                   << " because: " << error_msg;
     self->AssertNoPendingException();
@@ -4050,6 +3968,7 @@
       EnsureSkipAccessChecksMethods(klass);
     }
   }
+  return verifier_failure;
 }
 
 void ClassLinker::EnsureSkipAccessChecksMethods(Handle<mirror::Class> klass) {
@@ -4060,7 +3979,7 @@
 }
 
 bool ClassLinker::VerifyClassUsingOatFile(const DexFile& dex_file,
-                                          mirror::Class* klass,
+                                          ObjPtr<mirror::Class> klass,
                                           mirror::Class::Status& oat_file_class_status) {
   // If we're compiling, we can only verify the class using the oat file if
   // we are not compiling the image or if the class we're verifying is not part of
@@ -4143,7 +4062,7 @@
   }
   std::string temp;
   LOG(FATAL) << "Unexpected class status: " << oat_file_class_status
-             << " " << dex_file.GetLocation() << " " << PrettyClass(klass) << " "
+             << " " << dex_file.GetLocation() << " " << klass->PrettyClass() << " "
              << klass->GetDescriptor(&temp);
   UNREACHABLE();
 }
@@ -4172,7 +4091,7 @@
       // Ensure exception types are resolved so that they don't need resolution to be delivered,
       // unresolved exception types will be ignored by exception delivery
       if (iterator.GetHandlerTypeIndex() != DexFile::kDexNoIndex16) {
-        mirror::Class* exception_type = ResolveType(iterator.GetHandlerTypeIndex(), method);
+        ObjPtr<mirror::Class> exception_type = ResolveType(iterator.GetHandlerTypeIndex(), method);
         if (exception_type == nullptr) {
           DCHECK(Thread::Current()->IsExceptionPending());
           Thread::Current()->ClearException();
@@ -4202,10 +4121,12 @@
   // Set the class access flags incl. VerificationAttempted, so we do not try to set the flag on
   // the methods.
   klass->SetAccessFlags(kAccClassIsProxy | kAccPublic | kAccFinal | kAccVerificationAttempted);
-  klass->SetClassLoader(soa.Decode<mirror::ClassLoader>(loader).Ptr());
+  klass->SetClassLoader(soa.Decode<mirror::ClassLoader>(loader));
   DCHECK_EQ(klass->GetPrimitiveType(), Primitive::kPrimNot);
-  klass->SetName(soa.Decode<mirror::String>(name).Ptr());
+  klass->SetName(soa.Decode<mirror::String>(name));
   klass->SetDexCache(GetClassRoot(kJavaLangReflectProxy)->GetDexCache());
+  // Object has an empty iftable, copy it for that reason.
+  klass->SetIfTable(GetClassRoot(kJavaLangObject)->GetIfTable());
   mirror::Class::SetStatus(klass, mirror::Class::kStatusIdx, self);
   std::string descriptor(GetDescriptorForProxy(klass.Get()));
   const size_t hash = ComputeModifiedUtf8Hash(descriptor.c_str());
@@ -4217,7 +4138,7 @@
   // (ArtField::declaring_class_) are only visited from the class
   // table. There can't be any suspend points between inserting the
   // class and setting the field arrays below.
-  mirror::Class* existing = InsertClass(descriptor.c_str(), klass.Get(), hash);
+  ObjPtr<mirror::Class> existing = InsertClass(descriptor.c_str(), klass.Get(), hash);
   CHECK(existing == nullptr);
 
   // Instance fields are inherited, but we add a couple of static fields...
@@ -4244,7 +4165,7 @@
   // They have as many virtual methods as the array
   auto h_methods = hs.NewHandle(soa.Decode<mirror::ObjectArray<mirror::Method>>(methods));
   DCHECK_EQ(h_methods->GetClass(), mirror::Method::ArrayClass())
-      << PrettyClass(h_methods->GetClass());
+      << mirror::Class::PrettyClass(h_methods->GetClass());
   const size_t num_virtual_methods = h_methods->GetLength();
 
   // Create the methods array.
@@ -4297,11 +4218,11 @@
   CHECK_EQ(interfaces_sfield.GetDeclaringClass(), klass.Get());
   interfaces_sfield.SetObject<false>(
       klass.Get(),
-      soa.Decode<mirror::ObjectArray<mirror::Class>>(interfaces).Ptr());
+      soa.Decode<mirror::ObjectArray<mirror::Class>>(interfaces));
   CHECK_EQ(throws_sfield.GetDeclaringClass(), klass.Get());
   throws_sfield.SetObject<false>(
       klass.Get(),
-      soa.Decode<mirror::ObjectArray<mirror::ObjectArray<mirror::Class>>>(throws).Ptr());
+      soa.Decode<mirror::ObjectArray<mirror::ObjectArray<mirror::Class>>>(throws));
 
   {
     // Lock on klass is released. Lock new class object.
@@ -4324,23 +4245,23 @@
     Handle<mirror::String> decoded_name = hs2.NewHandle(soa.Decode<mirror::String>(name));
     std::string interfaces_field_name(StringPrintf("java.lang.Class[] %s.interfaces",
                                                    decoded_name->ToModifiedUtf8().c_str()));
-    CHECK_EQ(PrettyField(klass->GetStaticField(0)), interfaces_field_name);
+    CHECK_EQ(ArtField::PrettyField(klass->GetStaticField(0)), interfaces_field_name);
 
     std::string throws_field_name(StringPrintf("java.lang.Class[][] %s.throws",
                                                decoded_name->ToModifiedUtf8().c_str()));
-    CHECK_EQ(PrettyField(klass->GetStaticField(1)), throws_field_name);
+    CHECK_EQ(ArtField::PrettyField(klass->GetStaticField(1)), throws_field_name);
 
     CHECK_EQ(klass.Get()->GetInterfaces(),
-             soa.Decode<mirror::ObjectArray<mirror::Class>>(interfaces).Ptr());
+             soa.Decode<mirror::ObjectArray<mirror::Class>>(interfaces));
     CHECK_EQ(klass.Get()->GetThrows(),
-             soa.Decode<mirror::ObjectArray<mirror::ObjectArray<mirror::Class>>>(throws).Ptr());
+             soa.Decode<mirror::ObjectArray<mirror::ObjectArray<mirror::Class>>>(throws));
   }
   return klass.Get();
 }
 
-std::string ClassLinker::GetDescriptorForProxy(mirror::Class* proxy_class) {
+std::string ClassLinker::GetDescriptorForProxy(ObjPtr<mirror::Class> proxy_class) {
   DCHECK(proxy_class->IsProxyClass());
-  mirror::String* name = proxy_class->GetName();
+  ObjPtr<mirror::String> name = proxy_class->GetName();
   DCHECK(name != nullptr);
   return DotToDescriptor(name->ToModifiedUtf8().c_str());
 }
@@ -4427,7 +4348,7 @@
            prototype->GetReturnType(true /* resolve */, image_pointer_size_));
 }
 
-bool ClassLinker::CanWeInitializeClass(mirror::Class* klass, bool can_init_statics,
+bool ClassLinker::CanWeInitializeClass(ObjPtr<mirror::Class> klass, bool can_init_statics,
                                        bool can_init_parents) {
   if (can_init_statics && can_init_parents) {
     return true;
@@ -4451,7 +4372,7 @@
     if (!klass->IsInterface()) {
       size_t num_interfaces = klass->GetIfTableCount();
       for (size_t i = 0; i < num_interfaces; i++) {
-        mirror::Class* iface = klass->GetIfTable()->GetInterface(i);
+        ObjPtr<mirror::Class> iface = klass->GetIfTable()->GetInterface(i);
         if (iface->HasDefaultMethods() &&
             !CanWeInitializeClass(iface, can_init_statics, can_init_parents)) {
           return false;
@@ -4462,7 +4383,7 @@
   if (klass->IsInterface() || !klass->HasSuperClass()) {
     return true;
   }
-  mirror::Class* super_class = klass->GetSuperClass();
+  ObjPtr<mirror::Class> super_class = klass->GetSuperClass();
   if (!can_init_parents && !super_class->IsInitialized()) {
     return false;
   }
@@ -4502,7 +4423,7 @@
       return false;
     }
 
-    CHECK(klass->IsResolved()) << PrettyClass(klass.Get()) << ": state=" << klass->GetStatus();
+    CHECK(klass->IsResolved()) << klass->PrettyClass() << ": state=" << klass->GetStatus();
 
     if (!klass->IsVerified()) {
       VerifyClass(self, klass);
@@ -4516,7 +4437,7 @@
           if (self->IsExceptionPending()) {
             // Check that it's a VerifyError.
             DCHECK_EQ("java.lang.Class<java.lang.VerifyError>",
-                      PrettyClass(self->GetException()->GetClass()));
+                      mirror::Class::PrettyClass(self->GetException()->GetClass()));
           } else {
             // Check that another thread attempted initialization.
             DCHECK_NE(0, klass->GetClinitThreadId());
@@ -4569,7 +4490,7 @@
     }
     self->AllowThreadSuspension();
 
-    CHECK_EQ(klass->GetStatus(), mirror::Class::kStatusVerified) << PrettyClass(klass.Get())
+    CHECK_EQ(klass->GetStatus(), mirror::Class::kStatusVerified) << klass->PrettyClass()
         << " self.tid=" << self->GetTid() << " clinit.tid=" << klass->GetClinitThreadId();
 
     // From here out other threads may observe that we're initializing and so changes of state
@@ -4582,7 +4503,7 @@
 
   // Initialize super classes, must be done while initializing for the JLS.
   if (!klass->IsInterface() && klass->HasSuperClass()) {
-    mirror::Class* super_class = klass->GetSuperClass();
+    ObjPtr<mirror::Class> super_class = klass->GetSuperClass();
     if (!super_class->IsInitialized()) {
       CHECK(!super_class->IsInterface());
       CHECK(can_init_parents);
@@ -4594,7 +4515,7 @@
         // the super class became erroneous due to initialization.
         CHECK(handle_scope_super->IsErroneous() && self->IsExceptionPending())
             << "Super class initialization failed for "
-            << PrettyDescriptor(handle_scope_super.Get())
+            << handle_scope_super->PrettyDescriptor()
             << " that has unexpected status " << handle_scope_super->GetStatus()
             << "\nPending exception:\n"
             << (self->GetException() != nullptr ? self->GetException()->Dump() : "");
@@ -4709,7 +4630,8 @@
     } else if (Runtime::Current()->IsTransactionAborted()) {
       // The exception thrown when the transaction aborted has been caught and cleared
       // so we need to throw it again now.
-      VLOG(compiler) << "Return from class initializer of " << PrettyDescriptor(klass.Get())
+      VLOG(compiler) << "Return from class initializer of "
+                     << mirror::Class::PrettyDescriptor(klass.Get())
                      << " without exception while transaction was aborted: re-throw it now.";
       Runtime::Current()->ThrowTransactionAbortError(self);
       mirror::Class::SetStatus(klass, mirror::Class::kStatusError, self);
@@ -4749,7 +4671,7 @@
     MutableHandle<mirror::Class> handle_super_iface(hs.NewHandle<mirror::Class>(nullptr));
     // First we initialize all of iface's super-interfaces recursively.
     for (size_t i = 0; i < num_direct_ifaces; i++) {
-      mirror::Class* super_iface = mirror::Class::GetDirectInterface(self, iface, i);
+      ObjPtr<mirror::Class> super_iface = mirror::Class::GetDirectInterface(self, iface, i);
       if (!super_iface->HasBeenRecursivelyInitialized()) {
         // Recursive step
         handle_super_iface.Assign(super_iface);
@@ -4811,14 +4733,14 @@
       // The caller wants an exception, but it was thrown in a
       // different thread.  Synthesize one here.
       ThrowNoClassDefFoundError("<clinit> failed for class %s; see exception in other thread",
-                                PrettyDescriptor(klass.Get()).c_str());
+                                klass->PrettyDescriptor().c_str());
       VlogClassInitializationFailure(klass);
       return false;
     }
     if (klass->IsInitialized()) {
       return true;
     }
-    LOG(FATAL) << "Unexpected class status. " << PrettyClass(klass.Get()) << " is "
+    LOG(FATAL) << "Unexpected class status. " << klass->PrettyClass() << " is "
         << klass->GetStatus();
   }
   UNREACHABLE();
@@ -4835,15 +4757,15 @@
   const DexFile::MethodId& method_id = dex_file->GetMethodId(m->GetDexMethodIndex());
   const DexFile::ProtoId& proto_id = dex_file->GetMethodPrototype(method_id);
   uint16_t return_type_idx = proto_id.return_type_idx_;
-  std::string return_type = PrettyType(return_type_idx, *dex_file);
-  std::string class_loader = PrettyTypeOf(m->GetDeclaringClass()->GetClassLoader());
+  std::string return_type = dex_file->PrettyType(return_type_idx);
+  std::string class_loader = mirror::Object::PrettyTypeOf(m->GetDeclaringClass()->GetClassLoader());
   ThrowWrappedLinkageError(klass.Get(),
                            "While checking class %s method %s signature against %s %s: "
                            "Failed to resolve return type %s with %s",
-                           PrettyDescriptor(klass.Get()).c_str(),
-                           PrettyMethod(method).c_str(),
+                           mirror::Class::PrettyDescriptor(klass.Get()).c_str(),
+                           ArtMethod::PrettyMethod(method).c_str(),
                            super_klass->IsInterface() ? "interface" : "superclass",
-                           PrettyDescriptor(super_klass.Get()).c_str(),
+                           mirror::Class::PrettyDescriptor(super_klass.Get()).c_str(),
                            return_type.c_str(), class_loader.c_str());
 }
 
@@ -4857,15 +4779,15 @@
   DCHECK(Thread::Current()->IsExceptionPending());
   DCHECK(!m->IsProxyMethod());
   const DexFile* dex_file = m->GetDexFile();
-  std::string arg_type = PrettyType(arg_type_idx, *dex_file);
-  std::string class_loader = PrettyTypeOf(m->GetDeclaringClass()->GetClassLoader());
+  std::string arg_type = dex_file->PrettyType(arg_type_idx);
+  std::string class_loader = mirror::Object::PrettyTypeOf(m->GetDeclaringClass()->GetClassLoader());
   ThrowWrappedLinkageError(klass.Get(),
                            "While checking class %s method %s signature against %s %s: "
                            "Failed to resolve arg %u type %s with %s",
-                           PrettyDescriptor(klass.Get()).c_str(),
-                           PrettyMethod(method).c_str(),
+                           mirror::Class::PrettyDescriptor(klass.Get()).c_str(),
+                           ArtMethod::PrettyMethod(method).c_str(),
                            super_klass->IsInterface() ? "interface" : "superclass",
-                           PrettyDescriptor(super_klass.Get()).c_str(),
+                           mirror::Class::PrettyDescriptor(super_klass.Get()).c_str(),
                            index, arg_type.c_str(), class_loader.c_str());
 }
 
@@ -4876,10 +4798,10 @@
     REQUIRES_SHARED(Locks::mutator_lock_) {
   ThrowLinkageError(klass.Get(),
                     "Class %s method %s resolves differently in %s %s: %s",
-                    PrettyDescriptor(klass.Get()).c_str(),
-                    PrettyMethod(method).c_str(),
+                    mirror::Class::PrettyDescriptor(klass.Get()).c_str(),
+                    ArtMethod::PrettyMethod(method).c_str(),
                     super_klass->IsInterface() ? "interface" : "superclass",
-                    PrettyDescriptor(super_klass.Get()).c_str(),
+                    mirror::Class::PrettyDescriptor(super_klass.Get()).c_str(),
                     error_msg.c_str());
 }
 
@@ -4898,7 +4820,7 @@
       ThrowSignatureCheckResolveReturnTypeException(klass, super_klass, method1, method1);
       return false;
     }
-    mirror::Class* other_return_type = method2->GetReturnType(true /* resolve */,
+    ObjPtr<mirror::Class> other_return_type = method2->GetReturnType(true /* resolve */,
                                                               pointer_size);
     if (UNLIKELY(other_return_type == nullptr)) {
       ThrowSignatureCheckResolveReturnTypeException(klass, super_klass, method1, method2);
@@ -4907,10 +4829,10 @@
     if (UNLIKELY(other_return_type != return_type.Get())) {
       ThrowSignatureMismatch(klass, super_klass, method1,
                              StringPrintf("Return types mismatch: %s(%p) vs %s(%p)",
-                                          PrettyClassAndClassLoader(return_type.Get()).c_str(),
+                                          return_type->PrettyClassAndClassLoader().c_str(),
                                           return_type.Get(),
-                                          PrettyClassAndClassLoader(other_return_type).c_str(),
-                                          other_return_type));
+                                          other_return_type->PrettyClassAndClassLoader().c_str(),
+                                          other_return_type.Ptr()));
       return false;
     }
   }
@@ -4920,7 +4842,7 @@
     if (types2 != nullptr && types2->Size() != 0) {
       ThrowSignatureMismatch(klass, super_klass, method1,
                              StringPrintf("Type list mismatch with %s",
-                                          PrettyMethod(method2, true).c_str()));
+                                          method2->PrettyMethod(true).c_str()));
       return false;
     }
     return true;
@@ -4928,7 +4850,7 @@
     if (types1->Size() != 0) {
       ThrowSignatureMismatch(klass, super_klass, method1,
                              StringPrintf("Type list mismatch with %s",
-                                          PrettyMethod(method2, true).c_str()));
+                                          method2->PrettyMethod(true).c_str()));
       return false;
     }
     return true;
@@ -4937,7 +4859,7 @@
   if (UNLIKELY(num_types != types2->Size())) {
     ThrowSignatureMismatch(klass, super_klass, method1,
                            StringPrintf("Type list mismatch with %s",
-                                        PrettyMethod(method2, true).c_str()));
+                                        method2->PrettyMethod(true).c_str()));
     return false;
   }
   for (uint32_t i = 0; i < num_types; ++i) {
@@ -4951,7 +4873,7 @@
       return false;
     }
     uint32_t other_param_type_idx = types2->GetTypeItem(i).type_idx_;
-    mirror::Class* other_param_type =
+    ObjPtr<mirror::Class> other_param_type =
         method2->GetClassFromTypeIndex(other_param_type_idx, true /* resolve */, pointer_size);
     if (UNLIKELY(other_param_type == nullptr)) {
       ThrowSignatureCheckResolveArgException(klass, super_klass, method1,
@@ -4962,10 +4884,10 @@
       ThrowSignatureMismatch(klass, super_klass, method1,
                              StringPrintf("Parameter %u type mismatch: %s(%p) vs %s(%p)",
                                           i,
-                                          PrettyClassAndClassLoader(param_type.Get()).c_str(),
+                                          param_type->PrettyClassAndClassLoader().c_str(),
                                           param_type.Get(),
-                                          PrettyClassAndClassLoader(other_param_type).c_str(),
-                                          other_param_type));
+                                          other_param_type->PrettyClassAndClassLoader().c_str(),
+                                          other_param_type.Ptr()));
       return false;
     }
   }
@@ -5019,7 +4941,9 @@
   return true;
 }
 
-bool ClassLinker::EnsureInitialized(Thread* self, Handle<mirror::Class> c, bool can_init_fields,
+bool ClassLinker::EnsureInitialized(Thread* self,
+                                    Handle<mirror::Class> c,
+                                    bool can_init_fields,
                                     bool can_init_parents) {
   DCHECK(c.Get() != nullptr);
   if (c->IsInitialized()) {
@@ -5030,7 +4954,7 @@
   const bool success = InitializeClass(self, c, can_init_fields, can_init_parents);
   if (!success) {
     if (can_init_fields && can_init_parents) {
-      CHECK(self->IsExceptionPending()) << PrettyClass(c.Get());
+      CHECK(self->IsExceptionPending()) << c->PrettyClass();
     }
   } else {
     self->AssertNoPendingException();
@@ -5038,8 +4962,8 @@
   return success;
 }
 
-void ClassLinker::FixupTemporaryDeclaringClass(mirror::Class* temp_class,
-                                               mirror::Class* new_class) {
+void ClassLinker::FixupTemporaryDeclaringClass(ObjPtr<mirror::Class> temp_class,
+                                               ObjPtr<mirror::Class> new_class) {
   DCHECK_EQ(temp_class->NumInstanceFields(), 0u);
   for (ArtField& field : new_class->GetIFields()) {
     if (field.GetDeclaringClass() == temp_class) {
@@ -5067,7 +4991,7 @@
   Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(new_class);
 }
 
-void ClassLinker::RegisterClassLoader(mirror::ClassLoader* class_loader) {
+void ClassLinker::RegisterClassLoader(ObjPtr<mirror::ClassLoader> class_loader) {
   CHECK(class_loader->GetAllocator() == nullptr);
   CHECK(class_loader->GetClassTable() == nullptr);
   Thread* const self = Thread::Current();
@@ -5083,7 +5007,7 @@
   class_loaders_.push_back(data);
 }
 
-ClassTable* ClassLinker::InsertClassTableForClassLoader(mirror::ClassLoader* class_loader) {
+ClassTable* ClassLinker::InsertClassTableForClassLoader(ObjPtr<mirror::ClassLoader> class_loader) {
   if (class_loader == nullptr) {
     return &boot_class_table_;
   }
@@ -5096,11 +5020,11 @@
   return class_table;
 }
 
-ClassTable* ClassLinker::ClassTableForClassLoader(mirror::ClassLoader* class_loader) {
+ClassTable* ClassLinker::ClassTableForClassLoader(ObjPtr<mirror::ClassLoader> class_loader) {
   return class_loader == nullptr ? &boot_class_table_ : class_loader->GetClassTable();
 }
 
-static ImTable* FindSuperImt(mirror::Class* klass, PointerSize pointer_size)
+static ImTable* FindSuperImt(ObjPtr<mirror::Class> klass, PointerSize pointer_size)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   while (klass->HasSuperClass()) {
     klass = klass->GetSuperClass();
@@ -5171,7 +5095,7 @@
   if (!klass->IsTemp() || (!init_done_ && klass->GetClassSize() == class_size)) {
     // We don't need to retire this class as it has no embedded tables or it was created the
     // correct size during class linker initialization.
-    CHECK_EQ(klass->GetClassSize(), class_size) << PrettyDescriptor(klass.Get());
+    CHECK_EQ(klass->GetClassSize(), class_size) << klass->PrettyDescriptor();
 
     if (klass->ShouldHaveEmbeddedVTable()) {
       klass->PopulateEmbeddedVTable(image_pointer_size_);
@@ -5207,9 +5131,9 @@
 
     {
       WriterMutexLock mu(self, *Locks::classlinker_classes_lock_);
-      mirror::ClassLoader* const class_loader = h_new_class.Get()->GetClassLoader();
+      ObjPtr<mirror::ClassLoader> const class_loader = h_new_class.Get()->GetClassLoader();
       ClassTable* const table = InsertClassTableForClassLoader(class_loader);
-      mirror::Class* existing = table->UpdateClass(descriptor, h_new_class.Get(),
+      ObjPtr<mirror::Class> existing = table->UpdateClass(descriptor, h_new_class.Get(),
                                                    ComputeModifiedUtf8Hash(descriptor));
       if (class_loader != nullptr) {
         // We updated the class in the class table, perform the write barrier so that the GC knows
@@ -5217,14 +5141,6 @@
         Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(class_loader);
       }
       CHECK_EQ(existing, klass.Get());
-      if (kIsDebugBuild && class_loader == nullptr && dex_cache_boot_image_class_lookup_required_) {
-        // Check a class loaded with the system class loader matches one in the image if the class
-        // is in the image.
-        mirror::Class* const image_class = LookupClassFromBootImage(descriptor);
-        if (image_class != nullptr) {
-          CHECK_EQ(klass.Get(), existing) << descriptor;
-        }
-      }
       if (log_new_class_table_roots_) {
         new_class_roots_.push_back(GcRoot<mirror::Class>(h_new_class.Get()));
       }
@@ -5378,7 +5294,7 @@
 static bool CheckSuperClassChange(Handle<mirror::Class> klass,
                                   const DexFile& dex_file,
                                   const DexFile::ClassDef& class_def,
-                                  mirror::Class* super_class)
+                                  ObjPtr<mirror::Class> super_class)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   // Check for unexpected changes in the superclass.
   // Quick check 1) is the super_class class-loader the boot class loader? This always has
@@ -5418,13 +5334,13 @@
             LOG(WARNING) << "Incompatible structural change detected: " <<
                 StringPrintf(
                     "Structural change of %s is hazardous (%s at compile time, %s at runtime): %s",
-                    PrettyType(super_class_def->class_idx_, dex_file).c_str(),
+                    dex_file.PrettyType(super_class_def->class_idx_).c_str(),
                     class_oat_file->GetLocation().c_str(),
                     loaded_super_oat_file->GetLocation().c_str(),
                     error_msg.c_str());
             ThrowIncompatibleClassChangeError(klass.Get(),
                 "Structural change of %s is hazardous (%s at compile time, %s at runtime): %s",
-                PrettyType(super_class_def->class_idx_, dex_file).c_str(),
+                dex_file.PrettyType(super_class_def->class_idx_).c_str(),
                 class_oat_file->GetLocation().c_str(),
                 loaded_super_oat_file->GetLocation().c_str(),
                 error_msg.c_str());
@@ -5451,11 +5367,11 @@
     if (super_class_idx == class_def.class_idx_) {
       ThrowClassCircularityError(klass.Get(),
                                  "Class %s extends itself",
-                                 PrettyDescriptor(klass.Get()).c_str());
+                                 klass->PrettyDescriptor().c_str());
       return false;
     }
 
-    mirror::Class* super_class = ResolveType(dex_file, super_class_idx, klass.Get());
+    ObjPtr<mirror::Class> super_class = ResolveType(dex_file, super_class_idx, klass.Get());
     if (super_class == nullptr) {
       DCHECK(Thread::Current()->IsExceptionPending());
       return false;
@@ -5463,8 +5379,8 @@
     // Verify
     if (!klass->CanAccess(super_class)) {
       ThrowIllegalAccessError(klass.Get(), "Class %s extended by class %s is inaccessible",
-                              PrettyDescriptor(super_class).c_str(),
-                              PrettyDescriptor(klass.Get()).c_str());
+                              super_class->PrettyDescriptor().c_str(),
+                              klass->PrettyDescriptor().c_str());
       return false;
     }
     CHECK(super_class->IsResolved());
@@ -5479,7 +5395,7 @@
   if (interfaces != nullptr) {
     for (size_t i = 0; i < interfaces->Size(); i++) {
       uint16_t idx = interfaces->GetTypeItem(i).type_idx_;
-      mirror::Class* interface = ResolveType(dex_file, idx, klass.Get());
+      ObjPtr<mirror::Class> interface = ResolveType(dex_file, idx, klass.Get());
       if (interface == nullptr) {
         DCHECK(Thread::Current()->IsExceptionPending());
         return false;
@@ -5489,8 +5405,8 @@
         // TODO: the RI seemed to ignore this in my testing.
         ThrowIllegalAccessError(klass.Get(),
                                 "Interface %s implemented by class %s is inaccessible",
-                                PrettyDescriptor(interface).c_str(),
-                                PrettyDescriptor(klass.Get()).c_str());
+                                interface->PrettyDescriptor().c_str(),
+                                klass->PrettyDescriptor().c_str());
         return false;
       }
     }
@@ -5502,7 +5418,7 @@
 
 bool ClassLinker::LinkSuperClass(Handle<mirror::Class> klass) {
   CHECK(!klass->IsPrimitive());
-  mirror::Class* super = klass->GetSuperClass();
+  ObjPtr<mirror::Class> super = klass->GetSuperClass();
   if (klass.Get() == GetClassRoot(kJavaLangObject)) {
     if (super != nullptr) {
       ThrowClassFormatError(klass.Get(), "java.lang.Object must not have a superclass");
@@ -5512,22 +5428,22 @@
   }
   if (super == nullptr) {
     ThrowLinkageError(klass.Get(), "No superclass defined for class %s",
-                      PrettyDescriptor(klass.Get()).c_str());
+                      klass->PrettyDescriptor().c_str());
     return false;
   }
   // Verify
   if (super->IsFinal() || super->IsInterface()) {
     ThrowIncompatibleClassChangeError(klass.Get(),
                                       "Superclass %s of %s is %s",
-                                      PrettyDescriptor(super).c_str(),
-                                      PrettyDescriptor(klass.Get()).c_str(),
+                                      super->PrettyDescriptor().c_str(),
+                                      klass->PrettyDescriptor().c_str(),
                                       super->IsFinal() ? "declared final" : "an interface");
     return false;
   }
   if (!klass->CanAccess(super)) {
     ThrowIllegalAccessError(klass.Get(), "Superclass %s is inaccessible to class %s",
-                            PrettyDescriptor(super).c_str(),
-                            PrettyDescriptor(klass.Get()).c_str());
+                            super->PrettyDescriptor().c_str(),
+                            klass->PrettyDescriptor().c_str());
     return false;
   }
 
@@ -5552,7 +5468,7 @@
   if (init_done_ && super == GetClassRoot(kJavaLangRefReference)) {
     ThrowLinkageError(klass.Get(),
                       "Class %s attempts to subclass java.lang.ref.Reference, which is not allowed",
-                      PrettyDescriptor(klass.Get()).c_str());
+                      klass->PrettyDescriptor().c_str());
     return false;
   }
 
@@ -5594,7 +5510,7 @@
       REQUIRES_SHARED(Locks::mutator_lock_) :
       dex_file_(method->GetDexFile()), mid_(&dex_file_->GetMethodId(method->GetDexMethodIndex())),
       name_(nullptr), name_len_(0) {
-    DCHECK(!method->IsProxyMethod()) << PrettyMethod(method);
+    DCHECK(!method->IsProxyMethod()) << method->PrettyMethod();
   }
 
   const char* GetName() {
@@ -5606,7 +5522,7 @@
 
   bool HasSameNameAndSignature(ArtMethod* other)
       REQUIRES_SHARED(Locks::mutator_lock_) {
-    DCHECK(!other->IsProxyMethod()) << PrettyMethod(other);
+    DCHECK(!other->IsProxyMethod()) << other->PrettyMethod();
     const DexFile* other_dex_file = other->GetDexFile();
     const DexFile::MethodId& other_mid = other_dex_file->GetMethodId(other->GetDexMethodIndex());
     if (dex_file_ == other_dex_file) {
@@ -5769,7 +5685,7 @@
     } else {
       DCHECK(super_class->IsAbstract() && !super_class->IsArrayClass());
       auto* super_vtable = super_class->GetVTable();
-      CHECK(super_vtable != nullptr) << PrettyClass(super_class.Get());
+      CHECK(super_vtable != nullptr) << super_class->PrettyClass();
       // We might need to change vtable if we have new virtual methods or new interfaces (since that
       // might give us new default methods). See comment above.
       if (num_virtual_methods == 0 && super_class->GetIfTableCount() == klass->GetIfTableCount()) {
@@ -5817,27 +5733,29 @@
     for (size_t j = 0; j < super_vtable_length; ++j) {
       // Search the hash table to see if we are overridden by any method.
       ArtMethod* super_method = vtable->GetElementPtrSize<ArtMethod*>(j, image_pointer_size_);
+      if (!klass->CanAccessMember(super_method->GetDeclaringClass(),
+                                  super_method->GetAccessFlags())) {
+        // Continue on to the next method since this one is package private and canot be overridden.
+        // Before Android 4.1, the package-private method super_method might have been incorrectly
+        // overridden.
+        continue;
+      }
       MethodNameAndSignatureComparator super_method_name_comparator(
           super_method->GetInterfaceMethodIfProxy(image_pointer_size_));
+      // We remove the method so that subsequent lookups will be faster by making the hash-map
+      // smaller as we go on.
       uint32_t hash_index = hash_table.FindAndRemove(&super_method_name_comparator);
       if (hash_index != hash_table.GetNotFoundIndex()) {
         ArtMethod* virtual_method = klass->GetVirtualMethodDuringLinking(
             hash_index, image_pointer_size_);
-        if (klass->CanAccessMember(super_method->GetDeclaringClass(),
-                                   super_method->GetAccessFlags())) {
-          if (super_method->IsFinal()) {
-            ThrowLinkageError(klass.Get(), "Method %s overrides final method in class %s",
-                              PrettyMethod(virtual_method).c_str(),
-                              super_method->GetDeclaringClassDescriptor());
-            return false;
-          }
-          vtable->SetElementPtrSize(j, virtual_method, image_pointer_size_);
-          virtual_method->SetMethodIndex(j);
-        } else {
-          LOG(WARNING) << "Before Android 4.1, method " << PrettyMethod(virtual_method)
-                       << " would have incorrectly overridden the package-private method in "
-                       << PrettyDescriptor(super_method->GetDeclaringClassDescriptor());
+        if (super_method->IsFinal()) {
+          ThrowLinkageError(klass.Get(), "Method %s overrides final method in class %s",
+                            virtual_method->PrettyMethod().c_str(),
+                            super_method->GetDeclaringClassDescriptor());
+          return false;
         }
+        vtable->SetElementPtrSize(j, virtual_method, image_pointer_size_);
+        virtual_method->SetMethodIndex(j);
       } else if (super_method->IsOverridableByDefaultMethod()) {
         // We didn't directly override this method but we might through default methods...
         // Check for default method update.
@@ -5881,9 +5799,10 @@
               // then.
               default_translations->insert(
                   {j, ClassLinker::MethodTranslation::CreateTranslatedMethod(default_method)});
-              VLOG(class_linker) << "Method " << PrettyMethod(super_method)
-                                 << " overridden by default " << PrettyMethod(default_method)
-                                 << " in " << PrettyClass(klass.Get());
+              VLOG(class_linker) << "Method " << super_method->PrettyMethod()
+                                 << " overridden by default "
+                                 << default_method->PrettyMethod()
+                                 << " in " << mirror::Class::PrettyClass(klass.Get());
             }
             break;
           }
@@ -6045,7 +5964,8 @@
         // The verifier should have caught the non-public method for dex version 37. Just warn and
         // skip it since this is from before default-methods so we don't really need to care that it
         // has code.
-        LOG(WARNING) << "Interface method " << PrettyMethod(current_method) << " is not public! "
+        LOG(WARNING) << "Interface method " << current_method->PrettyMethod()
+                     << " is not public! "
                      << "This will be a fatal error in subsequent versions of android. "
                      << "Continuing anyway.";
       }
@@ -6062,9 +5982,9 @@
                                         iface,
                                         image_pointer_size_)) {
           VLOG(class_linker) << "Conflicting default method implementations found: "
-                             << PrettyMethod(current_method) << " and "
-                             << PrettyMethod(*out_default_method) << " in class "
-                             << PrettyClass(klass.Get()) << " conflict.";
+                             << current_method->PrettyMethod() << " and "
+                             << ArtMethod::PrettyMethod(*out_default_method) << " in class "
+                             << klass->PrettyClass() << " conflict.";
           *out_default_method = nullptr;
           return DefaultMethodSearchResult::kDefaultConflict;
         } else {
@@ -6087,25 +6007,27 @@
           // We should now finish traversing the graph to find if we have default methods that
           // conflict.
         } else {
-          VLOG(class_linker) << "A default method '" << PrettyMethod(current_method) << "' was "
-                            << "skipped because it was overridden by an abstract method in a "
-                            << "subinterface on class '" << PrettyClass(klass.Get()) << "'";
+          VLOG(class_linker) << "A default method '" << current_method->PrettyMethod()
+                             << "' was "
+                             << "skipped because it was overridden by an abstract method in a "
+                             << "subinterface on class '" << klass->PrettyClass() << "'";
         }
       }
       break;
     }
   }
   if (*out_default_method != nullptr) {
-    VLOG(class_linker) << "Default method '" << PrettyMethod(*out_default_method) << "' selected "
-                       << "as the implementation for '" << PrettyMethod(target_method) << "' "
-                       << "in '" << PrettyClass(klass.Get()) << "'";
+    VLOG(class_linker) << "Default method '" << (*out_default_method)->PrettyMethod()
+                       << "' selected "
+                       << "as the implementation for '" << target_method->PrettyMethod()
+                       << "' in '" << klass->PrettyClass() << "'";
     return DefaultMethodSearchResult::kDefaultFound;
   } else {
     return DefaultMethodSearchResult::kAbstractFound;
   }
 }
 
-ArtMethod* ClassLinker::AddMethodToConflictTable(mirror::Class* klass,
+ArtMethod* ClassLinker::AddMethodToConflictTable(ObjPtr<mirror::Class> klass,
                                                  ArtMethod* conflict_method,
                                                  ArtMethod* interface_method,
                                                  ArtMethod* method,
@@ -6175,9 +6097,9 @@
   }
 }
 
-void ClassLinker::FillIMTAndConflictTables(mirror::Class* klass) {
-  DCHECK(klass->ShouldHaveImt()) << PrettyClass(klass);
-  DCHECK(!klass->IsTemp()) << PrettyClass(klass);
+void ClassLinker::FillIMTAndConflictTables(ObjPtr<mirror::Class> klass) {
+  DCHECK(klass->ShouldHaveImt()) << klass->PrettyClass();
+  DCHECK(!klass->IsTemp()) << klass->PrettyClass();
   ArtMethod* imt_data[ImTable::kSize];
   Runtime* const runtime = Runtime::Current();
   ArtMethod* const unimplemented_method = runtime->GetImtUnimplementedMethod();
@@ -6200,7 +6122,7 @@
   // Compare the IMT with the super class including the conflict methods. If they are equivalent,
   // we can just use the same pointer.
   ImTable* imt = nullptr;
-  mirror::Class* super_class = klass->GetSuperClass();
+  ObjPtr<mirror::Class> super_class = klass->GetSuperClass();
   if (super_class != nullptr && super_class->ShouldHaveImt()) {
     ImTable* super_imt = super_class->GetImt(image_pointer_size_);
     bool same = true;
@@ -6250,17 +6172,17 @@
   return CreateImtConflictTable(count, linear_alloc, image_pointer_size_);
 }
 
-void ClassLinker::FillIMTFromIfTable(mirror::IfTable* if_table,
+void ClassLinker::FillIMTFromIfTable(ObjPtr<mirror::IfTable> if_table,
                                      ArtMethod* unimplemented_method,
                                      ArtMethod* imt_conflict_method,
-                                     mirror::Class* klass,
+                                     ObjPtr<mirror::Class> klass,
                                      bool create_conflict_tables,
                                      bool ignore_copied_methods,
                                      /*out*/bool* new_conflict,
                                      /*out*/ArtMethod** imt) {
   uint32_t conflict_counts[ImTable::kSize] = {};
   for (size_t i = 0, length = if_table->Count(); i < length; ++i) {
-    mirror::Class* interface = if_table->GetInterface(i);
+    ObjPtr<mirror::Class> interface = if_table->GetInterface(i);
     const size_t num_virtuals = interface->NumVirtualMethods();
     const size_t method_array_count = if_table->GetMethodArrayCount(i);
     // Virtual methods can be larger than the if table methods if there are default methods.
@@ -6327,7 +6249,7 @@
     }
 
     for (size_t i = 0, length = if_table->Count(); i < length; ++i) {
-      mirror::Class* interface = if_table->GetInterface(i);
+      ObjPtr<mirror::Class> interface = if_table->GetInterface(i);
       const size_t method_array_count = if_table->GetMethodArrayCount(i);
       // Virtual methods can be larger than the if table methods if there are default methods.
       if (method_array_count == 0) {
@@ -6359,13 +6281,14 @@
 
 // Simple helper function that checks that no subtypes of 'val' are contained within the 'classes'
 // set.
-static bool NotSubinterfaceOfAny(const std::unordered_set<mirror::Class*>& classes,
-                                 mirror::Class* val)
+static bool NotSubinterfaceOfAny(
+    const std::unordered_set<ObjPtr<mirror::Class>, HashObjPtr>& classes,
+    ObjPtr<mirror::Class> val)
     REQUIRES(Roles::uninterruptible_)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   DCHECK(val != nullptr);
-  for (auto c : classes) {
-    if (val->IsAssignableFrom(&*c)) {
+  for (ObjPtr<mirror::Class> c : classes) {
+    if (val->IsAssignableFrom(c)) {
       return false;
     }
   }
@@ -6391,22 +6314,22 @@
 // super_ifcount entries filled in with the transitive closure of the interfaces of the superclass.
 // The other entries are uninitialized.  We will fill in the remaining entries in this function. The
 // iftable must be large enough to hold all interfaces without changing its size.
-static size_t FillIfTable(mirror::IfTable* iftable,
+static size_t FillIfTable(ObjPtr<mirror::IfTable> iftable,
                           size_t super_ifcount,
                           std::vector<mirror::Class*> to_process)
     REQUIRES(Roles::uninterruptible_)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   // This is the set of all class's already in the iftable. Used to make checking if a class has
   // already been added quicker.
-  std::unordered_set<mirror::Class*> classes_in_iftable;
+  std::unordered_set<ObjPtr<mirror::Class>, HashObjPtr> classes_in_iftable;
   // The first super_ifcount elements are from the superclass. We note that they are already added.
   for (size_t i = 0; i < super_ifcount; i++) {
-    mirror::Class* iface = iftable->GetInterface(i);
+    ObjPtr<mirror::Class> iface = iftable->GetInterface(i);
     DCHECK(NotSubinterfaceOfAny(classes_in_iftable, iface)) << "Bad ordering.";
     classes_in_iftable.insert(iface);
   }
   size_t filled_ifcount = super_ifcount;
-  for (mirror::Class* interface : to_process) {
+  for (ObjPtr<mirror::Class> interface : to_process) {
     // Let us call the first filled_ifcount elements of iftable the current-iface-list.
     // At this point in the loop current-iface-list has the invariant that:
     //    for every pair of interfaces I,J within it:
@@ -6419,7 +6342,7 @@
       // current-iface-list, skipping those already on it.
       int32_t ifcount = interface->GetIfTableCount();
       for (int32_t j = 0; j < ifcount; j++) {
-        mirror::Class* super_interface = interface->GetIfTable()->GetInterface(j);
+        ObjPtr<mirror::Class> super_interface = interface->GetIfTable()->GetInterface(j);
         if (!ContainsElement(classes_in_iftable, super_interface)) {
           DCHECK(NotSubinterfaceOfAny(classes_in_iftable, super_interface)) << "Bad ordering.";
           classes_in_iftable.insert(super_interface);
@@ -6436,23 +6359,24 @@
       // Check all super-interfaces are already in the list.
       int32_t ifcount = interface->GetIfTableCount();
       for (int32_t j = 0; j < ifcount; j++) {
-        mirror::Class* super_interface = interface->GetIfTable()->GetInterface(j);
+        ObjPtr<mirror::Class> super_interface = interface->GetIfTable()->GetInterface(j);
         DCHECK(ContainsElement(classes_in_iftable, super_interface))
-            << "Iftable does not contain " << PrettyClass(super_interface)
-            << ", a superinterface of " << PrettyClass(interface);
+            << "Iftable does not contain " << mirror::Class::PrettyClass(super_interface)
+            << ", a superinterface of " << interface->PrettyClass();
       }
     }
   }
   if (kIsDebugBuild) {
     // Check that the iftable is ordered correctly.
     for (size_t i = 0; i < filled_ifcount; i++) {
-      mirror::Class* if_a = iftable->GetInterface(i);
+      ObjPtr<mirror::Class> if_a = iftable->GetInterface(i);
       for (size_t j = i + 1; j < filled_ifcount; j++) {
-        mirror::Class* if_b = iftable->GetInterface(j);
+        ObjPtr<mirror::Class> if_b = iftable->GetInterface(j);
         // !(if_a <: if_b)
         CHECK(!if_b->IsAssignableFrom(if_a))
-            << "Bad interface order: " << PrettyClass(if_a) << " (index " << i << ") extends "
-            << PrettyClass(if_b) << " (index " << j << ") and so should be after it in the "
+            << "Bad interface order: " << mirror::Class::PrettyClass(if_a) << " (index " << i
+            << ") extends "
+            << if_b->PrettyClass() << " (index " << j << ") and so should be after it in the "
             << "interface list.";
       }
     }
@@ -6463,21 +6387,23 @@
 bool ClassLinker::SetupInterfaceLookupTable(Thread* self, Handle<mirror::Class> klass,
                                             Handle<mirror::ObjectArray<mirror::Class>> interfaces) {
   StackHandleScope<1> hs(self);
-  const size_t super_ifcount =
-      klass->HasSuperClass() ? klass->GetSuperClass()->GetIfTableCount() : 0U;
+  const bool has_superclass = klass->HasSuperClass();
+  const size_t super_ifcount = has_superclass ? klass->GetSuperClass()->GetIfTableCount() : 0U;
   const bool have_interfaces = interfaces.Get() != nullptr;
   const size_t num_interfaces =
       have_interfaces ? interfaces->GetLength() : klass->NumDirectInterfaces();
   if (num_interfaces == 0) {
     if (super_ifcount == 0) {
+      if (LIKELY(has_superclass)) {
+        klass->SetIfTable(klass->GetSuperClass()->GetIfTable());
+      }
       // Class implements no interfaces.
       DCHECK_EQ(klass->GetIfTableCount(), 0);
-      DCHECK(klass->GetIfTable() == nullptr);
       return true;
     }
     // Class implements same interfaces as parent, are any of these not marker interfaces?
     bool has_non_marker_interface = false;
-    mirror::IfTable* super_iftable = klass->GetSuperClass()->GetIfTable();
+    ObjPtr<mirror::IfTable> super_iftable = klass->GetSuperClass()->GetIfTable();
     for (size_t i = 0; i < super_ifcount; ++i) {
       if (super_iftable->GetMethodArrayCount(i) > 0) {
         has_non_marker_interface = true;
@@ -6493,7 +6419,7 @@
   size_t ifcount = super_ifcount + num_interfaces;
   // Check that every class being implemented is an interface.
   for (size_t i = 0; i < num_interfaces; i++) {
-    mirror::Class* interface = have_interfaces
+    ObjPtr<mirror::Class> interface = have_interfaces
         ? interfaces->GetWithoutChecks(i)
         : mirror::Class::GetDirectInterface(self, klass, i);
     DCHECK(interface != nullptr);
@@ -6501,7 +6427,7 @@
       std::string temp;
       ThrowIncompatibleClassChangeError(klass.Get(),
                                         "Class %s implements non-interface class %s",
-                                        PrettyDescriptor(klass.Get()).c_str(),
+                                        klass->PrettyDescriptor().c_str(),
                                         PrettyDescriptor(interface->GetDescriptor(&temp)).c_str());
       return false;
     }
@@ -6515,9 +6441,9 @@
   }
   // Fill in table with superclass's iftable.
   if (super_ifcount != 0) {
-    mirror::IfTable* super_iftable = klass->GetSuperClass()->GetIfTable();
+    ObjPtr<mirror::IfTable> super_iftable = klass->GetSuperClass()->GetIfTable();
     for (size_t i = 0; i < super_ifcount; i++) {
-      mirror::Class* super_interface = super_iftable->GetInterface(i);
+      ObjPtr<mirror::Class> super_interface = super_iftable->GetInterface(i);
       iftable->SetInterface(i, super_interface);
     }
   }
@@ -6532,9 +6458,9 @@
     ScopedAssertNoThreadSuspension nts("Copying mirror::Class*'s for FillIfTable");
     std::vector<mirror::Class*> to_add;
     for (size_t i = 0; i < num_interfaces; i++) {
-      mirror::Class* interface = have_interfaces ? interfaces->Get(i) :
+      ObjPtr<mirror::Class> interface = have_interfaces ? interfaces->Get(i) :
           mirror::Class::GetDirectInterface(self, klass, i);
-      to_add.push_back(interface);
+      to_add.push_back(interface.Ptr());
     }
 
     new_ifcount = FillIfTable(iftable.Get(), super_ifcount, std::move(to_add));
@@ -6586,7 +6512,7 @@
     REQUIRES_SHARED(Locks::mutator_lock_) {
   StackHandleScope<2> hs(self);
   Handle<mirror::PointerArray> check_vtable(hs.NewHandle(klass->GetVTableDuringLinking()));
-  mirror::Class* super_temp = (klass->HasSuperClass()) ? klass->GetSuperClass() : nullptr;
+  ObjPtr<mirror::Class> super_temp = (klass->HasSuperClass()) ? klass->GetSuperClass() : nullptr;
   Handle<mirror::Class> superclass(hs.NewHandle(super_temp));
   int32_t super_vtable_length = (superclass.Get() != nullptr) ? superclass->GetVTableLength() : 0;
   for (int32_t i = 0; i < check_vtable->GetLength(); ++i) {
@@ -6594,16 +6520,17 @@
     CHECK(m != nullptr);
 
     CHECK_EQ(m->GetMethodIndexDuringLinking(), i)
-        << PrettyMethod(m) << " has an unexpected method index for its spot in the vtable for class"
-        << PrettyClass(klass.Get());
+        << m->PrettyMethod()
+        << " has an unexpected method index for its spot in the vtable for class"
+        << klass->PrettyClass();
     ArraySlice<ArtMethod> virtuals = klass->GetVirtualMethodsSliceUnchecked(pointer_size);
     auto is_same_method = [m] (const ArtMethod& meth) {
       return &meth == m;
     };
     CHECK((super_vtable_length > i && superclass->GetVTableEntry(i, pointer_size) == m) ||
           std::find_if(virtuals.begin(), virtuals.end(), is_same_method) != virtuals.end())
-        << PrettyMethod(m) << " does not seem to be owned by current class "
-        << PrettyClass(klass.Get()) << " or any of its superclasses!";
+        << m->PrettyMethod() << " does not seem to be owned by current class "
+        << klass->PrettyClass() << " or any of its superclasses!";
   }
 }
 
@@ -6625,14 +6552,20 @@
     }
     MethodNameAndSignatureComparator name_comparator(
         vtable_entry->GetInterfaceMethodIfProxy(pointer_size));
-    for (int32_t j = i+1; j < num_entries; j++) {
+    for (int32_t j = i + 1; j < num_entries; j++) {
       ArtMethod* other_entry = vtable->GetElementPtrSize<ArtMethod*>(j, pointer_size);
+      if (!klass->CanAccessMember(other_entry->GetDeclaringClass(),
+                                  other_entry->GetAccessFlags())) {
+        continue;
+      }
       CHECK(vtable_entry != other_entry &&
             !name_comparator.HasSameNameAndSignature(
                 other_entry->GetInterfaceMethodIfProxy(pointer_size)))
           << "vtable entries " << i << " and " << j << " are identical for "
-          << PrettyClass(klass.Get()) << " in method " << PrettyMethod(vtable_entry) << " and "
-          << PrettyMethod(other_entry);
+          << klass->PrettyClass() << " in method " << vtable_entry->PrettyMethod() << " (0x"
+          << std::hex << reinterpret_cast<uintptr_t>(vtable_entry) << ") and "
+          << other_entry->PrettyMethod() << "  (0x" << std::hex
+          << reinterpret_cast<uintptr_t>(other_entry) << ")";
     }
   }
 }
@@ -6649,7 +6582,7 @@
                                         bool* new_conflict,
                                         ArtMethod** imt) {
   DCHECK(klass->HasSuperClass());
-  mirror::Class* super_class = klass->GetSuperClass();
+  ObjPtr<mirror::Class> super_class = klass->GetSuperClass();
   if (super_class->ShouldHaveImt()) {
     ImTable* super_imt = super_class->GetImt(image_pointer_size_);
     for (size_t i = 0; i < ImTable::kSize; ++i) {
@@ -6657,8 +6590,8 @@
     }
   } else {
     // No imt in the super class, need to reconstruct from the iftable.
-    mirror::IfTable* if_table = super_class->GetIfTable();
-    if (if_table != nullptr) {
+    ObjPtr<mirror::IfTable> if_table = super_class->GetIfTable();
+    if (if_table->Count() != 0) {
       // Ignore copied methods since we will handle these in LinkInterfaceMethods.
       FillIMTFromIfTable(if_table,
                          unimplemented_method,
@@ -6728,9 +6661,9 @@
         // This is an interface implemented by a super-class. Therefore we can just copy the method
         // array from the superclass.
         const bool super_interface = is_super && extend_super_iftable;
-        mirror::PointerArray* method_array;
+        ObjPtr<mirror::PointerArray> method_array;
         if (super_interface) {
-          mirror::IfTable* if_table = klass->GetSuperClass()->GetIfTable();
+          ObjPtr<mirror::IfTable> if_table = klass->GetSuperClass()->GetIfTable();
           DCHECK(if_table != nullptr);
           DCHECK(if_table->GetMethodArray(i) != nullptr);
           // If we are working on a super interface, try extending the existing method array.
@@ -6829,7 +6762,8 @@
               self->EndAssertNoThreadSuspension(old_cause);
               ThrowIllegalAccessError(klass.Get(),
                   "Method '%s' implementing interface method '%s' is not public",
-                  PrettyMethod(vtable_method).c_str(), PrettyMethod(interface_method).c_str());
+                  vtable_method->PrettyMethod().c_str(),
+                  interface_method->PrettyMethod().c_str());
               return false;
             } else if (UNLIKELY(vtable_method->IsOverridableByDefaultMethod())) {
               // We might have a newer, better, default method for this, so we just skip it. If we
@@ -6888,8 +6822,10 @@
             // illegal states, incorrect vtable size, and incorrect or inconsistent iftable entries)
             // in this class and any subclasses.
             DCHECK(vtable_impl == nullptr || vtable_impl == supers_method)
-                << "vtable_impl was " << PrettyMethod(vtable_impl) << " and not 'nullptr' or "
-                << PrettyMethod(supers_method) << " as expected. IFTable appears to be corrupt!";
+                << "vtable_impl was " << ArtMethod::PrettyMethod(vtable_impl)
+                << " and not 'nullptr' or "
+                << supers_method->PrettyMethod()
+                << " as expected. IFTable appears to be corrupt!";
             vtable_impl = supers_method;
           }
         }
@@ -6989,7 +6925,7 @@
             ArtMethod* miranda_method = FindSameNameAndSignature(interface_name_comparator,
                                                                  miranda_methods);
             if (miranda_method == nullptr) {
-              DCHECK(interface_method->IsAbstract()) << PrettyMethod(interface_method);
+              DCHECK(interface_method->IsAbstract()) << interface_method->PrettyMethod();
               miranda_method = reinterpret_cast<ArtMethod*>(allocator.Alloc(method_size));
               CHECK(miranda_method != nullptr);
               // Point the interface table at a phantom slot.
@@ -7021,7 +6957,8 @@
   if (has_new_virtuals) {
     DCHECK(!is_interface || (default_methods.empty() && miranda_methods.empty()))
         << "Interfaces should only have default-conflict methods appended to them.";
-    VLOG(class_linker) << PrettyClass(klass.Get()) << ": miranda_methods=" << miranda_methods.size()
+    VLOG(class_linker) << mirror::Class::PrettyClass(klass.Get()) << ": miranda_methods="
+                       << miranda_methods.size()
                        << " default_methods=" << default_methods.size()
                        << " overriding_default_methods=" << overriding_default_methods.size()
                        << " default_conflict_methods=" << default_conflict_methods.size()
@@ -7154,7 +7091,7 @@
           auto translated_method_it = move_table.find(new_method);
           CHECK(translated_method_it != move_table.end())
               << "We must have a translation for methods added to the classes methods_ array! We "
-              << "could not find the ArtMethod added for " << PrettyMethod(new_method);
+              << "could not find the ArtMethod added for " << ArtMethod::PrettyMethod(new_method);
           ArtMethod* new_vtable_method = translated_method_it->second;
           // Leave the declaring class alone the method's dex_code_item_offset_ and dex_method_index_
           // fields are references into the dex file the method was defined in. Since the ArtMethod
@@ -7226,11 +7163,11 @@
         for (size_t j = 0, count = iftable->GetMethodArrayCount(i); j < count; ++j) {
           auto* method_array = iftable->GetMethodArray(i);
           auto* m = method_array->GetElementPtrSize<ArtMethod*>(j, image_pointer_size_);
-          DCHECK(m != nullptr) << PrettyClass(klass.Get());
+          DCHECK(m != nullptr) << klass->PrettyClass();
           auto it = move_table.find(m);
           if (it != move_table.end()) {
             auto* new_m = it->second;
-            DCHECK(new_m != nullptr) << PrettyClass(klass.Get());
+            DCHECK(new_m != nullptr) << klass->PrettyClass();
             method_array->SetElementPtrSize(j, new_m, image_pointer_size_);
           }
         }
@@ -7258,7 +7195,7 @@
                            [m] (ArtMethod& meth) {
                              return &meth == m;
                            }) != m->GetDeclaringClass()->GetMethods(image_pointer_size_).end())
-            << "Obsolete methods " << PrettyMethod(m) << " is in dex cache!";
+            << "Obsolete methods " << m->PrettyMethod() << " is in dex cache!";
       }
     }
     // Put some random garbage in old methods to help find stale pointers.
@@ -7338,15 +7275,15 @@
   if (is_static) {
     field_offset = klass->GetFirstReferenceStaticFieldOffsetDuringLinking(image_pointer_size_);
   } else {
-    mirror::Class* super_class = klass->GetSuperClass();
+    ObjPtr<mirror::Class> super_class = klass->GetSuperClass();
     if (super_class != nullptr) {
       CHECK(super_class->IsResolved())
-          << PrettyClass(klass.Get()) << " " << PrettyClass(super_class);
+          << klass->PrettyClass() << " " << super_class->PrettyClass();
       field_offset = MemberOffset(super_class->GetObjectSize());
     }
   }
 
-  CHECK_EQ(num_fields == 0, fields == nullptr) << PrettyClass(klass.Get());
+  CHECK_EQ(num_fields == 0, fields == nullptr) << klass->PrettyClass();
 
   // we want a relatively stable order so that adding new fields
   // minimizes disruption of C++ version such as Class and Method.
@@ -7412,9 +7349,9 @@
   if (!is_static && klass->DescriptorEquals("Ljava/lang/ref/Reference;")) {
     // We know there are no non-reference fields in the Reference classes, and we know
     // that 'referent' is alphabetically last, so this is easy...
-    CHECK_EQ(num_reference_fields, num_fields) << PrettyClass(klass.Get());
+    CHECK_EQ(num_reference_fields, num_fields) << klass->PrettyClass();
     CHECK_STREQ(fields->At(num_fields - 1).GetName(), "referent")
-        << PrettyClass(klass.Get());
+        << klass->PrettyClass();
     --num_reference_fields;
   }
 
@@ -7425,7 +7362,7 @@
     *class_size = size;
   } else {
     klass->SetNumReferenceInstanceFields(num_reference_fields);
-    mirror::Class* super_class = klass->GetSuperClass();
+    ObjPtr<mirror::Class> super_class = klass->GetSuperClass();
     if (num_reference_fields == 0 || super_class == nullptr) {
       // object has one reference field, klass, but we ignore it since we always visit the class.
       // super_class is null iff the class is java.lang.Object.
@@ -7437,17 +7374,17 @@
     if (kIsDebugBuild) {
       DCHECK_EQ(super_class == nullptr, klass->DescriptorEquals("Ljava/lang/Object;"));
       size_t total_reference_instance_fields = 0;
-      mirror::Class* cur_super = klass.Get();
+      ObjPtr<mirror::Class> cur_super = klass.Get();
       while (cur_super != nullptr) {
         total_reference_instance_fields += cur_super->NumReferenceInstanceFieldsDuringLinking();
         cur_super = cur_super->GetSuperClass();
       }
       if (super_class == nullptr) {
-        CHECK_EQ(total_reference_instance_fields, 1u) << PrettyDescriptor(klass.Get());
+        CHECK_EQ(total_reference_instance_fields, 1u) << klass->PrettyDescriptor();
       } else {
         // Check that there is at least num_reference_fields other than Object.class.
         CHECK_GE(total_reference_instance_fields, 1u + num_reference_fields)
-            << PrettyClass(klass.Get());
+            << klass->PrettyClass();
       }
     }
     if (!klass->IsVariableSize()) {
@@ -7475,8 +7412,8 @@
     for (size_t i = 0; i < num_fields; i++) {
       ArtField* field = &fields->At(i);
       VLOG(class_linker) << "LinkFields: " << (is_static ? "static" : "instance")
-          << " class=" << PrettyClass(klass.Get()) << " field=" << PrettyField(field) << " offset="
-          << field->GetOffsetDuringLinking();
+          << " class=" << klass->PrettyClass() << " field=" << field->PrettyField()
+          << " offset=" << field->GetOffsetDuringLinking();
       if (i != 0) {
         ArtField* const prev_field = &fields->At(i - 1);
         // NOTE: The field names can be the same. This is not possible in the Java language
@@ -7513,7 +7450,7 @@
 //  Set the bitmap of reference instance field offsets.
 void ClassLinker::CreateReferenceInstanceOffsets(Handle<mirror::Class> klass) {
   uint32_t reference_offsets = 0;
-  mirror::Class* super_class = klass->GetSuperClass();
+  ObjPtr<mirror::Class> super_class = klass->GetSuperClass();
   // Leave the reference offsets as 0 for mirror::Object (the class field is handled specially).
   if (super_class != nullptr) {
     reference_offsets = super_class->GetReferenceInstanceOffsets();
@@ -7543,33 +7480,33 @@
                                            uint32_t string_idx,
                                            Handle<mirror::DexCache> dex_cache) {
   DCHECK(dex_cache.Get() != nullptr);
-  mirror::String* resolved = dex_cache->GetResolvedString(string_idx);
   Thread::PoisonObjectPointersIfDebug();
+  ObjPtr<mirror::String> resolved = dex_cache->GetResolvedString(string_idx);
   if (resolved != nullptr) {
-    return resolved;
+    return resolved.Ptr();
   }
   uint32_t utf16_length;
   const char* utf8_data = dex_file.StringDataAndUtf16LengthByIdx(string_idx, &utf16_length);
-  mirror::String* string = intern_table_->InternStrong(utf16_length, utf8_data);
+  ObjPtr<mirror::String> string = intern_table_->InternStrong(utf16_length, utf8_data);
   dex_cache->SetResolvedString(string_idx, string);
-  return string;
+  return string.Ptr();
 }
 
 mirror::String* ClassLinker::LookupString(const DexFile& dex_file,
                                           uint32_t string_idx,
                                           Handle<mirror::DexCache> dex_cache) {
   DCHECK(dex_cache.Get() != nullptr);
-  mirror::String* resolved = dex_cache->GetResolvedString(string_idx);
+  ObjPtr<mirror::String> resolved = dex_cache->GetResolvedString(string_idx);
   if (resolved != nullptr) {
-    return resolved;
+    return resolved.Ptr();
   }
   uint32_t utf16_length;
   const char* utf8_data = dex_file.StringDataAndUtf16LengthByIdx(string_idx, &utf16_length);
-  mirror::String* string = intern_table_->LookupStrong(Thread::Current(), utf16_length, utf8_data);
+  ObjPtr<mirror::String> string = intern_table_->LookupStrong(Thread::Current(), utf16_length, utf8_data);
   if (string != nullptr) {
     dex_cache->SetResolvedString(string_idx, string);
   }
-  return string;
+  return string.Ptr();
 }
 
 ObjPtr<mirror::Class> ClassLinker::LookupResolvedType(const DexFile& dex_file,
@@ -7593,14 +7530,14 @@
     }
   }
   if (type != nullptr || type->IsResolved()) {
-    return type;
+    return type.Ptr();
   }
   return nullptr;
 }
 
 mirror::Class* ClassLinker::ResolveType(const DexFile& dex_file,
                                         uint16_t type_idx,
-                                        mirror::Class* referrer) {
+                                        ObjPtr<mirror::Class> referrer) {
   StackHandleScope<2> hs(Thread::Current());
   Handle<mirror::DexCache> dex_cache(hs.NewHandle(referrer->GetDexCache()));
   Handle<mirror::ClassLoader> class_loader(hs.NewHandle(referrer->GetClassLoader()));
@@ -7612,8 +7549,8 @@
                                         Handle<mirror::DexCache> dex_cache,
                                         Handle<mirror::ClassLoader> class_loader) {
   DCHECK(dex_cache.Get() != nullptr);
-  mirror::Class* resolved = dex_cache->GetResolvedType(type_idx);
   Thread::PoisonObjectPointersIfDebug();
+  ObjPtr<mirror::Class> resolved = dex_cache->GetResolvedType(type_idx);
   if (resolved == nullptr) {
     Thread* self = Thread::Current();
     const char* descriptor = dex_file.StringByTypeIdx(type_idx);
@@ -7638,8 +7575,8 @@
     }
   }
   DCHECK((resolved == nullptr) || resolved->IsResolved() || resolved->IsErroneous())
-      << PrettyDescriptor(resolved) << " " << resolved->GetStatus();
-  return resolved;
+      << resolved->PrettyDescriptor() << " " << resolved->GetStatus();
+  return resolved.Ptr();
 }
 
 template <ClassLinker::ResolveMode kResolveMode>
@@ -7665,7 +7602,7 @@
   }
   // Fail, get the declaring class.
   const DexFile::MethodId& method_id = dex_file.GetMethodId(method_idx);
-  mirror::Class* klass = ResolveType(dex_file, method_id.class_idx_, dex_cache, class_loader);
+  ObjPtr<mirror::Class> klass = ResolveType(dex_file, method_id.class_idx_, dex_cache, class_loader);
   if (klass == nullptr) {
     DCHECK(Thread::Current()->IsExceptionPending());
     return nullptr;
@@ -7703,7 +7640,7 @@
       if (UNLIKELY(!klass->IsInterface())) {
         ThrowIncompatibleClassChangeError(klass,
                                           "Found class %s, but interface was expected",
-                                          PrettyDescriptor(klass).c_str());
+                                          klass->PrettyDescriptor().c_str());
         return nullptr;
       } else {
         resolved = klass->FindInterfaceMethod(dex_cache.Get(), method_idx, image_pointer_size_);
@@ -7781,8 +7718,8 @@
       // If we found something, check that it can be accessed by the referrer.
       bool exception_generated = false;
       if (resolved != nullptr && referrer != nullptr) {
-        mirror::Class* methods_class = resolved->GetDeclaringClass();
-        mirror::Class* referring_class = referrer->GetDeclaringClass();
+        ObjPtr<mirror::Class> methods_class = resolved->GetDeclaringClass();
+        ObjPtr<mirror::Class> referring_class = referrer->GetDeclaringClass();
         if (!referring_class->CanAccess(methods_class)) {
           ThrowIllegalAccessErrorClassForMethodDispatch(referring_class,
                                                         methods_class,
@@ -7863,13 +7800,14 @@
   }
   // Fail, get the declaring class.
   const DexFile::MethodId& method_id = dex_file.GetMethodId(method_idx);
-  mirror::Class* klass = ResolveType(dex_file, method_id.class_idx_, dex_cache, class_loader);
+  ObjPtr<mirror::Class> klass = ResolveType(dex_file, method_id.class_idx_, dex_cache, class_loader);
   if (klass == nullptr) {
     Thread::Current()->AssertPendingException();
     return nullptr;
   }
   if (klass->IsInterface()) {
-    LOG(FATAL) << "ResolveAmbiguousMethod: unexpected method in interface: " << PrettyClass(klass);
+    LOG(FATAL) << "ResolveAmbiguousMethod: unexpected method in interface: "
+               << klass->PrettyClass();
     return nullptr;
   }
 
@@ -7965,9 +7903,9 @@
   DCHECK(Runtime::Current()->IsMethodHandlesEnabled());
   DCHECK(dex_cache.Get() != nullptr);
 
-  mirror::MethodType* resolved = dex_cache->GetResolvedMethodType(proto_idx);
+  ObjPtr<mirror::MethodType> resolved = dex_cache->GetResolvedMethodType(proto_idx);
   if (resolved != nullptr) {
-    return resolved;
+    return resolved.Ptr();
   }
 
   Thread* const self = Thread::Current();
@@ -7988,8 +7926,8 @@
   // other than by looking at the shorty ?
   const size_t num_method_args = strlen(dex_file.StringDataByIdx(proto_id.shorty_idx_)) - 1;
 
-  mirror::Class* class_type = mirror::Class::GetJavaLangClass();
-  mirror::Class* array_of_class = FindArrayClass(self, &class_type);
+  ObjPtr<mirror::Class> class_type = mirror::Class::GetJavaLangClass();
+  ObjPtr<mirror::Class> array_of_class = FindArrayClass(self, &class_type);
   Handle<mirror::ObjectArray<mirror::Class>> method_params(hs.NewHandle(
       mirror::ObjectArray<mirror::Class>::Alloc(self, array_of_class, num_method_args)));
   if (method_params.Get() == nullptr) {
@@ -8023,8 +7961,8 @@
 const char* ClassLinker::MethodShorty(uint32_t method_idx,
                                       ArtMethod* referrer,
                                       uint32_t* length) {
-  mirror::Class* declaring_class = referrer->GetDeclaringClass();
-  mirror::DexCache* dex_cache = declaring_class->GetDexCache();
+  ObjPtr<mirror::Class> declaring_class = referrer->GetDeclaringClass();
+  ObjPtr<mirror::DexCache> dex_cache = declaring_class->GetDexCache();
   const DexFile& dex_file = *dex_cache->GetDexFile();
   const DexFile::MethodId& method_id = dex_file.GetMethodId(method_idx);
   return dex_file.GetMethodShorty(method_id, length);
@@ -8034,7 +7972,7 @@
  public:
   explicit DumpClassVisitor(int flags) : flags_(flags) {}
 
-  bool operator()(mirror::Class* klass) OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) {
+  bool operator()(ObjPtr<mirror::Class> klass) OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) {
     klass->DumpClass(LOG_STREAM(ERROR), flags_);
     return true;
   }
@@ -8093,9 +8031,6 @@
 
 void ClassLinker::DumpForSigQuit(std::ostream& os) {
   ScopedObjectAccess soa(Thread::Current());
-  if (dex_cache_boot_image_class_lookup_required_) {
-    AddBootImageClassesToClassTable();
-  }
   ReaderMutexLock mu(soa.Self(), *Locks::classlinker_classes_lock_);
   os << "Zygote loaded classes=" << NumZygoteClasses() << " post zygote classes="
      << NumNonZygoteClasses() << "\n";
@@ -8105,7 +8040,7 @@
  public:
   CountClassesVisitor() : num_zygote_classes(0), num_non_zygote_classes(0) {}
 
-  void Visit(mirror::ClassLoader* class_loader)
+  void Visit(ObjPtr<mirror::ClassLoader> class_loader)
       REQUIRES_SHARED(Locks::classlinker_classes_lock_, Locks::mutator_lock_) OVERRIDE {
     ClassTable* const class_table = class_loader->GetClassTable();
     if (class_table != nullptr) {
@@ -8131,9 +8066,6 @@
 }
 
 size_t ClassLinker::NumLoadedClasses() {
-  if (dex_cache_boot_image_class_lookup_required_) {
-    AddBootImageClassesToClassTable();
-  }
   ReaderMutexLock mu(Thread::Current(), *Locks::classlinker_classes_lock_);
   // Only return non zygote classes since these are the ones which apps which care about.
   return NumNonZygoteClasses();
@@ -8147,7 +8079,7 @@
   return dex_lock_.GetExclusiveOwnerTid();
 }
 
-void ClassLinker::SetClassRoot(ClassRoot class_root, mirror::Class* klass) {
+void ClassLinker::SetClassRoot(ClassRoot class_root, ObjPtr<mirror::Class> klass) {
   DCHECK(!init_done_);
 
   DCHECK(klass != nullptr);
@@ -8182,6 +8114,7 @@
     "Ljava/lang/Throwable;",
     "Ljava/lang/ClassNotFoundException;",
     "Ljava/lang/StackTraceElement;",
+    "Ldalvik/system/EmulatedStackFrame;",
     "Z",
     "B",
     "C",
@@ -8200,6 +8133,7 @@
     "[J",
     "[S",
     "[Ljava/lang/StackTraceElement;",
+    "Ldalvik/system/ClassExt;",
   };
   static_assert(arraysize(class_roots_descriptors) == size_t(kClassRootsMax),
                 "Mismatch between class descriptors and class-root enum");
@@ -8219,7 +8153,7 @@
   StackHandleScope<11> hs(self);
 
   ArtField* dex_elements_field =
-      soa.DecodeField(WellKnownClasses::dalvik_system_DexPathList_dexElements);
+      jni::DecodeArtField(WellKnownClasses::dalvik_system_DexPathList_dexElements);
 
   Handle<mirror::Class> dex_elements_class(hs.NewHandle(dex_elements_field->GetType<true>()));
   DCHECK(dex_elements_class.Get() != nullptr);
@@ -8232,13 +8166,13 @@
       hs.NewHandle(dex_elements_class->GetComponentType());
 
   ArtField* element_file_field =
-      soa.DecodeField(WellKnownClasses::dalvik_system_DexPathList__Element_dexFile);
+      jni::DecodeArtField(WellKnownClasses::dalvik_system_DexPathList__Element_dexFile);
   DCHECK_EQ(h_dex_element_class.Get(), element_file_field->GetDeclaringClass());
 
-  ArtField* cookie_field = soa.DecodeField(WellKnownClasses::dalvik_system_DexFile_cookie);
+  ArtField* cookie_field = jni::DecodeArtField(WellKnownClasses::dalvik_system_DexFile_cookie);
   DCHECK_EQ(cookie_field->GetDeclaringClass(), element_file_field->GetType<false>());
 
-  ArtField* file_name_field = soa.DecodeField(WellKnownClasses::dalvik_system_DexFile_fileName);
+  ArtField* file_name_field = jni::DecodeArtField(WellKnownClasses::dalvik_system_DexFile_fileName);
   DCHECK_EQ(file_name_field->GetDeclaringClass(), element_file_field->GetType<false>());
 
   // Fill the elements array.
@@ -8288,7 +8222,7 @@
   DCHECK(h_path_class_loader.Get() != nullptr);
   // Set DexPathList.
   ArtField* path_list_field =
-      soa.DecodeField(WellKnownClasses::dalvik_system_PathClassLoader_pathList);
+      jni::DecodeArtField(WellKnownClasses::dalvik_system_BaseDexClassLoader_pathList);
   DCHECK(path_list_field != nullptr);
   path_list_field->SetObject<false>(h_path_class_loader.Get(), h_dex_path_list.Get());
 
@@ -8298,13 +8232,13 @@
       mirror::Class::FindField(self, hs.NewHandle(h_path_class_loader->GetClass()), "parent",
                                "Ljava/lang/ClassLoader;");
   DCHECK(parent_field != nullptr);
-  mirror::Object* boot_cl =
+  ObjPtr<mirror::Object> boot_cl =
       soa.Decode<mirror::Class>(WellKnownClasses::java_lang_BootClassLoader)->AllocObject(self);
   parent_field->SetObject<false>(h_path_class_loader.Get(), boot_cl);
 
   // Make it a global ref and return.
   ScopedLocalRef<jobject> local_ref(
-      soa.Env(), soa.Env()->AddLocalReference<jobject>(MakeObjPtr(h_path_class_loader.Get())));
+      soa.Env(), soa.Env()->AddLocalReference<jobject>(h_path_class_loader.Get()));
   return soa.Env()->NewGlobalRef(local_ref.get());
 }
 
@@ -8341,19 +8275,20 @@
   Thread* const self = Thread::Current();
   for (const ClassLoaderData& data : class_loaders_) {
     // Need to use DecodeJObject so that we get null for cleared JNI weak globals.
-    auto* const class_loader = down_cast<mirror::ClassLoader*>(self->DecodeJObject(data.weak_root));
+    ObjPtr<mirror::ClassLoader> class_loader = ObjPtr<mirror::ClassLoader>::DownCast(
+        self->DecodeJObject(data.weak_root));
     if (class_loader != nullptr) {
-      visitor->Visit(class_loader);
+      visitor->Visit(class_loader.Ptr());
     }
   }
 }
 
-void ClassLinker::InsertDexFileInToClassLoader(mirror::Object* dex_file,
-                                               mirror::ClassLoader* class_loader) {
+void ClassLinker::InsertDexFileInToClassLoader(ObjPtr<mirror::Object> dex_file,
+                                               ObjPtr<mirror::ClassLoader> class_loader) {
   DCHECK(dex_file != nullptr);
   Thread* const self = Thread::Current();
   WriterMutexLock mu(self, *Locks::classlinker_classes_lock_);
-  ClassTable* const table = ClassTableForClassLoader(class_loader);
+  ClassTable* const table = ClassTableForClassLoader(class_loader.Ptr());
   DCHECK(table != nullptr);
   if (table->InsertStrongRoot(dex_file) && class_loader != nullptr) {
     // It was not already inserted, perform the write barrier to let the GC know the class loader's
@@ -8371,8 +8306,8 @@
     for (auto it = class_loaders_.begin(); it != class_loaders_.end(); ) {
       const ClassLoaderData& data = *it;
       // Need to use DecodeJObject so that we get null for cleared JNI weak globals.
-      auto* const class_loader =
-          down_cast<mirror::ClassLoader*>(self->DecodeJObject(data.weak_root));
+      ObjPtr<mirror::ClassLoader> class_loader =
+          ObjPtr<mirror::ClassLoader>::DownCast(self->DecodeJObject(data.weak_root));
       if (class_loader != nullptr) {
         ++it;
       } else {
@@ -8400,8 +8335,7 @@
     if (soa.Self()->IsJWeakCleared(data.weak_root)) {
       continue;
     }
-    mirror::DexCache* dex_cache =
-        down_cast<mirror::DexCache*>(soa.Self()->DecodeJObject(data.weak_root));
+    ObjPtr<mirror::DexCache> dex_cache = soa.Decode<mirror::DexCache>(data.weak_root);
     if (dex_cache == nullptr) {
       continue;
     }
@@ -8419,7 +8353,7 @@
     std::unordered_set<uint16_t> class_set;
     CHECK_EQ(num_types, dex_cache->NumResolvedTypes());
     for (size_t i = 0; i < num_types; ++i) {
-      mirror::Class* klass = dex_cache->GetResolvedType(i);
+      ObjPtr<mirror::Class> klass = dex_cache->GetResolvedType(i);
       // Filter out null class loader since that is the boot class loader.
       if (klass == nullptr || (ignore_boot_classes && klass->GetClassLoader() == nullptr)) {
         continue;
@@ -8430,7 +8364,7 @@
         DCHECK(klass->IsErroneous());
         continue;
       }
-      mirror::DexCache* klass_dex_cache = klass->GetDexCache();
+      ObjPtr<mirror::DexCache> klass_dex_cache = klass->GetDexCache();
       if (klass_dex_cache == dex_cache) {
         const size_t class_def_idx = klass->GetDexClassDefIndex();
         DCHECK(klass->IsResolved());
@@ -8468,8 +8402,7 @@
   ReaderMutexLock mu(self, *DexLock());
   for (const ClassLinker::DexCacheData& data : GetDexCachesData()) {
     if (!self->IsJWeakCleared(data.weak_root)) {
-      mirror::DexCache* dex_cache =
-          down_cast<mirror::DexCache*>(soa.Self()->DecodeJObject(data.weak_root));
+      ObjPtr<mirror::DexCache> dex_cache = soa.Decode<mirror::DexCache>(data.weak_root);
       if (dex_cache != nullptr) {
         const DexFile* dex_file = dex_cache->GetDexFile();
         // There could be duplicates if two dex files with the same location are mapped.
@@ -8509,7 +8442,7 @@
       : method_(method),
         pointer_size_(pointer_size) {}
 
-  bool operator()(mirror::Class* klass) REQUIRES_SHARED(Locks::mutator_lock_) OVERRIDE {
+  bool operator()(ObjPtr<mirror::Class> klass) REQUIRES_SHARED(Locks::mutator_lock_) OVERRIDE {
     if (klass->GetVirtualMethodsSliceUnchecked(pointer_size_).Contains(method_)) {
       holder_ = klass;
     }
@@ -8517,7 +8450,7 @@
     return holder_ == nullptr;
   }
 
-  mirror::Class* holder_ = nullptr;
+  ObjPtr<mirror::Class> holder_ = nullptr;
   const ArtMethod* const method_;
   const PointerSize pointer_size_;
 };
@@ -8527,7 +8460,7 @@
   CHECK(method->IsCopied());
   FindVirtualMethodHolderVisitor visitor(method, image_pointer_size_);
   VisitClasses(&visitor);
-  return visitor.holder_;
+  return visitor.holder_.Ptr();
 }
 
 // Instantiate ResolveMethod.
diff --git a/runtime/class_linker.h b/runtime/class_linker.h
index 43ffc8e..1d29e31 100644
--- a/runtime/class_linker.h
+++ b/runtime/class_linker.h
@@ -36,6 +36,7 @@
 #include "jni.h"
 #include "mirror/class.h"
 #include "object_callbacks.h"
+#include "verifier/method_verifier.h"
 #include "verifier/verifier_log_mode.h"
 
 namespace art {
@@ -73,13 +74,13 @@
  public:
   virtual ~ClassVisitor() {}
   // Return true to continue visiting.
-  virtual bool operator()(mirror::Class* klass) = 0;
+  virtual bool operator()(ObjPtr<mirror::Class> klass) = 0;
 };
 
 class ClassLoaderVisitor {
  public:
   virtual ~ClassLoaderVisitor() {}
-  virtual void Visit(mirror::ClassLoader* class_loader)
+  virtual void Visit(ObjPtr<mirror::ClassLoader> class_loader)
       REQUIRES_SHARED(Locks::classlinker_classes_lock_, Locks::mutator_lock_) = 0;
 };
 
@@ -108,6 +109,7 @@
     kJavaLangThrowable,
     kJavaLangClassNotFoundException,
     kJavaLangStackTraceElement,
+    kDalvikSystemEmulatedStackFrame,
     kPrimitiveBoolean,
     kPrimitiveByte,
     kPrimitiveChar,
@@ -126,6 +128,7 @@
     kLongArrayClass,
     kShortArrayClass,
     kJavaLangStackTraceElementArrayClass,
+    kDalvikSystemClassExt,
     kClassRootsMax,
   };
 
@@ -171,20 +174,6 @@
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!dex_lock_);
 
-  // Finds a class in the path class loader, loading it if necessary without using JNI. Hash
-  // function is supposed to be ComputeModifiedUtf8Hash(descriptor). Returns true if the
-  // class-loader chain could be handled, false otherwise, i.e., a non-supported class-loader
-  // was encountered while walking the parent chain (currently only BootClassLoader and
-  // PathClassLoader are supported).
-  bool FindClassInPathClassLoader(ScopedObjectAccessAlreadyRunnable& soa,
-                                  Thread* self,
-                                  const char* descriptor,
-                                  size_t hash,
-                                  Handle<mirror::ClassLoader> class_loader,
-                                  mirror::Class** result)
-      REQUIRES_SHARED(Locks::mutator_lock_)
-      REQUIRES(!dex_lock_);
-
   // Finds a class by its descriptor using the "system" class loader, ie by searching the
   // boot_class_path_.
   mirror::Class* FindSystemClass(Thread* self, const char* descriptor)
@@ -192,7 +181,7 @@
       REQUIRES(!dex_lock_);
 
   // Finds the array class given for the element class.
-  mirror::Class* FindArrayClass(Thread* self, mirror::Class** element_class)
+  mirror::Class* FindArrayClass(Thread* self, ObjPtr<mirror::Class>* element_class)
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!dex_lock_);
 
@@ -215,13 +204,14 @@
   // by the given 'class_loader'.
   mirror::Class* LookupClass(Thread* self,
                              const char* descriptor,
-                             size_t hash,
-                             mirror::ClassLoader* class_loader)
+                             ObjPtr<mirror::ClassLoader> class_loader)
       REQUIRES(!Locks::classlinker_classes_lock_)
-      REQUIRES_SHARED(Locks::mutator_lock_);
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    return LookupClass(self, descriptor, ComputeModifiedUtf8Hash(descriptor), class_loader);
+  }
 
   // Finds all the classes with the given descriptor, regardless of ClassLoader.
-  void LookupClasses(const char* descriptor, std::vector<mirror::Class*>& classes)
+  void LookupClasses(const char* descriptor, std::vector<ObjPtr<mirror::Class>>& classes)
       REQUIRES(!Locks::classlinker_classes_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
@@ -229,7 +219,7 @@
 
   // General class unloading is not supported, this is used to prune
   // unwanted classes during image writing.
-  bool RemoveClass(const char* descriptor, mirror::ClassLoader* class_loader)
+  bool RemoveClass(const char* descriptor, ObjPtr<mirror::ClassLoader> class_loader)
       REQUIRES(!Locks::classlinker_classes_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
@@ -264,7 +254,9 @@
   // Resolve a Type with the given index from the DexFile, storing the
   // result in the DexCache. The referrer is used to identify the
   // target DexCache and ClassLoader to use for resolution.
-  mirror::Class* ResolveType(const DexFile& dex_file, uint16_t type_idx, mirror::Class* referrer)
+  mirror::Class* ResolveType(const DexFile& dex_file,
+                             uint16_t type_idx,
+                             ObjPtr<mirror::Class> referrer)
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!dex_lock_, !Roles::uninterruptible_);
 
@@ -342,9 +334,9 @@
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!dex_lock_, !Roles::uninterruptible_);
 
-  ArtField* GetResolvedField(uint32_t field_idx, mirror::Class* field_declaring_class)
+  ArtField* GetResolvedField(uint32_t field_idx, ObjPtr<mirror::Class> field_declaring_class)
       REQUIRES_SHARED(Locks::mutator_lock_);
-  ArtField* GetResolvedField(uint32_t field_idx, mirror::DexCache* dex_cache)
+  ArtField* GetResolvedField(uint32_t field_idx, ObjPtr<mirror::DexCache> dex_cache)
       REQUIRES_SHARED(Locks::mutator_lock_);
   ArtField* ResolveField(uint32_t field_idx, ArtMethod* referrer, bool is_static)
       REQUIRES_SHARED(Locks::mutator_lock_)
@@ -402,7 +394,7 @@
       REQUIRES(!dex_lock_, !Roles::uninterruptible_);
 
   mirror::DexCache* RegisterDexFile(const DexFile& dex_file,
-                                    mirror::ClassLoader* class_loader)
+                                    ObjPtr<mirror::ClassLoader> class_loader)
       REQUIRES(!dex_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
   void RegisterDexFile(const DexFile& dex_file, Handle<mirror::DexCache> dex_cache)
@@ -426,10 +418,10 @@
       REQUIRES(!dex_lock_);
 
   void VisitClassRoots(RootVisitor* visitor, VisitRootFlags flags)
-      REQUIRES(!Locks::classlinker_classes_lock_)
+      REQUIRES(!Locks::classlinker_classes_lock_, !Locks::trace_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
   void VisitRoots(RootVisitor* visitor, VisitRootFlags flags)
-      REQUIRES(!dex_lock_)
+      REQUIRES(!dex_lock_, !Locks::classlinker_classes_lock_, !Locks::trace_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   mirror::DexCache* FindDexCache(Thread* self,
@@ -481,13 +473,14 @@
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!Roles::uninterruptible_);
 
-  void VerifyClass(Thread* self,
-                   Handle<mirror::Class> klass,
-                   verifier::HardFailLogMode log_level = verifier::HardFailLogMode::kLogNone)
+  verifier::MethodVerifier::FailureKind VerifyClass(
+      Thread* self,
+      Handle<mirror::Class> klass,
+      verifier::HardFailLogMode log_level = verifier::HardFailLogMode::kLogNone)
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!dex_lock_);
   bool VerifyClassUsingOatFile(const DexFile& dex_file,
-                               mirror::Class* klass,
+                               ObjPtr<mirror::Class> klass,
                                mirror::Class::Status& oat_file_class_status)
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!dex_lock_);
@@ -505,10 +498,10 @@
                                   jobjectArray methods,
                                   jobjectArray throws)
       REQUIRES_SHARED(Locks::mutator_lock_);
-  std::string GetDescriptorForProxy(mirror::Class* proxy_class)
+  std::string GetDescriptorForProxy(ObjPtr<mirror::Class> proxy_class)
       REQUIRES_SHARED(Locks::mutator_lock_);
   template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
-  ArtMethod* FindMethodForProxy(mirror::Class* proxy_class, ArtMethod* proxy_method)
+  ArtMethod* FindMethodForProxy(ObjPtr<mirror::Class> proxy_class, ArtMethod* proxy_method)
       REQUIRES(!dex_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
@@ -551,7 +544,7 @@
   // Attempts to insert a class into a class table.  Returns null if
   // the class was inserted, otherwise returns an existing class with
   // the same descriptor and ClassLoader.
-  mirror::Class* InsertClass(const char* descriptor, mirror::Class* klass, size_t hash)
+  mirror::Class* InsertClass(const char* descriptor, ObjPtr<mirror::Class> klass, size_t hash)
       REQUIRES(!Locks::classlinker_classes_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
@@ -561,17 +554,6 @@
     return class_roots;
   }
 
-  // Move all of the boot image classes into the class table for faster lookups.
-  void AddBootImageClassesToClassTable()
-      REQUIRES(!Locks::classlinker_classes_lock_)
-      REQUIRES_SHARED(Locks::mutator_lock_);
-
-  // Add image classes to the class table.
-  void AddImageClassesToClassTable(std::vector<gc::space::ImageSpace*> image_spaces,
-                                   mirror::ClassLoader* class_loader)
-      REQUIRES(!Locks::classlinker_classes_lock_)
-      REQUIRES_SHARED(Locks::mutator_lock_);
-
   // Move the class table to the pre-zygote table to reduce memory usage. This works by ensuring
   // that no more classes are ever added to the pre zygote table which makes it that the pages
   // always remain shared dirty instead of private dirty.
@@ -590,7 +572,7 @@
   }
 
   // Used by image writer for checking.
-  bool ClassInClassTable(mirror::Class* klass)
+  bool ClassInClassTable(ObjPtr<mirror::Class> klass)
       REQUIRES(Locks::classlinker_classes_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
@@ -607,17 +589,18 @@
 
   // Unlike GetOrCreateAllocatorForClassLoader, GetAllocatorForClassLoader asserts that the
   // allocator for this class loader is already created.
-  LinearAlloc* GetAllocatorForClassLoader(mirror::ClassLoader* class_loader)
+  LinearAlloc* GetAllocatorForClassLoader(ObjPtr<mirror::ClassLoader> class_loader)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Return the linear alloc for a class loader if it is already allocated, otherwise allocate and
   // set it. TODO: Consider using a lock other than classlinker_classes_lock_.
-  LinearAlloc* GetOrCreateAllocatorForClassLoader(mirror::ClassLoader* class_loader)
+  LinearAlloc* GetOrCreateAllocatorForClassLoader(ObjPtr<mirror::ClassLoader> class_loader)
       REQUIRES(!Locks::classlinker_classes_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // May be called with null class_loader due to legacy code. b/27954959
-  void InsertDexFileInToClassLoader(mirror::Object* dex_file, mirror::ClassLoader* class_loader)
+  void InsertDexFileInToClassLoader(ObjPtr<mirror::Object> dex_file,
+                                    ObjPtr<mirror::ClassLoader> class_loader)
       REQUIRES(!Locks::classlinker_classes_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
@@ -632,10 +615,10 @@
       REQUIRES(!dex_lock_);
 
   static bool IsBootClassLoader(ScopedObjectAccessAlreadyRunnable& soa,
-                                mirror::ClassLoader* class_loader)
+                                ObjPtr<mirror::ClassLoader> class_loader)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  ArtMethod* AddMethodToConflictTable(mirror::Class* klass,
+  ArtMethod* AddMethodToConflictTable(ObjPtr<mirror::Class> klass,
                                       ArtMethod* conflict_method,
                                       ArtMethod* interface_method,
                                       ArtMethod* method,
@@ -652,7 +635,7 @@
 
 
   // Create the IMT and conflict tables for a class.
-  void FillIMTAndConflictTables(mirror::Class* klass) REQUIRES_SHARED(Locks::mutator_lock_);
+  void FillIMTAndConflictTables(ObjPtr<mirror::Class> klass) REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Clear class table strong roots (other than classes themselves). This is done by dex2oat to
   // allow pruning dex caches.
@@ -662,7 +645,7 @@
 
   // Throw the class initialization failure recorded when first trying to initialize the given
   // class.
-  void ThrowEarlierClassFailure(mirror::Class* c, bool wrap_in_no_class_def = false)
+  void ThrowEarlierClassFailure(ObjPtr<mirror::Class> c, bool wrap_in_no_class_def = false)
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!dex_lock_);
 
@@ -721,7 +704,9 @@
       REQUIRES(!dex_lock_, !Roles::uninterruptible_);
 
   // For early bootstrapping by Init
-  mirror::Class* AllocClass(Thread* self, mirror::Class* java_lang_Class, uint32_t class_size)
+  mirror::Class* AllocClass(Thread* self,
+                            ObjPtr<mirror::Class> java_lang_Class,
+                            uint32_t class_size)
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!Roles::uninterruptible_);
 
@@ -732,7 +717,7 @@
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!Roles::uninterruptible_);
 
-  mirror::DexCache* AllocDexCache(mirror::String** out_location,
+  mirror::DexCache* AllocDexCache(ObjPtr<mirror::String>* out_location,
                                   Thread* self,
                                   const DexFile& dex_file)
       REQUIRES_SHARED(Locks::mutator_lock_)
@@ -747,8 +732,8 @@
       REQUIRES(!Roles::uninterruptible_);
 
   void InitializeDexCache(Thread* self,
-                          mirror::DexCache* dex_cache,
-                          mirror::String* location,
+                          ObjPtr<mirror::DexCache> dex_cache,
+                          ObjPtr<mirror::String> location,
                           const DexFile& dex_file,
                           LinearAlloc* linear_alloc)
       REQUIRES_SHARED(Locks::mutator_lock_)
@@ -757,7 +742,8 @@
   mirror::Class* CreatePrimitiveClass(Thread* self, Primitive::Type type)
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!Roles::uninterruptible_);
-  mirror::Class* InitializePrimitiveClass(mirror::Class* primitive_class, Primitive::Type type)
+  mirror::Class* InitializePrimitiveClass(ObjPtr<mirror::Class> primitive_class,
+                                          Primitive::Type type)
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!Roles::uninterruptible_);
 
@@ -785,7 +771,7 @@
   void SetupClass(const DexFile& dex_file,
                   const DexFile::ClassDef& dex_class_def,
                   Handle<mirror::Class> klass,
-                  mirror::ClassLoader* class_loader)
+                  ObjPtr<mirror::ClassLoader> class_loader)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   void LoadClass(Thread* self,
@@ -807,7 +793,30 @@
                   Handle<mirror::Class> klass, ArtMethod* dst)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  void FixupStaticTrampolines(mirror::Class* klass) REQUIRES_SHARED(Locks::mutator_lock_);
+  void FixupStaticTrampolines(ObjPtr<mirror::Class> klass) REQUIRES_SHARED(Locks::mutator_lock_);
+
+  // Finds a class in a Path- or DexClassLoader, loading it if necessary without using JNI. Hash
+  // function is supposed to be ComputeModifiedUtf8Hash(descriptor). Returns true if the
+  // class-loader chain could be handled, false otherwise, i.e., a non-supported class-loader
+  // was encountered while walking the parent chain (currently only BootClassLoader and
+  // PathClassLoader are supported).
+  bool FindClassInBaseDexClassLoader(ScopedObjectAccessAlreadyRunnable& soa,
+                                     Thread* self,
+                                     const char* descriptor,
+                                     size_t hash,
+                                     Handle<mirror::ClassLoader> class_loader,
+                                     ObjPtr<mirror::Class>* result)
+      REQUIRES_SHARED(Locks::mutator_lock_)
+      REQUIRES(!dex_lock_);
+
+  // Finds a class by its descriptor, returning NULL if it isn't wasn't loaded
+  // by the given 'class_loader'. Uses the provided hash for the descriptor.
+  mirror::Class* LookupClass(Thread* self,
+                             const char* descriptor,
+                             size_t hash,
+                             ObjPtr<mirror::ClassLoader> class_loader)
+      REQUIRES(!Locks::classlinker_classes_lock_)
+      REQUIRES_SHARED(Locks::mutator_lock_);
 
   void RegisterDexFileLocked(const DexFile& dex_file, Handle<mirror::DexCache> dex_cache)
       REQUIRES(dex_lock_)
@@ -842,8 +851,8 @@
 
   bool IsSameMethodSignatureInDifferentClassContexts(Thread* self,
                                                      ArtMethod* method,
-                                                     mirror::Class* klass1,
-                                                     mirror::Class* klass2)
+                                                     ObjPtr<mirror::Class> klass1,
+                                                     ObjPtr<mirror::Class> klass2)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   bool LinkClass(Thread* self,
@@ -1050,21 +1059,18 @@
   void EnsureSkipAccessChecksMethods(Handle<mirror::Class> c)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  mirror::Class* LookupClassFromBootImage(const char* descriptor)
-      REQUIRES_SHARED(Locks::mutator_lock_);
-
   // Register a class loader and create its class table and allocator. Should not be called if
   // these are already created.
-  void RegisterClassLoader(mirror::ClassLoader* class_loader)
+  void RegisterClassLoader(ObjPtr<mirror::ClassLoader> class_loader)
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(Locks::classlinker_classes_lock_);
 
   // Returns null if not found.
-  ClassTable* ClassTableForClassLoader(mirror::ClassLoader* class_loader)
+  ClassTable* ClassTableForClassLoader(ObjPtr<mirror::ClassLoader> class_loader)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Insert a new class table if not found.
-  ClassTable* InsertClassTableForClassLoader(mirror::ClassLoader* class_loader)
+  ClassTable* InsertClassTableForClassLoader(ObjPtr<mirror::ClassLoader> class_loader)
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(Locks::classlinker_classes_lock_);
 
@@ -1074,24 +1080,27 @@
   // when resolution has occurred. This happens in mirror::Class::SetStatus. As resolution may
   // retire a class, the version of the class in the table is returned and this may differ from
   // the class passed in.
-  mirror::Class* EnsureResolved(Thread* self, const char* descriptor, mirror::Class* klass)
+  mirror::Class* EnsureResolved(Thread* self, const char* descriptor, ObjPtr<mirror::Class> klass)
       WARN_UNUSED
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!dex_lock_);
 
-  void FixupTemporaryDeclaringClass(mirror::Class* temp_class, mirror::Class* new_class)
+  void FixupTemporaryDeclaringClass(ObjPtr<mirror::Class> temp_class,
+                                    ObjPtr<mirror::Class> new_class)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  void SetClassRoot(ClassRoot class_root, mirror::Class* klass)
+  void SetClassRoot(ClassRoot class_root, ObjPtr<mirror::Class> klass)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Return the quick generic JNI stub for testing.
   const void* GetRuntimeQuickGenericJniStub() const;
 
-  bool CanWeInitializeClass(mirror::Class* klass, bool can_init_statics, bool can_init_parents)
+  bool CanWeInitializeClass(ObjPtr<mirror::Class> klass,
+                            bool can_init_statics,
+                            bool can_init_parents)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  void UpdateClassMethods(mirror::Class* klass,
+  void UpdateClassMethods(ObjPtr<mirror::Class> klass,
                           LengthPrefixedArray<ArtMethod>* new_methods)
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!Locks::classlinker_classes_lock_);
@@ -1123,10 +1132,10 @@
                  /*out*/bool* new_conflict,
                  /*out*/ArtMethod** imt_ref) REQUIRES_SHARED(Locks::mutator_lock_);
 
-  void FillIMTFromIfTable(mirror::IfTable* if_table,
+  void FillIMTFromIfTable(ObjPtr<mirror::IfTable> if_table,
                           ArtMethod* unimplemented_method,
                           ArtMethod* imt_conflict_method,
-                          mirror::Class* klass,
+                          ObjPtr<mirror::Class> klass,
                           bool create_conflict_tables,
                           bool ignore_copied_methods,
                           /*out*/bool* new_conflict,
@@ -1157,8 +1166,6 @@
   // New class roots, only used by CMS since the GC needs to mark these in the pause.
   std::vector<GcRoot<mirror::Class>> new_class_roots_ GUARDED_BY(Locks::classlinker_classes_lock_);
 
-  // Do we need to search dex caches to find boot image classes?
-  bool dex_cache_boot_image_class_lookup_required_;
   // Number of times we've searched dex caches for a class. After a certain number of misses we move
   // the classes into the class_table_ to avoid dex cache based searches.
   Atomic<uint32_t> failed_dex_cache_class_lookups_;
@@ -1194,6 +1201,7 @@
   friend struct CompilationHelper;  // For Compile in ImageTest.
   friend class ImageDumper;  // for DexLock
   friend class ImageWriter;  // for GetClassRoots
+  friend class VMClassLoader;  // for LookupClass and FindClassInBaseDexClassLoader.
   friend class JniCompilerTest;  // for GetRuntimeQuickGenericJniStub
   friend class JniInternalTest;  // for GetRuntimeQuickGenericJniStub
   ART_FRIEND_TEST(ClassLinkerTest, RegisterDexFileName);  // for DexLock, and RegisterDexFileLocked
diff --git a/runtime/class_linker_test.cc b/runtime/class_linker_test.cc
index 5466539..44590ba 100644
--- a/runtime/class_linker_test.cc
+++ b/runtime/class_linker_test.cc
@@ -30,7 +30,9 @@
 #include "gc/heap.h"
 #include "mirror/accessible_object.h"
 #include "mirror/class-inl.h"
+#include "mirror/class_ext.h"
 #include "mirror/dex_cache.h"
+#include "mirror/emulated_stack_frame.h"
 #include "mirror/executable.h"
 #include "mirror/field.h"
 #include "mirror/method_type.h"
@@ -99,7 +101,8 @@
     EXPECT_EQ(0U, primitive->NumDirectInterfaces());
     EXPECT_FALSE(primitive->HasVTable());
     EXPECT_EQ(0, primitive->GetIfTableCount());
-    EXPECT_TRUE(primitive->GetIfTable() == nullptr);
+    EXPECT_TRUE(primitive->GetIfTable() != nullptr);
+    EXPECT_EQ(primitive->GetIfTable()->Count(), 0u);
     EXPECT_EQ(kAccPublic | kAccFinal | kAccAbstract, primitive->GetAccessFlags());
   }
 
@@ -211,13 +214,13 @@
     EXPECT_TRUE(array->ShouldHaveEmbeddedVTable());
     EXPECT_EQ(2, array->GetIfTableCount());
     ASSERT_TRUE(array->GetIfTable() != nullptr);
-    mirror::Class* direct_interface0 = mirror::Class::GetDirectInterface(self, array, 0);
+    ObjPtr<mirror::Class> direct_interface0 = mirror::Class::GetDirectInterface(self, array, 0);
     EXPECT_TRUE(direct_interface0 != nullptr);
     EXPECT_STREQ(direct_interface0->GetDescriptor(&temp), "Ljava/lang/Cloneable;");
-    mirror::Class* direct_interface1 = mirror::Class::GetDirectInterface(self, array, 1);
+    ObjPtr<mirror::Class> direct_interface1 = mirror::Class::GetDirectInterface(self, array, 1);
     EXPECT_STREQ(direct_interface1->GetDescriptor(&temp), "Ljava/io/Serializable;");
-    mirror::Class* array_ptr = array->GetComponentType();
-    EXPECT_EQ(class_linker_->FindArrayClass(self, &array_ptr), array.Get());
+    ObjPtr<mirror::Class> array_ptr = array->GetComponentType();
+    EXPECT_OBJ_PTR_EQ(class_linker_->FindArrayClass(self, &array_ptr), array.Get());
 
     PointerSize pointer_size = class_linker_->GetImagePointerSize();
     mirror::Class* JavaLangObject =
@@ -332,9 +335,9 @@
       EXPECT_FALSE(method.IsDirect());
       EXPECT_TRUE(method.IsCopied());
       EXPECT_TRUE(method.GetDeclaringClass()->IsInterface())
-          << "declaring class: " << PrettyClass(method.GetDeclaringClass());
+          << "declaring class: " << method.GetDeclaringClass()->PrettyClass();
       EXPECT_TRUE(method.GetDeclaringClass()->IsAssignableFrom(klass.Get()))
-          << "declaring class: " << PrettyClass(method.GetDeclaringClass());
+          << "declaring class: " << method.GetDeclaringClass()->PrettyClass();
     }
 
     for (size_t i = 0; i < klass->NumInstanceFields(); i++) {
@@ -366,8 +369,7 @@
         if (current_ref_offset.Uint32Value() == end_ref_offset.Uint32Value()) {
           // While Reference.referent is not primitive, the ClassLinker
           // treats it as such so that the garbage collector won't scan it.
-          EXPECT_EQ(PrettyField(field),
-                    "java.lang.Object java.lang.ref.Reference.referent");
+          EXPECT_EQ(field->PrettyField(), "java.lang.Object java.lang.ref.Reference.referent");
         } else {
           current_ref_offset = MemberOffset(current_ref_offset.Uint32Value() +
                                             sizeof(mirror::HeapReference<mirror::Object>));
@@ -586,6 +588,7 @@
     addOffset(OFFSETOF_MEMBER(mirror::Class, dex_cache_strings_), "dexCacheStrings");
     addOffset(OFFSETOF_MEMBER(mirror::Class, dex_class_def_idx_), "dexClassDefIndex");
     addOffset(OFFSETOF_MEMBER(mirror::Class, dex_type_idx_), "dexTypeIndex");
+    addOffset(OFFSETOF_MEMBER(mirror::Class, ext_data_), "extData");
     addOffset(OFFSETOF_MEMBER(mirror::Class, ifields_), "iFields");
     addOffset(OFFSETOF_MEMBER(mirror::Class, iftable_), "ifTable");
     addOffset(OFFSETOF_MEMBER(mirror::Class, methods_), "methods");
@@ -603,12 +606,17 @@
     addOffset(OFFSETOF_MEMBER(mirror::Class, sfields_), "sFields");
     addOffset(OFFSETOF_MEMBER(mirror::Class, status_), "status");
     addOffset(OFFSETOF_MEMBER(mirror::Class, super_class_), "superClass");
-    addOffset(OFFSETOF_MEMBER(mirror::Class, verify_error_), "verifyError");
     addOffset(OFFSETOF_MEMBER(mirror::Class, virtual_methods_offset_), "virtualMethodsOffset");
     addOffset(OFFSETOF_MEMBER(mirror::Class, vtable_), "vtable");
   };
 };
 
+struct ClassExtOffsets : public CheckOffsets<mirror::ClassExt> {
+  ClassExtOffsets() : CheckOffsets<mirror::ClassExt>(false, "Ldalvik/system/ClassExt;") {
+    addOffset(OFFSETOF_MEMBER(mirror::ClassExt, verify_error_), "verifyError");
+  }
+};
+
 struct StringOffsets : public CheckOffsets<mirror::String> {
   StringOffsets() : CheckOffsets<mirror::String>(false, "Ljava/lang/String;") {
     addOffset(OFFSETOF_MEMBER(mirror::String, count_), "count");
@@ -735,12 +743,21 @@
   MethodHandleImplOffsets() : CheckOffsets<mirror::MethodHandleImpl>(
       false, "Ljava/lang/invoke/MethodHandle;") {
     addOffset(OFFSETOF_MEMBER(mirror::MethodHandleImpl, art_field_or_method_), "artFieldOrMethod");
-    addOffset(OFFSETOF_MEMBER(mirror::MethodHandleImpl, as_type_cache_), "asTypeCache");
     addOffset(OFFSETOF_MEMBER(mirror::MethodHandleImpl, handle_kind_), "handleKind");
+    addOffset(OFFSETOF_MEMBER(mirror::MethodHandleImpl, nominal_type_), "nominalType");
     addOffset(OFFSETOF_MEMBER(mirror::MethodHandleImpl, method_type_), "type");
   }
 };
 
+struct EmulatedStackFrameOffsets : public CheckOffsets<mirror::EmulatedStackFrame> {
+  EmulatedStackFrameOffsets() : CheckOffsets<mirror::EmulatedStackFrame>(
+      false, "Ldalvik/system/EmulatedStackFrame;") {
+    addOffset(OFFSETOF_MEMBER(mirror::EmulatedStackFrame, references_), "references");
+    addOffset(OFFSETOF_MEMBER(mirror::EmulatedStackFrame, stack_frame_), "stackFrame");
+    addOffset(OFFSETOF_MEMBER(mirror::EmulatedStackFrame, type_), "type");
+  }
+};
+
 // C++ fields must exactly match the fields in the Java classes. If this fails,
 // reorder the fields in the C++ class. Managed class fields are ordered by
 // ClassLinker::LinkFields.
@@ -748,6 +765,7 @@
   ScopedObjectAccess soa(Thread::Current());
   EXPECT_TRUE(ObjectOffsets().Check());
   EXPECT_TRUE(ClassOffsets().Check());
+  EXPECT_TRUE(ClassExtOffsets().Check());
   EXPECT_TRUE(StringOffsets().Check());
   EXPECT_TRUE(ThrowableOffsets().Check());
   EXPECT_TRUE(StackTraceElementOffsets().Check());
@@ -761,6 +779,7 @@
   EXPECT_TRUE(ExecutableOffsets().Check());
   EXPECT_TRUE(MethodTypeOffsets().Check());
   EXPECT_TRUE(MethodHandleImplOffsets().Check());
+  EXPECT_TRUE(EmulatedStackFrameOffsets().Check());
 }
 
 TEST_F(ClassLinkerTest, FindClassNonexistent) {
@@ -875,7 +894,7 @@
   uint32_t type_idx = klass->GetClassDef()->class_idx_;
   ObjPtr<mirror::DexCache> dex_cache = klass->GetDexCache();
   const DexFile& dex_file = klass->GetDexFile();
-  EXPECT_EQ(dex_cache->GetResolvedType(type_idx), klass.Ptr());
+  EXPECT_OBJ_PTR_EQ(dex_cache->GetResolvedType(type_idx), klass);
   EXPECT_OBJ_PTR_EQ(
       class_linker_->LookupResolvedType(dex_file, type_idx, dex_cache, class_loader.Get()),
       klass);
@@ -1215,14 +1234,14 @@
     REQUIRES_SHARED(Locks::mutator_lock_) {
   if (!method->IsNative() && !method->IsAbstract()) {
     EXPECT_EQ((method->GetAccessFlags() & kAccSkipAccessChecks) != 0U, verified)
-        << PrettyMethod(method, true);
+        << method->PrettyMethod(true);
   }
 }
 
 static void CheckVerificationAttempted(mirror::Class* c, bool preverified)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   EXPECT_EQ((c->GetAccessFlags() & kAccVerificationAttempted) != 0U, preverified)
-      << "Class " << PrettyClass(c) << " not as expected";
+      << "Class " << mirror::Class::PrettyClass(c) << " not as expected";
   for (auto& m : c->GetMethods(kRuntimePointerSize)) {
     CheckMethod(&m, preverified);
   }
@@ -1298,7 +1317,7 @@
   {
     ReaderMutexLock mu(soa.Self(), *class_linker->DexLock());
     for (const ClassLinker::DexCacheData& data : class_linker->GetDexCachesData()) {
-      dex_cache.Assign(down_cast<mirror::DexCache*>(soa.Self()->DecodeJObject(data.weak_root)));
+      dex_cache.Assign(soa.Self()->DecodeJObject(data.weak_root)->AsDexCache());
       if (dex_cache.Get() != nullptr) {
         break;
       }
diff --git a/runtime/class_table.cc b/runtime/class_table.cc
index 2ae7e8c..b44104e 100644
--- a/runtime/class_table.cc
+++ b/runtime/class_table.cc
@@ -31,7 +31,7 @@
   classes_.push_back(ClassSet());
 }
 
-bool ClassTable::Contains(mirror::Class* klass) {
+bool ClassTable::Contains(ObjPtr<mirror::Class> klass) {
   ReaderMutexLock mu(Thread::Current(), lock_);
   for (ClassSet& class_set : classes_) {
     auto it = class_set.Find(GcRoot<mirror::Class>(klass));
@@ -42,7 +42,7 @@
   return false;
 }
 
-mirror::Class* ClassTable::LookupByDescriptor(mirror::Class* klass) {
+mirror::Class* ClassTable::LookupByDescriptor(ObjPtr<mirror::Class> klass) {
   ReaderMutexLock mu(Thread::Current(), lock_);
   for (ClassSet& class_set : classes_) {
     auto it = class_set.Find(GcRoot<mirror::Class>(klass));
@@ -108,16 +108,16 @@
   return nullptr;
 }
 
-void ClassTable::Insert(mirror::Class* klass) {
+void ClassTable::Insert(ObjPtr<mirror::Class> klass) {
   WriterMutexLock mu(Thread::Current(), lock_);
   classes_.back().Insert(GcRoot<mirror::Class>(klass));
 }
 
-void ClassTable::InsertWithoutLocks(mirror::Class* klass) {
+void ClassTable::InsertWithoutLocks(ObjPtr<mirror::Class> klass) {
   classes_.back().Insert(GcRoot<mirror::Class>(klass));
 }
 
-void ClassTable::InsertWithHash(mirror::Class* klass, size_t hash) {
+void ClassTable::InsertWithHash(ObjPtr<mirror::Class> klass, size_t hash) {
   WriterMutexLock mu(Thread::Current(), lock_);
   classes_.back().InsertWithHash(GcRoot<mirror::Class>(klass), hash);
 }
@@ -156,7 +156,7 @@
   return ComputeModifiedUtf8Hash(descriptor);
 }
 
-bool ClassTable::InsertStrongRoot(mirror::Object* obj) {
+bool ClassTable::InsertStrongRoot(ObjPtr<mirror::Object> obj) {
   WriterMutexLock mu(Thread::Current(), lock_);
   DCHECK(obj != nullptr);
   for (GcRoot<mirror::Object>& root : strong_roots_) {
@@ -167,17 +167,30 @@
   strong_roots_.push_back(GcRoot<mirror::Object>(obj));
   // If `obj` is a dex cache associated with a new oat file with GC roots, add it to oat_files_.
   if (obj->IsDexCache()) {
-    const DexFile* dex_file = down_cast<mirror::DexCache*>(obj)->GetDexFile();
+    const DexFile* dex_file = ObjPtr<mirror::DexCache>::DownCast(obj)->GetDexFile();
     if (dex_file != nullptr && dex_file->GetOatDexFile() != nullptr) {
       const OatFile* oat_file = dex_file->GetOatDexFile()->GetOatFile();
-      if (!oat_file->GetBssGcRoots().empty() && !ContainsElement(oat_files_, oat_file)) {
-        oat_files_.push_back(oat_file);
+      if (!oat_file->GetBssGcRoots().empty()) {
+        InsertOatFileLocked(oat_file);  // Ignore return value.
       }
     }
   }
   return true;
 }
 
+bool ClassTable::InsertOatFile(const OatFile* oat_file) {
+  WriterMutexLock mu(Thread::Current(), lock_);
+  return InsertOatFileLocked(oat_file);
+}
+
+bool ClassTable::InsertOatFileLocked(const OatFile* oat_file) {
+  if (ContainsElement(oat_files_, oat_file)) {
+    return false;
+  }
+  oat_files_.push_back(oat_file);
+  return true;
+}
+
 size_t ClassTable::WriteToMemory(uint8_t* ptr) const {
   ReaderMutexLock mu(Thread::Current(), lock_);
   ClassSet combined;
diff --git a/runtime/class_table.h b/runtime/class_table.h
index acb15c7..558c144 100644
--- a/runtime/class_table.h
+++ b/runtime/class_table.h
@@ -27,6 +27,7 @@
 #include "base/mutex.h"
 #include "dex_file.h"
 #include "gc_root.h"
+#include "obj_ptr.h"
 #include "object_callbacks.h"
 #include "runtime.h"
 
@@ -47,7 +48,7 @@
     uint32_t operator()(const GcRoot<mirror::Class>& root) const NO_THREAD_SAFETY_ANALYSIS;
     // Same class loader and descriptor.
     bool operator()(const GcRoot<mirror::Class>& a, const GcRoot<mirror::Class>& b) const
-        NO_THREAD_SAFETY_ANALYSIS;;
+        NO_THREAD_SAFETY_ANALYSIS;
     // Same descriptor.
     bool operator()(const GcRoot<mirror::Class>& a, const char* descriptor) const
         NO_THREAD_SAFETY_ANALYSIS;
@@ -72,7 +73,7 @@
   ClassTable();
 
   // Used by image writer for checking.
-  bool Contains(mirror::Class* klass)
+  bool Contains(ObjPtr<mirror::Class> klass)
       REQUIRES(!lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
@@ -118,15 +119,15 @@
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Return the first class that matches the descriptor of klass. Returns null if there are none.
-  mirror::Class* LookupByDescriptor(mirror::Class* klass)
+  mirror::Class* LookupByDescriptor(ObjPtr<mirror::Class> klass)
       REQUIRES(!lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  void Insert(mirror::Class* klass)
+  void Insert(ObjPtr<mirror::Class> klass)
       REQUIRES(!lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  void InsertWithHash(mirror::Class* klass, size_t hash)
+  void InsertWithHash(ObjPtr<mirror::Class> klass, size_t hash)
       REQUIRES(!lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
@@ -136,7 +137,12 @@
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Return true if we inserted the strong root, false if it already exists.
-  bool InsertStrongRoot(mirror::Object* obj)
+  bool InsertStrongRoot(ObjPtr<mirror::Object> obj)
+      REQUIRES(!lock_)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
+  // Return true if we inserted the oat file, false if it already exists.
+  bool InsertOatFile(const OatFile* oat_file)
       REQUIRES(!lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
@@ -165,7 +171,12 @@
   }
 
  private:
-  void InsertWithoutLocks(mirror::Class* klass) NO_THREAD_SAFETY_ANALYSIS;
+  void InsertWithoutLocks(ObjPtr<mirror::Class> klass) NO_THREAD_SAFETY_ANALYSIS;
+
+  // Return true if we inserted the oat file, false if it already exists.
+  bool InsertOatFileLocked(const OatFile* oat_file)
+      REQUIRES(lock_)
+      REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Lock to guard inserting and removing.
   mutable ReaderWriterMutex lock_;
diff --git a/runtime/common_runtime_test.cc b/runtime/common_runtime_test.cc
index 84752f0..8226e60 100644
--- a/runtime/common_runtime_test.cc
+++ b/runtime/common_runtime_test.cc
@@ -57,7 +57,7 @@
   // everything else. In case you want to see all messages, comment out the line.
   setenv("ANDROID_LOG_TAGS", "*:e", 1);
 
-  art::InitLogging(argv);
+  art::InitLogging(argv, art::Runtime::Aborter);
   LOG(INFO) << "Running main() from common_runtime_test.cc...";
   testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
@@ -514,23 +514,23 @@
       soa.Decode<mirror::ClassLoader>(jclass_loader));
 
   DCHECK_EQ(class_loader->GetClass(),
-            soa.Decode<mirror::Class>(WellKnownClasses::dalvik_system_PathClassLoader).Ptr());
+            soa.Decode<mirror::Class>(WellKnownClasses::dalvik_system_PathClassLoader));
   DCHECK_EQ(class_loader->GetParent()->GetClass(),
-            soa.Decode<mirror::Class>(WellKnownClasses::java_lang_BootClassLoader).Ptr());
+            soa.Decode<mirror::Class>(WellKnownClasses::java_lang_BootClassLoader));
 
   // The class loader is a PathClassLoader which inherits from BaseDexClassLoader.
   // We need to get the DexPathList and loop through it.
-  ArtField* cookie_field = soa.DecodeField(WellKnownClasses::dalvik_system_DexFile_cookie);
+  ArtField* cookie_field = jni::DecodeArtField(WellKnownClasses::dalvik_system_DexFile_cookie);
   ArtField* dex_file_field =
-      soa.DecodeField(WellKnownClasses::dalvik_system_DexPathList__Element_dexFile);
+      jni::DecodeArtField(WellKnownClasses::dalvik_system_DexPathList__Element_dexFile);
   ObjPtr<mirror::Object> dex_path_list =
-      soa.DecodeField(WellKnownClasses::dalvik_system_PathClassLoader_pathList)->
-      GetObject(class_loader.Get());
+      jni::DecodeArtField(WellKnownClasses::dalvik_system_BaseDexClassLoader_pathList)->
+          GetObject(class_loader.Get());
   if (dex_path_list != nullptr && dex_file_field!= nullptr && cookie_field != nullptr) {
     // DexPathList has an array dexElements of Elements[] which each contain a dex file.
     ObjPtr<mirror::Object> dex_elements_obj =
-        soa.DecodeField(WellKnownClasses::dalvik_system_DexPathList_dexElements)->
-        GetObject(dex_path_list);
+        jni::DecodeArtField(WellKnownClasses::dalvik_system_DexPathList_dexElements)->
+            GetObject(dex_path_list);
     // Loop through each dalvik.system.DexPathList$Element's dalvik.system.DexFile and look
     // at the mCookie which is a DexFile vector.
     if (dex_elements_obj != nullptr) {
@@ -572,6 +572,29 @@
   return ret;
 }
 
+jobject CommonRuntimeTestImpl::LoadMultiDex(const char* first_dex_name,
+                                            const char* second_dex_name) {
+  std::vector<std::unique_ptr<const DexFile>> first_dex_files = OpenTestDexFiles(first_dex_name);
+  std::vector<std::unique_ptr<const DexFile>> second_dex_files = OpenTestDexFiles(second_dex_name);
+  std::vector<const DexFile*> class_path;
+  CHECK_NE(0U, first_dex_files.size());
+  CHECK_NE(0U, second_dex_files.size());
+  for (auto& dex_file : first_dex_files) {
+    class_path.push_back(dex_file.get());
+    loaded_dex_files_.push_back(std::move(dex_file));
+  }
+  for (auto& dex_file : second_dex_files) {
+    class_path.push_back(dex_file.get());
+    loaded_dex_files_.push_back(std::move(dex_file));
+  }
+
+  Thread* self = Thread::Current();
+  jobject class_loader = Runtime::Current()->GetClassLinker()->CreatePathClassLoader(self,
+                                                                                     class_path);
+  self->SetClassLoaderOverride(class_loader);
+  return class_loader;
+}
+
 jobject CommonRuntimeTestImpl::LoadDex(const char* dex_name) {
   std::vector<std::unique_ptr<const DexFile>> dex_files = OpenTestDexFiles(dex_name);
   std::vector<const DexFile*> class_path;
diff --git a/runtime/common_runtime_test.h b/runtime/common_runtime_test.h
index 92934c6..17e3729 100644
--- a/runtime/common_runtime_test.h
+++ b/runtime/common_runtime_test.h
@@ -52,7 +52,7 @@
 
   ScratchFile(const ScratchFile& other, const char* suffix);
 
-  explicit ScratchFile(ScratchFile&& other);
+  ScratchFile(ScratchFile&& other);
 
   ScratchFile& operator=(ScratchFile&& other);
 
@@ -133,6 +133,8 @@
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   jobject LoadDex(const char* dex_name) REQUIRES_SHARED(Locks::mutator_lock_);
+  jobject LoadMultiDex(const char* first_dex_name, const char* second_dex_name)
+      REQUIRES_SHARED(Locks::mutator_lock_);
 
   std::string android_data_;
   std::string dalvik_cache_;
diff --git a/runtime/common_throws.cc b/runtime/common_throws.cc
index 1e4c772..9f0dbbb 100644
--- a/runtime/common_throws.cc
+++ b/runtime/common_throws.cc
@@ -28,26 +28,30 @@
 #include "dex_instruction-inl.h"
 #include "invoke_type.h"
 #include "mirror/class-inl.h"
+#include "mirror/method_type.h"
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
+#include "obj_ptr-inl.h"
 #include "thread.h"
 #include "verifier/method_verifier.h"
 
 namespace art {
 
-static void AddReferrerLocation(std::ostream& os, mirror::Class* referrer)
+static void AddReferrerLocation(std::ostream& os, ObjPtr<mirror::Class> referrer)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   if (referrer != nullptr) {
     std::string location(referrer->GetLocation());
     if (!location.empty()) {
-      os << " (declaration of '" << PrettyDescriptor(referrer)
-            << "' appears in " << location << ")";
+      os << " (declaration of '" << referrer->PrettyDescriptor()
+         << "' appears in " << location << ")";
     }
   }
 }
 
 static void ThrowException(const char* exception_descriptor,
-                           mirror::Class* referrer, const char* fmt, va_list* args = nullptr)
+                           ObjPtr<mirror::Class> referrer,
+                           const char* fmt,
+                           va_list* args = nullptr)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   std::ostringstream msg;
   if (args != nullptr) {
@@ -63,7 +67,9 @@
 }
 
 static void ThrowWrappedException(const char* exception_descriptor,
-                                  mirror::Class* referrer, const char* fmt, va_list* args = nullptr)
+                                  ObjPtr<mirror::Class> referrer,
+                                  const char* fmt,
+                                  va_list* args = nullptr)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   std::ostringstream msg;
   if (args != nullptr) {
@@ -83,15 +89,14 @@
 void ThrowAbstractMethodError(ArtMethod* method) {
   ThrowException("Ljava/lang/AbstractMethodError;", nullptr,
                  StringPrintf("abstract method \"%s\"",
-                              PrettyMethod(method).c_str()).c_str());
+                              ArtMethod::PrettyMethod(method).c_str()).c_str());
 }
 
 void ThrowAbstractMethodError(uint32_t method_idx, const DexFile& dex_file) {
   ThrowException("Ljava/lang/AbstractMethodError;", /* referrer */ nullptr,
                  StringPrintf("abstract method \"%s\"",
-                              PrettyMethod(method_idx,
-                                           dex_file,
-                                           /* with_signature */ true).c_str()).c_str());
+                              dex_file.PrettyMethod(method_idx,
+                                                    /* with_signature */ true).c_str()).c_str());
 }
 
 // ArithmeticException
@@ -109,20 +114,21 @@
 
 // ArrayStoreException
 
-void ThrowArrayStoreException(mirror::Class* element_class, mirror::Class* array_class) {
+void ThrowArrayStoreException(ObjPtr<mirror::Class> element_class,
+                              ObjPtr<mirror::Class> array_class) {
   ThrowException("Ljava/lang/ArrayStoreException;", nullptr,
                  StringPrintf("%s cannot be stored in an array of type %s",
-                              PrettyDescriptor(element_class).c_str(),
-                              PrettyDescriptor(array_class).c_str()).c_str());
+                              mirror::Class::PrettyDescriptor(element_class).c_str(),
+                              mirror::Class::PrettyDescriptor(array_class).c_str()).c_str());
 }
 
 // ClassCastException
 
-void ThrowClassCastException(mirror::Class* dest_type, mirror::Class* src_type) {
+void ThrowClassCastException(ObjPtr<mirror::Class> dest_type, ObjPtr<mirror::Class> src_type) {
   ThrowException("Ljava/lang/ClassCastException;", nullptr,
                  StringPrintf("%s cannot be cast to %s",
-                              PrettyDescriptor(src_type).c_str(),
-                              PrettyDescriptor(dest_type).c_str()).c_str());
+                              mirror::Class::PrettyDescriptor(src_type).c_str(),
+                              mirror::Class::PrettyDescriptor(dest_type).c_str()).c_str());
 }
 
 void ThrowClassCastException(const char* msg) {
@@ -131,13 +137,13 @@
 
 // ClassCircularityError
 
-void ThrowClassCircularityError(mirror::Class* c) {
+void ThrowClassCircularityError(ObjPtr<mirror::Class> c) {
   std::ostringstream msg;
-  msg << PrettyDescriptor(c);
+  msg << mirror::Class::PrettyDescriptor(c);
   ThrowException("Ljava/lang/ClassCircularityError;", c, msg.str().c_str());
 }
 
-void ThrowClassCircularityError(mirror::Class* c, const char* fmt, ...) {
+void ThrowClassCircularityError(ObjPtr<mirror::Class> c, const char* fmt, ...) {
   va_list args;
   va_start(args, fmt);
   ThrowException("Ljava/lang/ClassCircularityError;", c, fmt, &args);
@@ -146,7 +152,7 @@
 
 // ClassFormatError
 
-void ThrowClassFormatError(mirror::Class* referrer, const char* fmt, ...) {
+void ThrowClassFormatError(ObjPtr<mirror::Class> referrer, const char* fmt, ...) {
   va_list args;
   va_start(args, fmt);
   ThrowException("Ljava/lang/ClassFormatError;", referrer, fmt, &args);
@@ -155,47 +161,49 @@
 
 // IllegalAccessError
 
-void ThrowIllegalAccessErrorClass(mirror::Class* referrer, mirror::Class* accessed) {
+void ThrowIllegalAccessErrorClass(ObjPtr<mirror::Class> referrer, ObjPtr<mirror::Class> accessed) {
   std::ostringstream msg;
-  msg << "Illegal class access: '" << PrettyDescriptor(referrer) << "' attempting to access '"
-      << PrettyDescriptor(accessed) << "'";
+  msg << "Illegal class access: '" << mirror::Class::PrettyDescriptor(referrer)
+      << "' attempting to access '" << mirror::Class::PrettyDescriptor(accessed) << "'";
   ThrowException("Ljava/lang/IllegalAccessError;", referrer, msg.str().c_str());
 }
 
-void ThrowIllegalAccessErrorClassForMethodDispatch(mirror::Class* referrer, mirror::Class* accessed,
+void ThrowIllegalAccessErrorClassForMethodDispatch(ObjPtr<mirror::Class> referrer,
+                                                   ObjPtr<mirror::Class> accessed,
                                                    ArtMethod* called,
                                                    InvokeType type) {
   std::ostringstream msg;
-  msg << "Illegal class access ('" << PrettyDescriptor(referrer) << "' attempting to access '"
-      << PrettyDescriptor(accessed) << "') in attempt to invoke " << type
-      << " method " << PrettyMethod(called).c_str();
+  msg << "Illegal class access ('" << mirror::Class::PrettyDescriptor(referrer)
+      << "' attempting to access '"
+      << mirror::Class::PrettyDescriptor(accessed) << "') in attempt to invoke " << type
+      << " method " << ArtMethod::PrettyMethod(called).c_str();
   ThrowException("Ljava/lang/IllegalAccessError;", referrer, msg.str().c_str());
 }
 
-void ThrowIllegalAccessErrorMethod(mirror::Class* referrer, ArtMethod* accessed) {
+void ThrowIllegalAccessErrorMethod(ObjPtr<mirror::Class> referrer, ArtMethod* accessed) {
   std::ostringstream msg;
-  msg << "Method '" << PrettyMethod(accessed) << "' is inaccessible to class '"
-      << PrettyDescriptor(referrer) << "'";
+  msg << "Method '" << ArtMethod::PrettyMethod(accessed) << "' is inaccessible to class '"
+      << mirror::Class::PrettyDescriptor(referrer) << "'";
   ThrowException("Ljava/lang/IllegalAccessError;", referrer, msg.str().c_str());
 }
 
-void ThrowIllegalAccessErrorField(mirror::Class* referrer, ArtField* accessed) {
+void ThrowIllegalAccessErrorField(ObjPtr<mirror::Class> referrer, ArtField* accessed) {
   std::ostringstream msg;
-  msg << "Field '" << PrettyField(accessed, false) << "' is inaccessible to class '"
-      << PrettyDescriptor(referrer) << "'";
+  msg << "Field '" << ArtField::PrettyField(accessed, false) << "' is inaccessible to class '"
+      << mirror::Class::PrettyDescriptor(referrer) << "'";
   ThrowException("Ljava/lang/IllegalAccessError;", referrer, msg.str().c_str());
 }
 
 void ThrowIllegalAccessErrorFinalField(ArtMethod* referrer, ArtField* accessed) {
   std::ostringstream msg;
-  msg << "Final field '" << PrettyField(accessed, false) << "' cannot be written to by method '"
-      << PrettyMethod(referrer) << "'";
+  msg << "Final field '" << ArtField::PrettyField(accessed, false)
+      << "' cannot be written to by method '" << ArtMethod::PrettyMethod(referrer) << "'";
   ThrowException("Ljava/lang/IllegalAccessError;",
                  referrer != nullptr ? referrer->GetDeclaringClass() : nullptr,
                  msg.str().c_str());
 }
 
-void ThrowIllegalAccessError(mirror::Class* referrer, const char* fmt, ...) {
+void ThrowIllegalAccessError(ObjPtr<mirror::Class> referrer, const char* fmt, ...) {
   va_list args;
   va_start(args, fmt);
   ThrowException("Ljava/lang/IllegalAccessError;", referrer, fmt, &args);
@@ -220,7 +228,7 @@
 void ThrowIncompatibleClassChangeError(InvokeType expected_type, InvokeType found_type,
                                        ArtMethod* method, ArtMethod* referrer) {
   std::ostringstream msg;
-  msg << "The method '" << PrettyMethod(method) << "' was expected to be of type "
+  msg << "The method '" << ArtMethod::PrettyMethod(method) << "' was expected to be of type "
       << expected_type << " but instead was found to be of type " << found_type;
   ThrowException("Ljava/lang/IncompatibleClassChangeError;",
                  referrer != nullptr ? referrer->GetDeclaringClass() : nullptr,
@@ -228,32 +236,33 @@
 }
 
 void ThrowIncompatibleClassChangeErrorClassForInterfaceSuper(ArtMethod* method,
-                                                             mirror::Class* target_class,
-                                                             mirror::Object* this_object,
+                                                             ObjPtr<mirror::Class> target_class,
+                                                             ObjPtr<mirror::Object> this_object,
                                                              ArtMethod* referrer) {
   // Referrer is calling interface_method on this_object, however, the interface_method isn't
   // implemented by this_object.
   CHECK(this_object != nullptr);
   std::ostringstream msg;
-  msg << "Class '" << PrettyDescriptor(this_object->GetClass())
-      << "' does not implement interface '" << PrettyDescriptor(target_class) << "' in call to '"
-      << PrettyMethod(method) << "'";
+  msg << "Class '" << mirror::Class::PrettyDescriptor(this_object->GetClass())
+      << "' does not implement interface '" << mirror::Class::PrettyDescriptor(target_class)
+      << "' in call to '"
+      << ArtMethod::PrettyMethod(method) << "'";
   ThrowException("Ljava/lang/IncompatibleClassChangeError;",
                  referrer != nullptr ? referrer->GetDeclaringClass() : nullptr,
                  msg.str().c_str());
 }
 
 void ThrowIncompatibleClassChangeErrorClassForInterfaceDispatch(ArtMethod* interface_method,
-                                                                mirror::Object* this_object,
+                                                                ObjPtr<mirror::Object> this_object,
                                                                 ArtMethod* referrer) {
   // Referrer is calling interface_method on this_object, however, the interface_method isn't
   // implemented by this_object.
   CHECK(this_object != nullptr);
   std::ostringstream msg;
-  msg << "Class '" << PrettyDescriptor(this_object->GetClass())
+  msg << "Class '" << mirror::Class::PrettyDescriptor(this_object->GetClass())
       << "' does not implement interface '"
-      << PrettyDescriptor(interface_method->GetDeclaringClass())
-      << "' in call to '" << PrettyMethod(interface_method) << "'";
+      << mirror::Class::PrettyDescriptor(interface_method->GetDeclaringClass())
+      << "' in call to '" << ArtMethod::PrettyMethod(interface_method) << "'";
   ThrowException("Ljava/lang/IncompatibleClassChangeError;",
                  referrer != nullptr ? referrer->GetDeclaringClass() : nullptr,
                  msg.str().c_str());
@@ -262,14 +271,14 @@
 void ThrowIncompatibleClassChangeErrorField(ArtField* resolved_field, bool is_static,
                                             ArtMethod* referrer) {
   std::ostringstream msg;
-  msg << "Expected '" << PrettyField(resolved_field) << "' to be a "
+  msg << "Expected '" << ArtField::PrettyField(resolved_field) << "' to be a "
       << (is_static ? "static" : "instance") << " field" << " rather than a "
       << (is_static ? "instance" : "static") << " field";
   ThrowException("Ljava/lang/IncompatibleClassChangeError;", referrer->GetDeclaringClass(),
                  msg.str().c_str());
 }
 
-void ThrowIncompatibleClassChangeError(mirror::Class* referrer, const char* fmt, ...) {
+void ThrowIncompatibleClassChangeError(ObjPtr<mirror::Class> referrer, const char* fmt, ...) {
   va_list args;
   va_start(args, fmt);
   ThrowException("Ljava/lang/IncompatibleClassChangeError;", referrer, fmt, &args);
@@ -281,7 +290,7 @@
   ThrowException("Ljava/lang/IncompatibleClassChangeError;",
                  /*referrer*/nullptr,
                  StringPrintf("Conflicting default method implementations %s",
-                              PrettyMethod(method).c_str()).c_str());
+                              ArtMethod::PrettyMethod(method).c_str()).c_str());
 }
 
 
@@ -303,14 +312,14 @@
 
 // LinkageError
 
-void ThrowLinkageError(mirror::Class* referrer, const char* fmt, ...) {
+void ThrowLinkageError(ObjPtr<mirror::Class> referrer, const char* fmt, ...) {
   va_list args;
   va_start(args, fmt);
   ThrowException("Ljava/lang/LinkageError;", referrer, fmt, &args);
   va_end(args);
 }
 
-void ThrowWrappedLinkageError(mirror::Class* referrer, const char* fmt, ...) {
+void ThrowWrappedLinkageError(ObjPtr<mirror::Class> referrer, const char* fmt, ...) {
   va_list args;
   va_start(args, fmt);
   ThrowWrappedException("Ljava/lang/LinkageError;", referrer, fmt, &args);
@@ -330,7 +339,7 @@
 
 // NoSuchFieldError
 
-void ThrowNoSuchFieldError(const StringPiece& scope, mirror::Class* c,
+void ThrowNoSuchFieldError(const StringPiece& scope, ObjPtr<mirror::Class> c,
                            const StringPiece& type, const StringPiece& name) {
   std::ostringstream msg;
   std::string temp;
@@ -339,7 +348,7 @@
   ThrowException("Ljava/lang/NoSuchFieldError;", c, msg.str().c_str());
 }
 
-void ThrowNoSuchFieldException(mirror::Class* c, const StringPiece& name) {
+void ThrowNoSuchFieldException(ObjPtr<mirror::Class> c, const StringPiece& name) {
   std::ostringstream msg;
   std::string temp;
   msg << "No field " << name << " in class " << c->GetDescriptor(&temp);
@@ -348,7 +357,7 @@
 
 // NoSuchMethodError
 
-void ThrowNoSuchMethodError(InvokeType type, mirror::Class* c, const StringPiece& name,
+void ThrowNoSuchMethodError(InvokeType type, ObjPtr<mirror::Class> c, const StringPiece& name,
                             const Signature& signature) {
   std::ostringstream msg;
   std::string temp;
@@ -362,7 +371,7 @@
 void ThrowNullPointerExceptionForFieldAccess(ArtField* field, bool is_read) {
   std::ostringstream msg;
   msg << "Attempt to " << (is_read ? "read from" : "write to")
-      << " field '" << PrettyField(field, true) << "' on a null object reference";
+      << " field '" << ArtField::PrettyField(field, true) << "' on a null object reference";
   ThrowException("Ljava/lang/NullPointerException;", nullptr, msg.str().c_str());
 }
 
@@ -372,13 +381,13 @@
     REQUIRES_SHARED(Locks::mutator_lock_) {
   std::ostringstream msg;
   msg << "Attempt to invoke " << type << " method '"
-      << PrettyMethod(method_idx, dex_file, true) << "' on a null object reference";
+      << dex_file.PrettyMethod(method_idx, true) << "' on a null object reference";
   ThrowException("Ljava/lang/NullPointerException;", nullptr, msg.str().c_str());
 }
 
 void ThrowNullPointerExceptionForMethodAccess(uint32_t method_idx,
                                               InvokeType type) {
-  mirror::DexCache* dex_cache =
+  ObjPtr<mirror::DexCache> dex_cache =
       Thread::Current()->GetCurrentMethod(nullptr)->GetDeclaringClass()->GetDexCache();
   const DexFile& dex_file = *dex_cache->GetDexFile();
   ThrowNullPointerExceptionForMethodAccessImpl(method_idx, dex_file, type);
@@ -386,7 +395,7 @@
 
 void ThrowNullPointerExceptionForMethodAccess(ArtMethod* method,
                                               InvokeType type) {
-  mirror::DexCache* dex_cache = method->GetDeclaringClass()->GetDexCache();
+  ObjPtr<mirror::DexCache> dex_cache = method->GetDeclaringClass()->GetDexCache();
   const DexFile& dex_file = *dex_cache->GetDexFile();
   ThrowNullPointerExceptionForMethodAccessImpl(method->GetDexMethodIndex(),
                                                dex_file, type);
@@ -516,7 +525,7 @@
                << ", at "
                << instr->DumpString(dex_file)
                << " in "
-               << PrettyMethod(method);
+               << method->PrettyMethod();
   }
 
   switch (instr->Opcode()) {
@@ -658,7 +667,7 @@
       LOG(FATAL) << "NullPointerException at an unexpected instruction: "
                  << instr->DumpString(dex_file)
                  << " in "
-                 << PrettyMethod(method);
+                 << method->PrettyMethod();
       break;
     }
   }
@@ -677,6 +686,15 @@
   va_end(args);
 }
 
+// SecurityException
+
+void ThrowSecurityException(const char* fmt, ...) {
+  va_list args;
+  va_start(args, fmt);
+  ThrowException("Ljava/lang/SecurityException;", nullptr, fmt, &args);
+  va_end(args);
+}
+
 // Stack overflow.
 
 void ThrowStackOverflowError(Thread* self) {
@@ -752,7 +770,7 @@
         error_msg = "Could not create stack trace.";
       }
       // Throw the exception.
-      self->SetException(reinterpret_cast<mirror::Throwable*>(self->DecodeJObject(exc.get())));
+      self->SetException(self->DecodeJObject(exc.get())->AsThrowable());
     } else {
       // Could not allocate a string object.
       error_msg = "Couldn't throw new StackOverflowError because JNI NewStringUTF failed.";
@@ -784,11 +802,22 @@
 
 // VerifyError
 
-void ThrowVerifyError(mirror::Class* referrer, const char* fmt, ...) {
+void ThrowVerifyError(ObjPtr<mirror::Class> referrer, const char* fmt, ...) {
   va_list args;
   va_start(args, fmt);
   ThrowException("Ljava/lang/VerifyError;", referrer, fmt, &args);
   va_end(args);
 }
 
+// WrongMethodTypeException
+
+void ThrowWrongMethodTypeException(mirror::MethodType* callee_type,
+                                   mirror::MethodType* callsite_type) {
+  ThrowException("Ljava/lang/invoke/WrongMethodTypeException;",
+                 nullptr,
+                 StringPrintf("Expected %s but was %s",
+                              callee_type->PrettyDescriptor().c_str(),
+                              callsite_type->PrettyDescriptor().c_str()).c_str());
+}
+
 }  // namespace art
diff --git a/runtime/common_throws.h b/runtime/common_throws.h
index 945dc2d..76ea2ae 100644
--- a/runtime/common_throws.h
+++ b/runtime/common_throws.h
@@ -19,11 +19,13 @@
 
 #include "base/mutex.h"
 #include "invoke_type.h"
+#include "obj_ptr.h"
 
 namespace art {
 namespace mirror {
   class Class;
   class Object;
+  class MethodType;
 }  // namespace mirror
 class ArtField;
 class ArtMethod;
@@ -50,20 +52,21 @@
 
 // ArrayStoreException
 
-void ThrowArrayStoreException(mirror::Class* element_class, mirror::Class* array_class)
+void ThrowArrayStoreException(ObjPtr<mirror::Class> element_class,
+                              ObjPtr<mirror::Class> array_class)
     REQUIRES_SHARED(Locks::mutator_lock_) COLD_ATTR;
 
 // ClassCircularityError
 
-void ThrowClassCircularityError(mirror::Class* c)
+void ThrowClassCircularityError(ObjPtr<mirror::Class> c)
     REQUIRES_SHARED(Locks::mutator_lock_) COLD_ATTR;
 
-void ThrowClassCircularityError(mirror::Class* c, const char* fmt, ...)
+void ThrowClassCircularityError(ObjPtr<mirror::Class> c, const char* fmt, ...)
     REQUIRES_SHARED(Locks::mutator_lock_) COLD_ATTR;
 
 // ClassCastException
 
-void ThrowClassCastException(mirror::Class* dest_type, mirror::Class* src_type)
+void ThrowClassCastException(ObjPtr<mirror::Class> dest_type, ObjPtr<mirror::Class> src_type)
     REQUIRES_SHARED(Locks::mutator_lock_) COLD_ATTR;
 
 void ThrowClassCastException(const char* msg)
@@ -71,30 +74,31 @@
 
 // ClassFormatError
 
-void ThrowClassFormatError(mirror::Class* referrer, const char* fmt, ...)
+void ThrowClassFormatError(ObjPtr<mirror::Class> referrer, const char* fmt, ...)
     __attribute__((__format__(__printf__, 2, 3)))
     REQUIRES_SHARED(Locks::mutator_lock_) COLD_ATTR;
 
 // IllegalAccessError
 
-void ThrowIllegalAccessErrorClass(mirror::Class* referrer, mirror::Class* accessed)
+void ThrowIllegalAccessErrorClass(ObjPtr<mirror::Class> referrer, ObjPtr<mirror::Class> accessed)
     REQUIRES_SHARED(Locks::mutator_lock_) COLD_ATTR;
 
-void ThrowIllegalAccessErrorClassForMethodDispatch(mirror::Class* referrer, mirror::Class* accessed,
+void ThrowIllegalAccessErrorClassForMethodDispatch(ObjPtr<mirror::Class> referrer,
+                                                   ObjPtr<mirror::Class> accessed,
                                                    ArtMethod* called,
                                                    InvokeType type)
     REQUIRES_SHARED(Locks::mutator_lock_) COLD_ATTR;
 
-void ThrowIllegalAccessErrorMethod(mirror::Class* referrer, ArtMethod* accessed)
+void ThrowIllegalAccessErrorMethod(ObjPtr<mirror::Class> referrer, ArtMethod* accessed)
     REQUIRES_SHARED(Locks::mutator_lock_) COLD_ATTR;
 
-void ThrowIllegalAccessErrorField(mirror::Class* referrer, ArtField* accessed)
+void ThrowIllegalAccessErrorField(ObjPtr<mirror::Class> referrer, ArtField* accessed)
     REQUIRES_SHARED(Locks::mutator_lock_) COLD_ATTR;
 
 void ThrowIllegalAccessErrorFinalField(ArtMethod* referrer, ArtField* accessed)
     REQUIRES_SHARED(Locks::mutator_lock_) COLD_ATTR;
 
-void ThrowIllegalAccessError(mirror::Class* referrer, const char* fmt, ...)
+void ThrowIllegalAccessError(ObjPtr<mirror::Class> referrer, const char* fmt, ...)
     __attribute__((__format__(__printf__, 2, 3)))
     REQUIRES_SHARED(Locks::mutator_lock_) COLD_ATTR;
 
@@ -110,26 +114,29 @@
 
 // IncompatibleClassChangeError
 
-void ThrowIncompatibleClassChangeError(InvokeType expected_type, InvokeType found_type,
-                                       ArtMethod* method, ArtMethod* referrer)
+void ThrowIncompatibleClassChangeError(InvokeType expected_type,
+                                       InvokeType found_type,
+                                       ArtMethod* method,
+                                       ArtMethod* referrer)
     REQUIRES_SHARED(Locks::mutator_lock_) COLD_ATTR;
 
 void ThrowIncompatibleClassChangeErrorClassForInterfaceSuper(ArtMethod* method,
-                                                             mirror::Class* target_class,
-                                                             mirror::Object* this_object,
+                                                             ObjPtr<mirror::Class> target_class,
+                                                             ObjPtr<mirror::Object> this_object,
                                                              ArtMethod* referrer)
     REQUIRES_SHARED(Locks::mutator_lock_) COLD_ATTR;
 
 void ThrowIncompatibleClassChangeErrorClassForInterfaceDispatch(ArtMethod* interface_method,
-                                                                mirror::Object* this_object,
+                                                                ObjPtr<mirror::Object> this_object,
                                                                 ArtMethod* referrer)
     REQUIRES_SHARED(Locks::mutator_lock_) COLD_ATTR;
 
-void ThrowIncompatibleClassChangeErrorField(ArtField* resolved_field, bool is_static,
+void ThrowIncompatibleClassChangeErrorField(ArtField* resolved_field,
+                                            bool is_static,
                                             ArtMethod* referrer)
     REQUIRES_SHARED(Locks::mutator_lock_) COLD_ATTR;
 
-void ThrowIncompatibleClassChangeError(mirror::Class* referrer, const char* fmt, ...)
+void ThrowIncompatibleClassChangeError(ObjPtr<mirror::Class> referrer, const char* fmt, ...)
     __attribute__((__format__(__printf__, 2, 3)))
     REQUIRES_SHARED(Locks::mutator_lock_) COLD_ATTR;
 
@@ -146,11 +153,11 @@
 
 // LinkageError
 
-void ThrowLinkageError(mirror::Class* referrer, const char* fmt, ...)
+void ThrowLinkageError(ObjPtr<mirror::Class> referrer, const char* fmt, ...)
     __attribute__((__format__(__printf__, 2, 3)))
     REQUIRES_SHARED(Locks::mutator_lock_) COLD_ATTR;
 
-void ThrowWrappedLinkageError(mirror::Class* referrer, const char* fmt, ...)
+void ThrowWrappedLinkageError(ObjPtr<mirror::Class> referrer, const char* fmt, ...)
     __attribute__((__format__(__printf__, 2, 3)))
     REQUIRES_SHARED(Locks::mutator_lock_) COLD_ATTR;
 
@@ -165,16 +172,20 @@
 
 // NoSuchFieldError
 
-void ThrowNoSuchFieldError(const StringPiece& scope, mirror::Class* c,
-                           const StringPiece& type, const StringPiece& name)
+void ThrowNoSuchFieldError(const StringPiece& scope,
+                           ObjPtr<mirror::Class> c,
+                           const StringPiece& type,
+                           const StringPiece& name)
     REQUIRES_SHARED(Locks::mutator_lock_) COLD_ATTR;
 
-void ThrowNoSuchFieldException(mirror::Class* c, const StringPiece& name)
+void ThrowNoSuchFieldException(ObjPtr<mirror::Class> c, const StringPiece& name)
     REQUIRES_SHARED(Locks::mutator_lock_) COLD_ATTR;
 
 // NoSuchMethodError
 
-void ThrowNoSuchMethodError(InvokeType type, mirror::Class* c, const StringPiece& name,
+void ThrowNoSuchMethodError(InvokeType type,
+                            ObjPtr<mirror::Class> c,
+                            const StringPiece& name,
                             const Signature& signature)
     REQUIRES_SHARED(Locks::mutator_lock_) COLD_ATTR;
 
@@ -204,6 +215,12 @@
     __attribute__((__format__(__printf__, 1, 2)))
     REQUIRES_SHARED(Locks::mutator_lock_) COLD_ATTR;
 
+// SecurityException
+
+void ThrowSecurityException(const char* fmt, ...)
+    __attribute__((__format__(__printf__, 1, 2)))
+    REQUIRES_SHARED(Locks::mutator_lock_) COLD_ATTR;
+
 // Stack overflow.
 
 void ThrowStackOverflowError(Thread* self) REQUIRES_SHARED(Locks::mutator_lock_) COLD_ATTR;
@@ -215,10 +232,15 @@
 
 // VerifyError
 
-void ThrowVerifyError(mirror::Class* referrer, const char* fmt, ...)
+void ThrowVerifyError(ObjPtr<mirror::Class> referrer, const char* fmt, ...)
     __attribute__((__format__(__printf__, 2, 3)))
     REQUIRES_SHARED(Locks::mutator_lock_) COLD_ATTR;
 
+// WrontMethodTypeException
+void ThrowWrongMethodTypeException(mirror::MethodType* callee_type,
+                                   mirror::MethodType* callsite_type)
+    REQUIRES_SHARED(Locks::mutator_lock_) COLD_ATTR;
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_COMMON_THROWS_H_
diff --git a/runtime/compiler_callbacks.h b/runtime/compiler_callbacks.h
index 00dedef..806653a 100644
--- a/runtime/compiler_callbacks.h
+++ b/runtime/compiler_callbacks.h
@@ -47,6 +47,7 @@
   virtual bool IsRelocationPossible() = 0;
 
   virtual verifier::VerifierDeps* GetVerifierDeps() const = 0;
+  virtual void SetVerifierDeps(verifier::VerifierDeps* deps ATTRIBUTE_UNUSED) {}
 
   bool IsBootImage() {
     return mode_ == CallbackMode::kCompileBootImage;
diff --git a/runtime/debugger.cc b/runtime/debugger.cc
index a7feeef..dc2ae2e 100644
--- a/runtime/debugger.cc
+++ b/runtime/debugger.cc
@@ -39,6 +39,8 @@
 #include "handle_scope.h"
 #include "jdwp/jdwp_priv.h"
 #include "jdwp/object_registry.h"
+#include "jni_internal.h"
+#include "jvalue-inl.h"
 #include "mirror/class.h"
 #include "mirror/class-inl.h"
 #include "mirror/class_loader.h"
@@ -133,7 +135,8 @@
 
 static std::ostream& operator<<(std::ostream& os, const Breakpoint& rhs)
     REQUIRES_SHARED(Locks::mutator_lock_) {
-  os << StringPrintf("Breakpoint[%s @%#x]", PrettyMethod(rhs.Method()).c_str(), rhs.DexPc());
+  os << StringPrintf("Breakpoint[%s @%#x]", ArtMethod::PrettyMethod(rhs.Method()).c_str(),
+                     rhs.DexPc());
   return os;
 }
 
@@ -189,7 +192,7 @@
                     ArtMethod* method, uint32_t dex_pc)
       OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) {
     // We're not recorded to listen to this kind of event, so complain.
-    LOG(ERROR) << "Unexpected method unwind event in debugger " << PrettyMethod(method)
+    LOG(ERROR) << "Unexpected method unwind event in debugger " << ArtMethod::PrettyMethod(method)
                << " " << dex_pc;
   }
 
@@ -235,7 +238,7 @@
   // We only care about branches in the Jit.
   void Branch(Thread* /*thread*/, ArtMethod* method, uint32_t dex_pc, int32_t dex_pc_offset)
       OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) {
-    LOG(ERROR) << "Unexpected branch event in debugger " << PrettyMethod(method)
+    LOG(ERROR) << "Unexpected branch event in debugger " << ArtMethod::PrettyMethod(method)
                << " " << dex_pc << ", " << dex_pc_offset;
   }
 
@@ -246,7 +249,7 @@
                                 uint32_t dex_pc,
                                 ArtMethod*)
       OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) {
-    LOG(ERROR) << "Unexpected invoke event in debugger " << PrettyMethod(method)
+    LOG(ERROR) << "Unexpected invoke event in debugger " << ArtMethod::PrettyMethod(method)
                << " " << dex_pc;
   }
 
@@ -549,6 +552,10 @@
   gJdwpAllowed = allowed;
 }
 
+bool Dbg::IsJdwpAllowed() {
+  return gJdwpAllowed;
+}
+
 DebugInvokeReq* Dbg::GetInvokeReq() {
   return Thread::Current()->GetInvokeReq();
 }
@@ -580,7 +587,7 @@
   explicit UpdateEntryPointsClassVisitor(instrumentation::Instrumentation* instrumentation)
       : instrumentation_(instrumentation) {}
 
-  bool operator()(mirror::Class* klass) OVERRIDE REQUIRES(Locks::mutator_lock_) {
+  bool operator()(ObjPtr<mirror::Class> klass) OVERRIDE REQUIRES(Locks::mutator_lock_) {
     auto pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
     for (auto& m : klass->GetMethods(pointer_size)) {
       const void* code = m.GetEntryPointFromQuickCompiledCode();
@@ -892,15 +899,16 @@
                                        std::vector<uint64_t>* counts) {
   gc::Heap* heap = Runtime::Current()->GetHeap();
   heap->CollectGarbage(false);
-  std::vector<mirror::Class*> classes;
+  VariableSizedHandleScope hs(Thread::Current());
+  std::vector<Handle<mirror::Class>> classes;
   counts->clear();
   for (size_t i = 0; i < class_ids.size(); ++i) {
     JDWP::JdwpError error;
-    mirror::Class* c = DecodeClass(class_ids[i], &error);
+    ObjPtr<mirror::Class> c = DecodeClass(class_ids[i], &error);
     if (c == nullptr) {
       return error;
     }
-    classes.push_back(c);
+    classes.push_back(hs.NewHandle(c));
     counts->push_back(0);
   }
   heap->CountInstances(classes, false, &(*counts)[0]);
@@ -913,14 +921,15 @@
   // We only want reachable instances, so do a GC.
   heap->CollectGarbage(false);
   JDWP::JdwpError error;
-  mirror::Class* c = DecodeClass(class_id, &error);
+  ObjPtr<mirror::Class> c = DecodeClass(class_id, &error);
   if (c == nullptr) {
     return error;
   }
-  std::vector<mirror::Object*> raw_instances;
-  Runtime::Current()->GetHeap()->GetInstances(c, max_count, raw_instances);
+  VariableSizedHandleScope hs(Thread::Current());
+  std::vector<Handle<mirror::Object>> raw_instances;
+  Runtime::Current()->GetHeap()->GetInstances(hs, hs.NewHandle(c), max_count, raw_instances);
   for (size_t i = 0; i < raw_instances.size(); ++i) {
-    instances->push_back(gRegistry->Add(raw_instances[i]));
+    instances->push_back(gRegistry->Add(raw_instances[i].Get()));
   }
   return JDWP::ERR_NONE;
 }
@@ -930,14 +939,15 @@
   gc::Heap* heap = Runtime::Current()->GetHeap();
   heap->CollectGarbage(false);
   JDWP::JdwpError error;
-  mirror::Object* o = gRegistry->Get<mirror::Object*>(object_id, &error);
+  ObjPtr<mirror::Object> o = gRegistry->Get<mirror::Object*>(object_id, &error);
   if (o == nullptr) {
     return JDWP::ERR_INVALID_OBJECT;
   }
-  std::vector<mirror::Object*> raw_instances;
-  heap->GetReferringObjects(o, max_count, raw_instances);
+  VariableSizedHandleScope hs(Thread::Current());
+  std::vector<Handle<mirror::Object>> raw_instances;
+  heap->GetReferringObjects(hs, hs.NewHandle(o), max_count, raw_instances);
   for (size_t i = 0; i < raw_instances.size(); ++i) {
-    referring_objects->push_back(gRegistry->Add(raw_instances[i]));
+    referring_objects->push_back(gRegistry->Add(raw_instances[i].Get()));
   }
   return JDWP::ERR_NONE;
 }
@@ -1017,7 +1027,7 @@
  public:
   explicit ClassListCreator(std::vector<JDWP::RefTypeId>* classes) : classes_(classes) {}
 
-  bool operator()(mirror::Class* c) OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) {
+  bool operator()(ObjPtr<mirror::Class> c) OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) {
     if (!c->IsPrimitive()) {
       classes_->push_back(Dbg::GetObjectRegistry()->AddRefType(c));
     }
@@ -1061,11 +1071,11 @@
 }
 
 void Dbg::FindLoadedClassBySignature(const char* descriptor, std::vector<JDWP::RefTypeId>* ids) {
-  std::vector<mirror::Class*> classes;
+  std::vector<ObjPtr<mirror::Class>> classes;
   Runtime::Current()->GetClassLinker()->LookupClasses(descriptor, classes);
   ids->clear();
-  for (size_t i = 0; i < classes.size(); ++i) {
-    ids->push_back(gRegistry->Add(classes[i]));
+  for (ObjPtr<mirror::Class> c : classes) {
+    ids->push_back(gRegistry->Add(c));
   }
 }
 
@@ -1286,7 +1296,7 @@
     return error;
   }
   Thread* self = Thread::Current();
-  mirror::Object* new_object;
+  ObjPtr<mirror::Object> new_object;
   if (c->IsStringClass()) {
     // Special case for java.lang.String.
     gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
@@ -1297,11 +1307,11 @@
   if (new_object == nullptr) {
     DCHECK(self->IsExceptionPending());
     self->ClearException();
-    LOG(ERROR) << "Could not allocate object of type " << PrettyDescriptor(c);
+    LOG(ERROR) << "Could not allocate object of type " << mirror::Class::PrettyDescriptor(c);
     *new_object_id = 0;
     return JDWP::ERR_OUT_OF_MEMORY;
   }
-  *new_object_id = gRegistry->Add(new_object);
+  *new_object_id = gRegistry->Add(new_object.Ptr());
   return JDWP::ERR_NONE;
 }
 
@@ -1324,7 +1334,7 @@
   if (new_array == nullptr) {
     DCHECK(self->IsExceptionPending());
     self->ClearException();
-    LOG(ERROR) << "Could not allocate array of type " << PrettyDescriptor(c);
+    LOG(ERROR) << "Could not allocate array of type " << mirror::Class::PrettyDescriptor(c);
     *new_array_id = 0;
     return JDWP::ERR_OUT_OF_MEMORY;
   }
@@ -1445,7 +1455,7 @@
   if (code_item == nullptr) {
     // We should not get here for a method without code (native, proxy or abstract). Log it and
     // return the slot as is since all registers are arguments.
-    LOG(WARNING) << "Trying to mangle slot for method without code " << PrettyMethod(m);
+    LOG(WARNING) << "Trying to mangle slot for method without code " << m->PrettyMethod();
     return slot;
   }
   uint16_t ins_size = code_item->ins_size_;
@@ -1476,7 +1486,8 @@
   if (code_item == nullptr) {
     // We should not get here for a method without code (native, proxy or abstract). Log it and
     // return the slot as is since all registers are arguments.
-    LOG(WARNING) << "Trying to demangle slot for method without code " << PrettyMethod(m);
+    LOG(WARNING) << "Trying to demangle slot for method without code "
+                 << m->PrettyMethod();
     uint16_t vreg_count = GetMethodNumArgRegistersIncludingThis(m);
     if (slot < vreg_count) {
       *error = JDWP::ERR_NONE;
@@ -1492,7 +1503,7 @@
   }
 
   // Slot is invalid in the method.
-  LOG(ERROR) << "Invalid local slot " << slot << " for method " << PrettyMethod(m);
+  LOG(ERROR) << "Invalid local slot " << slot << " for method " << m->PrettyMethod();
   *error = JDWP::ERR_INVALID_SLOT;
   return DexFile::kDexNoIndex16;
 }
@@ -1780,14 +1791,16 @@
 
   // TODO: should we give up now if receiver_class is null?
   if (receiver_class != nullptr && !f->GetDeclaringClass()->IsAssignableFrom(receiver_class)) {
-    LOG(INFO) << "ERR_INVALID_FIELDID: " << PrettyField(f) << " " << PrettyClass(receiver_class);
+    LOG(INFO) << "ERR_INVALID_FIELDID: " << f->PrettyField() << " "
+              << receiver_class->PrettyClass();
     return JDWP::ERR_INVALID_FIELDID;
   }
 
   // Ensure the field's class is initialized.
   Handle<mirror::Class> klass(hs.NewHandle(f->GetDeclaringClass()));
   if (!Runtime::Current()->GetClassLinker()->EnsureInitialized(self, klass, true, false)) {
-    LOG(WARNING) << "Not able to initialize class for SetValues: " << PrettyClass(klass.Get());
+    LOG(WARNING) << "Not able to initialize class for SetValues: "
+                 << mirror::Class::PrettyClass(klass.Get());
   }
 
   // The RI only enforces the static/non-static mismatch in one direction.
@@ -1799,7 +1812,7 @@
   } else {
     if (f->IsStatic()) {
       LOG(WARNING) << "Ignoring non-nullptr receiver for ObjectReference.GetValues"
-                   << " on static field " << PrettyField(f);
+                   << " on static field " << f->PrettyField();
     }
   }
   if (f->IsStatic()) {
@@ -1908,7 +1921,8 @@
   // Ensure the field's class is initialized.
   Handle<mirror::Class> klass(hs.NewHandle(f->GetDeclaringClass()));
   if (!Runtime::Current()->GetClassLinker()->EnsureInitialized(self, klass, true, false)) {
-    LOG(WARNING) << "Not able to initialize class for SetValues: " << PrettyClass(klass.Get());
+    LOG(WARNING) << "Not able to initialize class for SetValues: "
+                 << mirror::Class::PrettyClass(klass.Get());
   }
 
   // The RI only enforces the static/non-static mismatch in one direction.
@@ -1920,7 +1934,7 @@
   } else {
     if (f->IsStatic()) {
       LOG(WARNING) << "Ignoring non-nullptr receiver for ObjectReference.SetValues"
-                   << " on static field " << PrettyField(f);
+                   << " on static field " << f->PrettyField();
     }
   }
   if (f->IsStatic()) {
@@ -1994,8 +2008,8 @@
   mirror::Object* thread_object = gRegistry->Get<mirror::Object*>(thread_id, &error);
   CHECK(thread_object != nullptr) << error;
   ArtField* java_lang_Thread_name_field =
-      soa.DecodeField(WellKnownClasses::java_lang_Thread_name);
-  ObjPtr<mirror::String> s(java_lang_Thread_name_field->GetObject(thread_object));
+      jni::DecodeArtField(WellKnownClasses::java_lang_Thread_name);
+  ObjPtr<mirror::String> s(java_lang_Thread_name_field->GetObject(thread_object)->AsString());
   if (s != nullptr) {
     *name = s->ToModifiedUtf8();
   }
@@ -2019,7 +2033,7 @@
   } else if (error == JDWP::ERR_NONE) {
     ObjPtr<mirror::Class> c = soa.Decode<mirror::Class>(WellKnownClasses::java_lang_Thread);
     CHECK(c != nullptr);
-    ArtField* f = soa.DecodeField(WellKnownClasses::java_lang_Thread_group);
+    ArtField* f = jni::DecodeArtField(WellKnownClasses::java_lang_Thread_group);
     CHECK(f != nullptr);
     ObjPtr<mirror::Object> group = f->GetObject(thread_object);
     CHECK(group != nullptr);
@@ -2061,7 +2075,7 @@
     return error;
   }
   ScopedAssertNoThreadSuspension ants("Debugger: GetThreadGroupName");
-  ArtField* f = soa.DecodeField(WellKnownClasses::java_lang_ThreadGroup_name);
+  ArtField* f = jni::DecodeArtField(WellKnownClasses::java_lang_ThreadGroup_name);
   CHECK(f != nullptr);
   ObjPtr<mirror::String> s = f->GetObject(thread_group)->AsString();
 
@@ -2080,7 +2094,7 @@
   ObjPtr<mirror::Object> parent;
   {
     ScopedAssertNoThreadSuspension ants("Debugger: GetThreadGroupParent");
-    ArtField* f = soa.DecodeField(WellKnownClasses::java_lang_ThreadGroup_parent);
+    ArtField* f = jni::DecodeArtField(WellKnownClasses::java_lang_ThreadGroup_parent);
     CHECK(f != nullptr);
     parent = f->GetObject(thread_group);
   }
@@ -2089,13 +2103,13 @@
   return JDWP::ERR_NONE;
 }
 
-static void GetChildThreadGroups(ScopedObjectAccessUnchecked& soa, mirror::Object* thread_group,
+static void GetChildThreadGroups(mirror::Object* thread_group,
                                  std::vector<JDWP::ObjectId>* child_thread_group_ids)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   CHECK(thread_group != nullptr);
 
   // Get the int "ngroups" count of this thread group...
-  ArtField* ngroups_field = soa.DecodeField(WellKnownClasses::java_lang_ThreadGroup_ngroups);
+  ArtField* ngroups_field = jni::DecodeArtField(WellKnownClasses::java_lang_ThreadGroup_ngroups);
   CHECK(ngroups_field != nullptr);
   const int32_t size = ngroups_field->GetInt(thread_group);
   if (size == 0) {
@@ -2103,7 +2117,7 @@
   }
 
   // Get the ThreadGroup[] "groups" out of this thread group...
-  ArtField* groups_field = soa.DecodeField(WellKnownClasses::java_lang_ThreadGroup_groups);
+  ArtField* groups_field = jni::DecodeArtField(WellKnownClasses::java_lang_ThreadGroup_groups);
   ObjPtr<mirror::Object> groups_array = groups_field->GetObject(thread_group);
 
   CHECK(groups_array != nullptr);
@@ -2141,7 +2155,7 @@
   // Add child thread groups.
   {
     std::vector<JDWP::ObjectId> child_thread_groups_ids;
-    GetChildThreadGroups(soa, thread_group, &child_thread_groups_ids);
+    GetChildThreadGroups(thread_group, &child_thread_groups_ids);
     expandBufAdd4BE(pReply, child_thread_groups_ids.size());
     for (JDWP::ObjectId child_thread_group_id : child_thread_groups_ids) {
       expandBufAddObjectId(pReply, child_thread_group_id);
@@ -2153,7 +2167,7 @@
 
 JDWP::ObjectId Dbg::GetSystemThreadGroupId() {
   ScopedObjectAccessUnchecked soa(Thread::Current());
-  ArtField* f = soa.DecodeField(WellKnownClasses::java_lang_ThreadGroup_systemThreadGroup);
+  ArtField* f = jni::DecodeArtField(WellKnownClasses::java_lang_ThreadGroup_systemThreadGroup);
   ObjPtr<mirror::Object> group = f->GetObject(f->GetDeclaringClass());
   return gRegistry->Add(group);
 }
@@ -2243,14 +2257,13 @@
   return JDWP::ERR_NONE;
 }
 
-static bool IsInDesiredThreadGroup(ScopedObjectAccessUnchecked& soa,
-                                   mirror::Object* desired_thread_group, mirror::Object* peer)
+static bool IsInDesiredThreadGroup(mirror::Object* desired_thread_group, mirror::Object* peer)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   // Do we want threads from all thread groups?
   if (desired_thread_group == nullptr) {
     return true;
   }
-  ArtField* thread_group_field = soa.DecodeField(WellKnownClasses::java_lang_Thread_group);
+  ArtField* thread_group_field = jni::DecodeArtField(WellKnownClasses::java_lang_Thread_group);
   DCHECK(thread_group_field != nullptr);
   ObjPtr<mirror::Object> group = thread_group_field->GetObject(peer);
   return (group == desired_thread_group);
@@ -2283,7 +2296,7 @@
       // Doing so might help us report ZOMBIE threads too.
       continue;
     }
-    if (IsInDesiredThreadGroup(soa, thread_group, peer)) {
+    if (IsInDesiredThreadGroup(thread_group, peer)) {
       thread_ids->push_back(gRegistry->Add(peer));
     }
   }
@@ -2577,7 +2590,7 @@
 static std::string GetStackContextAsString(const StackVisitor& visitor)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   return StringPrintf(" at DEX pc 0x%08x in method %s", visitor.GetDexPc(false),
-                      PrettyMethod(visitor.GetMethod()).c_str());
+                      ArtMethod::PrettyMethod(visitor.GetMethod()).c_str());
 }
 
 static JDWP::JdwpError FailGetLocalValue(const StackVisitor& visitor, uint16_t vreg,
@@ -3146,14 +3159,14 @@
       VLOG(jdwp) << "Undeoptimize the world DONE";
       break;
     case DeoptimizationRequest::kSelectiveDeoptimization:
-      VLOG(jdwp) << "Deoptimize method " << PrettyMethod(request.Method()) << " ...";
+      VLOG(jdwp) << "Deoptimize method " << ArtMethod::PrettyMethod(request.Method()) << " ...";
       instrumentation->Deoptimize(request.Method());
-      VLOG(jdwp) << "Deoptimize method " << PrettyMethod(request.Method()) << " DONE";
+      VLOG(jdwp) << "Deoptimize method " << ArtMethod::PrettyMethod(request.Method()) << " DONE";
       break;
     case DeoptimizationRequest::kSelectiveUndeoptimization:
-      VLOG(jdwp) << "Undeoptimize method " << PrettyMethod(request.Method()) << " ...";
+      VLOG(jdwp) << "Undeoptimize method " << ArtMethod::PrettyMethod(request.Method()) << " ...";
       instrumentation->Undeoptimize(request.Method());
-      VLOG(jdwp) << "Undeoptimize method " << PrettyMethod(request.Method()) << " DONE";
+      VLOG(jdwp) << "Undeoptimize method " << ArtMethod::PrettyMethod(request.Method()) << " DONE";
       break;
     default:
       LOG(FATAL) << "Unsupported deoptimization request kind " << request.GetKind();
@@ -3222,14 +3235,14 @@
     case DeoptimizationRequest::kSelectiveDeoptimization: {
       DCHECK(req.Method() != nullptr);
       VLOG(jdwp) << "Queue request #" << deoptimization_requests_.size()
-                 << " for deoptimization of " << PrettyMethod(req.Method());
+                 << " for deoptimization of " << req.Method()->PrettyMethod();
       deoptimization_requests_.push_back(req);
       break;
     }
     case DeoptimizationRequest::kSelectiveUndeoptimization: {
       DCHECK(req.Method() != nullptr);
       VLOG(jdwp) << "Queue request #" << deoptimization_requests_.size()
-                 << " for undeoptimization of " << PrettyMethod(req.Method());
+                 << " for undeoptimization of " << req.Method()->PrettyMethod();
       deoptimization_requests_.push_back(req);
       break;
     }
@@ -3321,7 +3334,7 @@
   if (!Dbg::RequiresDeoptimization()) {
     // We already run in interpreter-only mode so we don't need to deoptimize anything.
     VLOG(jdwp) << "No need for deoptimization when fully running with interpreter for method "
-               << PrettyMethod(m);
+               << ArtMethod::PrettyMethod(m);
     return DeoptimizationRequest::kNothing;
   }
   const Breakpoint* first_breakpoint;
@@ -3340,17 +3353,19 @@
     bool need_full_deoptimization = m->IsDefault();
     if (need_full_deoptimization) {
       VLOG(jdwp) << "Need full deoptimization because of copying of method "
-                 << PrettyMethod(m);
+                 << ArtMethod::PrettyMethod(m);
       return DeoptimizationRequest::kFullDeoptimization;
     } else {
       // We don't need to deoptimize if the method has not been compiled.
       const bool is_compiled = m->HasAnyCompiledCode();
       if (is_compiled) {
-        VLOG(jdwp) << "Need selective deoptimization for compiled method " << PrettyMethod(m);
+        VLOG(jdwp) << "Need selective deoptimization for compiled method "
+                   << ArtMethod::PrettyMethod(m);
         return DeoptimizationRequest::kSelectiveDeoptimization;
       } else {
         // Method is not compiled: we don't need to deoptimize.
-        VLOG(jdwp) << "No need for deoptimization for non-compiled method " << PrettyMethod(m);
+        VLOG(jdwp) << "No need for deoptimization for non-compiled method "
+                   << ArtMethod::PrettyMethod(m);
         return DeoptimizationRequest::kNothing;
       }
     }
@@ -3580,7 +3595,8 @@
 
   bool VisitFrame() OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) {
     // The visitor is meant to be used when handling exception from compiled code only.
-    CHECK(!IsShadowFrame()) << "We only expect to visit compiled frame: " << PrettyMethod(GetMethod());
+    CHECK(!IsShadowFrame()) << "We only expect to visit compiled frame: "
+                            << ArtMethod::PrettyMethod(GetMethod());
     ArtMethod* method = GetMethod();
     if (method == nullptr) {
       // We reach an upcall and don't need to deoptimize this part of the stack (ManagedFragment)
@@ -3806,7 +3822,8 @@
     VLOG(jdwp) << "Single-step thread: " << *thread;
     VLOG(jdwp) << "Single-step step size: " << single_step_control->GetStepSize();
     VLOG(jdwp) << "Single-step step depth: " << single_step_control->GetStepDepth();
-    VLOG(jdwp) << "Single-step current method: " << PrettyMethod(single_step_control->GetMethod());
+    VLOG(jdwp) << "Single-step current method: "
+               << ArtMethod::PrettyMethod(single_step_control->GetMethod());
     VLOG(jdwp) << "Single-step current line: " << line_number;
     VLOG(jdwp) << "Single-step current stack depth: " << single_step_control->GetStackDepth();
     VLOG(jdwp) << "Single-step dex_pc values:";
@@ -4062,12 +4079,12 @@
     ArtMethod* actual_method =
         pReq->klass.Read()->FindVirtualMethodForVirtualOrInterface(m, image_pointer_size);
     if (actual_method != m) {
-      VLOG(jdwp) << "ExecuteMethod translated " << PrettyMethod(m)
-                 << " to " << PrettyMethod(actual_method);
+      VLOG(jdwp) << "ExecuteMethod translated " << ArtMethod::PrettyMethod(m)
+                 << " to " << ArtMethod::PrettyMethod(actual_method);
       m = actual_method;
     }
   }
-  VLOG(jdwp) << "ExecuteMethod " << PrettyMethod(m)
+  VLOG(jdwp) << "ExecuteMethod " << ArtMethod::PrettyMethod(m)
              << " receiver=" << pReq->receiver.Read()
              << " arg_count=" << pReq->arg_count;
   CHECK(m != nullptr);
@@ -4076,7 +4093,7 @@
 
   // Invoke the method.
   ScopedLocalRef<jobject> ref(soa.Env(), soa.AddLocalReference<jobject>(pReq->receiver.Read()));
-  JValue result = InvokeWithJValues(soa, ref.get(), soa.EncodeMethod(m),
+  JValue result = InvokeWithJValues(soa, ref.get(), jni::EncodeArtMethod(m),
                                     reinterpret_cast<jvalue*>(pReq->arg_values.get()));
 
   // Prepare JDWP ids for the reply.
@@ -4354,7 +4371,7 @@
     CHECK(type == CHUNK_TYPE("THCR") || type == CHUNK_TYPE("THNM")) << type;
     ScopedObjectAccessUnchecked soa(Thread::Current());
     StackHandleScope<1> hs(soa.Self());
-    Handle<mirror::String> name(hs.NewHandle(t->GetThreadName(soa)));
+    Handle<mirror::String> name(hs.NewHandle(t->GetThreadName()));
     size_t char_count = (name.Get() != nullptr) ? name->GetLength() : 0;
     const jchar* chars = (name.Get() != nullptr) ? name->GetValue() : nullptr;
     bool is_compressed = (name.Get() != nullptr) ? name->IsCompressed() : false;
@@ -4869,12 +4886,13 @@
     const gc::AllocRecord* record = &it->second;
 
     LOG(INFO) << StringPrintf(" Thread %-2d %6zd bytes ", record->GetTid(), record->ByteCount())
-              << PrettyClass(record->GetClass());
+              << mirror::Class::PrettyClass(record->GetClass());
 
     for (size_t stack_frame = 0, depth = record->GetDepth(); stack_frame < depth; ++stack_frame) {
       const gc::AllocRecordStackTraceElement& stack_element = record->StackElement(stack_frame);
       ArtMethod* m = stack_element.GetMethod();
-      LOG(INFO) << "    " << PrettyMethod(m) << " line " << stack_element.ComputeLineNumber();
+      LOG(INFO) << "    " << ArtMethod::PrettyMethod(m) << " line "
+                << stack_element.ComputeLineNumber();
     }
 
     // pause periodically to help logcat catch up
@@ -5099,13 +5117,11 @@
 }
 
 ArtMethod* DeoptimizationRequest::Method() const {
-  ScopedObjectAccessUnchecked soa(Thread::Current());
-  return soa.DecodeMethod(method_);
+  return jni::DecodeArtMethod(method_);
 }
 
 void DeoptimizationRequest::SetMethod(ArtMethod* m) {
-  ScopedObjectAccessUnchecked soa(Thread::Current());
-  method_ = soa.EncodeMethod(m);
+  method_ = jni::EncodeArtMethod(m);
 }
 
 void Dbg::VisitRoots(RootVisitor* visitor) {
diff --git a/runtime/debugger.h b/runtime/debugger.h
index 5d0315e..3b4a5e1 100644
--- a/runtime/debugger.h
+++ b/runtime/debugger.h
@@ -202,6 +202,7 @@
 class Dbg {
  public:
   static void SetJdwpAllowed(bool allowed);
+  static bool IsJdwpAllowed();
 
   static void StartJdwp();
   static void StopJdwp();
diff --git a/runtime/dex_file-inl.h b/runtime/dex_file-inl.h
index 108a5af..621b2c5 100644
--- a/runtime/dex_file-inl.h
+++ b/runtime/dex_file-inl.h
@@ -90,6 +90,10 @@
   return Signature(this, GetProtoId(method_id.proto_idx_));
 }
 
+inline const Signature DexFile::GetProtoSignature(const ProtoId& proto_id) const {
+  return Signature(this, proto_id);
+}
+
 inline const char* DexFile::GetMethodName(const MethodId& method_id) const {
   return StringDataByIdx(method_id.name_idx_);
 }
diff --git a/runtime/dex_file.cc b/runtime/dex_file.cc
index 0af086c..2ef7509 100644
--- a/runtime/dex_file.cc
+++ b/runtime/dex_file.cc
@@ -29,25 +29,17 @@
 
 #include "base/enums.h"
 #include "base/file_magic.h"
-#include "base/hash_map.h"
 #include "base/logging.h"
-#include "base/stl_util.h"
 #include "base/stringprintf.h"
 #include "base/systrace.h"
 #include "base/unix_file/fd_file.h"
 #include "dex_file-inl.h"
 #include "dex_file_verifier.h"
-#include "globals.h"
 #include "jvalue.h"
 #include "leb128.h"
-#include "oat_file.h"
 #include "os.h"
-#include "safe_map.h"
-#include "thread.h"
-#include "type_lookup_table.h"
 #include "utf-inl.h"
 #include "utils.h"
-#include "well_known_classes.h"
 #include "zip_archive.h"
 
 namespace art {
@@ -933,7 +925,7 @@
   }
   if (i != parameters_size || it.HasNext()) {
     LOG(ERROR) << "invalid stream - problem with parameter iterator in " << GetLocation()
-               << " for method " << PrettyMethod(method_idx, *this);
+               << " for method " << this->PrettyMethod(method_idx);
     return false;
   }
 
@@ -1197,6 +1189,50 @@
   return val;
 }
 
+std::string DexFile::PrettyMethod(uint32_t method_idx, bool with_signature) const {
+  if (method_idx >= NumMethodIds()) {
+    return StringPrintf("<<invalid-method-idx-%d>>", method_idx);
+  }
+  const DexFile::MethodId& method_id = GetMethodId(method_idx);
+  std::string result(PrettyDescriptor(GetMethodDeclaringClassDescriptor(method_id)));
+  result += '.';
+  result += GetMethodName(method_id);
+  if (with_signature) {
+    const Signature signature = GetMethodSignature(method_id);
+    std::string sig_as_string(signature.ToString());
+    if (signature == Signature::NoSignature()) {
+      return result + sig_as_string;
+    }
+    result = PrettyReturnType(sig_as_string.c_str()) + " " + result +
+        PrettyArguments(sig_as_string.c_str());
+  }
+  return result;
+}
+
+std::string DexFile::PrettyField(uint32_t field_idx, bool with_type) const {
+  if (field_idx >= NumFieldIds()) {
+    return StringPrintf("<<invalid-field-idx-%d>>", field_idx);
+  }
+  const DexFile::FieldId& field_id = GetFieldId(field_idx);
+  std::string result;
+  if (with_type) {
+    result += GetFieldTypeDescriptor(field_id);
+    result += ' ';
+  }
+  result += PrettyDescriptor(GetFieldDeclaringClassDescriptor(field_id));
+  result += '.';
+  result += GetFieldName(field_id);
+  return result;
+}
+
+std::string DexFile::PrettyType(uint32_t type_idx) const {
+  if (type_idx >= NumTypeIds()) {
+    return StringPrintf("<<invalid-type-idx-%d>>", type_idx);
+  }
+  const DexFile::TypeId& type_id = GetTypeId(type_idx);
+  return PrettyDescriptor(GetTypeDescriptor(type_id));
+}
+
 // Checks that visibility is as expected. Includes special behavior for M and
 // before to allow runtime and build visibility when expecting runtime.
 std::ostream& operator<<(std::ostream& os, const DexFile& dex_file) {
diff --git a/runtime/dex_file.h b/runtime/dex_file.h
index 29b8c3a..da9fa50 100644
--- a/runtime/dex_file.h
+++ b/runtime/dex_file.h
@@ -22,7 +22,6 @@
 #include <vector>
 
 #include "base/logging.h"
-#include "base/mutex.h"  // For Locks::mutator_lock_.
 #include "base/value_object.h"
 #include "globals.h"
 #include "invoke_type.h"
@@ -36,10 +35,8 @@
 class OatDexFile;
 class Signature;
 class StringPiece;
-class TypeLookupTable;
 class ZipArchive;
 
-// TODO: move all of the macro functionality into the DexCache class.
 class DexFile {
  public:
   // First Dex format version supporting default methods.
@@ -639,6 +636,9 @@
   // Returns a representation of the signature of a method id.
   const Signature GetMethodSignature(const MethodId& method_id) const;
 
+  // Returns a representation of the signature of a proto id.
+  const Signature GetProtoSignature(const ProtoId& proto_id) const;
+
   // Returns the name of a method id.
   const char* GetMethodName(const MethodId& method_id) const;
 
@@ -1016,6 +1016,13 @@
   static int64_t ReadSignedLong(const uint8_t* ptr, int zwidth);
   static uint64_t ReadUnsignedLong(const uint8_t* ptr, int zwidth, bool fill_on_right);
 
+  // Returns a human-readable form of the method at an index.
+  std::string PrettyMethod(uint32_t method_idx, bool with_signature = true) const;
+  // Returns a human-readable form of the field at an index.
+  std::string PrettyField(uint32_t field_idx, bool with_type = true) const;
+  // Returns a human-readable form of the type at an index.
+  std::string PrettyType(uint32_t type_idx) const;
+
  private:
   static std::unique_ptr<const DexFile> OpenFile(int fd,
                                                  const std::string& location,
diff --git a/runtime/dex_file_annotations.cc b/runtime/dex_file_annotations.cc
index 367603e..835f456 100644
--- a/runtime/dex_file_annotations.cc
+++ b/runtime/dex_file_annotations.cc
@@ -22,7 +22,8 @@
 #include "art_method-inl.h"
 #include "class_linker-inl.h"
 #include "dex_file-inl.h"
-#include "jvalue.h"
+#include "jni_internal.h"
+#include "jvalue-inl.h"
 #include "mirror/field.h"
 #include "mirror/method.h"
 #include "reflection.h"
@@ -247,14 +248,13 @@
   Handle<mirror::Class> annotation_class(hs.NewHandle(
       class_linker->ResolveType(klass->GetDexFile(), type_index, klass.Get())));
   if (annotation_class.Get() == nullptr) {
-    LOG(INFO) << "Unable to resolve " << PrettyClass(klass.Get()) << " annotation class "
-              << type_index;
+    LOG(INFO) << "Unable to resolve " << klass->PrettyClass() << " annotation class " << type_index;
     DCHECK(Thread::Current()->IsExceptionPending());
     Thread::Current()->ClearException();
     return nullptr;
   }
 
-  mirror::Class* annotation_member_class =
+  ObjPtr<mirror::Class> annotation_member_class =
       soa.Decode<mirror::Class>(WellKnownClasses::libcore_reflect_AnnotationMember).Ptr();
   mirror::Class* annotation_member_array_class =
       class_linker->FindArrayClass(self, &annotation_member_class);
@@ -282,7 +282,7 @@
 
   JValue result;
   ArtMethod* create_annotation_method =
-      soa.DecodeMethod(WellKnownClasses::libcore_reflect_AnnotationFactory_createAnnotation);
+      jni::DecodeArtMethod(WellKnownClasses::libcore_reflect_AnnotationFactory_createAnnotation);
   uint32_t args[2] = { static_cast<uint32_t>(reinterpret_cast<uintptr_t>(annotation_class.Get())),
                        static_cast<uint32_t>(reinterpret_cast<uintptr_t>(h_element_array.Get())) };
   create_annotation_method->Invoke(self, args, sizeof(args), &result, "LLL");
@@ -611,7 +611,7 @@
   }
   Handle<mirror::Object> value_object(hs.NewHandle(annotation_value.value_.GetL()));
 
-  mirror::Class* annotation_member_class =
+  ObjPtr<mirror::Class> annotation_member_class =
       WellKnownClasses::ToClass(WellKnownClasses::libcore_reflect_AnnotationMember);
   Handle<mirror::Object> new_member(hs.NewHandle(annotation_member_class->AllocObject(self)));
   mirror::Method* method_obj_ptr;
@@ -634,7 +634,7 @@
 
   JValue result;
   ArtMethod* annotation_member_init =
-      soa.DecodeMethod(WellKnownClasses::libcore_reflect_AnnotationMember_init);
+      jni::DecodeArtMethod(WellKnownClasses::libcore_reflect_AnnotationMember_init);
   uint32_t args[5] = { static_cast<uint32_t>(reinterpret_cast<uintptr_t>(new_member.Get())),
                        static_cast<uint32_t>(reinterpret_cast<uintptr_t>(string_name.Get())),
                        static_cast<uint32_t>(reinterpret_cast<uintptr_t>(value_object.Get())),
@@ -731,7 +731,7 @@
   if (annotation_item == nullptr) {
     return nullptr;
   }
-  mirror::Class* string_class = mirror::String::GetJavaLangString();
+  ObjPtr<mirror::Class> string_class = mirror::String::GetJavaLangString();
   Handle<mirror::Class> string_array_class(hs.NewHandle(
       Runtime::Current()->GetClassLinker()->FindArrayClass(Thread::Current(), &string_class)));
   if (string_array_class.Get() == nullptr) {
@@ -757,7 +757,7 @@
   if (annotation_item == nullptr) {
     return nullptr;
   }
-  mirror::Class* class_class = mirror::Class::GetJavaLangClass();
+  ObjPtr<mirror::Class> class_class = mirror::Class::GetJavaLangClass();
   Handle<mirror::Class> class_array_class(hs.NewHandle(
       Runtime::Current()->GetClassLinker()->FindArrayClass(Thread::Current(), &class_class)));
   if (class_array_class.Get() == nullptr) {
@@ -839,8 +839,8 @@
   Thread* self = Thread::Current();
   ScopedObjectAccessUnchecked soa(self);
   StackHandleScope<1> hs(self);
-  mirror::Class* annotation_array_class =
-      soa.Decode<mirror::Class>(WellKnownClasses::java_lang_annotation_Annotation__array).Ptr();
+  ObjPtr<mirror::Class> annotation_array_class =
+      soa.Decode<mirror::Class>(WellKnownClasses::java_lang_annotation_Annotation__array);
   mirror::Class* annotation_array_array_class =
       Runtime::Current()->GetClassLinker()->FindArrayClass(self, &annotation_array_class);
   if (annotation_array_array_class == nullptr) {
@@ -1049,7 +1049,7 @@
   StackHandleScope<5> hs(Thread::Current());
 
   // Extract the parameters' names String[].
-  mirror::Class* string_class = mirror::String::GetJavaLangString();
+  ObjPtr<mirror::Class> string_class = mirror::String::GetJavaLangString();
   Handle<mirror::Class> string_array_class(hs.NewHandle(
       Runtime::Current()->GetClassLinker()->FindArrayClass(Thread::Current(), &string_class)));
   if (UNLIKELY(string_array_class.Get() == nullptr)) {
@@ -1139,7 +1139,7 @@
     return nullptr;
   }
   StackHandleScope<1> hs(Thread::Current());
-  mirror::Class* class_class = mirror::Class::GetJavaLangClass();
+  ObjPtr<mirror::Class> class_class = mirror::Class::GetJavaLangClass();
   Handle<mirror::Class> class_array_class(hs.NewHandle(
       Runtime::Current()->GetClassLinker()->FindArrayClass(hs.Self(), &class_class)));
   if (class_array_class.Get() == nullptr) {
@@ -1316,7 +1316,7 @@
   }
 
   const DexFile::CodeItem* code_item = dex_file->GetCodeItem(method->GetCodeItemOffset());
-  DCHECK(code_item != nullptr) << PrettyMethod(method) << " " << dex_file->GetLocation();
+  DCHECK(code_item != nullptr) << method->PrettyMethod() << " " << dex_file->GetLocation();
 
   // A method with no line number info should return -1
   DexFile::LineNumFromPcContext context(rel_pc, -1);
diff --git a/runtime/dex_file_verifier.cc b/runtime/dex_file_verifier.cc
index 5132efc..be25803 100644
--- a/runtime/dex_file_verifier.cc
+++ b/runtime/dex_file_verifier.cc
@@ -25,7 +25,6 @@
 #include "dex_file-inl.h"
 #include "experimental_flags.h"
 #include "leb128.h"
-#include "runtime.h"
 #include "safe_map.h"
 #include "utf-inl.h"
 #include "utils.h"
@@ -454,6 +453,27 @@
   return result;
 }
 
+
+#define DECODE_UNSIGNED_CHECKED_FROM_WITH_ERROR_VALUE(ptr, var, error_value)  \
+  uint32_t var;                                                               \
+  if (!DecodeUnsignedLeb128Checked(&(ptr), begin_ + size_, &(var))) {         \
+    return error_value;                                                       \
+  }
+
+#define DECODE_UNSIGNED_CHECKED_FROM(ptr, var)                        \
+  uint32_t var;                                                       \
+  if (!DecodeUnsignedLeb128Checked(&(ptr), begin_ + size_, &(var))) { \
+    ErrorStringPrintf("Read out of bounds");                          \
+    return false;                                                     \
+  }
+
+#define DECODE_SIGNED_CHECKED_FROM(ptr, var)                        \
+  int32_t var;                                                      \
+  if (!DecodeSignedLeb128Checked(&(ptr), begin_ + size_, &(var))) { \
+    ErrorStringPrintf("Read out of bounds");                        \
+    return false;                                                   \
+  }
+
 bool DexFileVerifier::CheckAndGetHandlerOffsets(const DexFile::CodeItem* code_item,
                                                 uint32_t* handler_offsets, uint32_t handlers_size) {
   const uint8_t* handlers_base = DexFile::GetCatchHandlerData(*code_item, 0);
@@ -461,7 +481,7 @@
   for (uint32_t i = 0; i < handlers_size; i++) {
     bool catch_all;
     size_t offset = ptr_ - handlers_base;
-    int32_t size = DecodeSignedLeb128(&ptr_);
+    DECODE_SIGNED_CHECKED_FROM(ptr_, size);
 
     if (UNLIKELY((size < -65536) || (size > 65536))) {
       ErrorStringPrintf("Invalid exception handler size: %d", size);
@@ -478,12 +498,12 @@
     handler_offsets[i] = static_cast<uint32_t>(offset);
 
     while (size-- > 0) {
-      uint32_t type_idx = DecodeUnsignedLeb128(&ptr_);
+      DECODE_UNSIGNED_CHECKED_FROM(ptr_, type_idx);
       if (!CheckIndex(type_idx, header_->type_ids_size_, "handler type_idx")) {
         return false;
       }
 
-      uint32_t addr = DecodeUnsignedLeb128(&ptr_);
+      DECODE_UNSIGNED_CHECKED_FROM(ptr_, addr);
       if (UNLIKELY(addr >= code_item->insns_size_in_code_units_)) {
         ErrorStringPrintf("Invalid handler addr: %x", addr);
         return false;
@@ -491,7 +511,7 @@
     }
 
     if (catch_all) {
-      uint32_t addr = DecodeUnsignedLeb128(&ptr_);
+      DECODE_UNSIGNED_CHECKED_FROM(ptr_, addr);
       if (UNLIKELY(addr >= code_item->insns_size_in_code_units_)) {
         ErrorStringPrintf("Invalid handler catch_all_addr: %x", addr);
         return false;
@@ -726,7 +746,7 @@
 }
 
 bool DexFileVerifier::CheckEncodedArray() {
-  uint32_t size = DecodeUnsignedLeb128(&ptr_);
+  DECODE_UNSIGNED_CHECKED_FROM(ptr_, size);
 
   while (size--) {
     if (!CheckEncodedValue()) {
@@ -738,16 +758,16 @@
 }
 
 bool DexFileVerifier::CheckEncodedAnnotation() {
-  uint32_t idx = DecodeUnsignedLeb128(&ptr_);
-  if (!CheckIndex(idx, header_->type_ids_size_, "encoded_annotation type_idx")) {
+  DECODE_UNSIGNED_CHECKED_FROM(ptr_, anno_idx);
+  if (!CheckIndex(anno_idx, header_->type_ids_size_, "encoded_annotation type_idx")) {
     return false;
   }
 
-  uint32_t size = DecodeUnsignedLeb128(&ptr_);
+  DECODE_UNSIGNED_CHECKED_FROM(ptr_, size);
   uint32_t last_idx = 0;
 
   for (uint32_t i = 0; i < size; i++) {
-    idx = DecodeUnsignedLeb128(&ptr_);
+    DECODE_UNSIGNED_CHECKED_FROM(ptr_, idx);
     if (!CheckIndex(idx, header_->string_ids_size_, "annotation_element name_idx")) {
       return false;
     }
@@ -1002,7 +1022,7 @@
   }
 
   ptr_ = DexFile::GetCatchHandlerData(*code_item, 0);
-  uint32_t handlers_size = DecodeUnsignedLeb128(&ptr_);
+  DECODE_UNSIGNED_CHECKED_FROM(ptr_, handlers_size);
 
   if (UNLIKELY((handlers_size == 0) || (handlers_size >= 65536))) {
     ErrorStringPrintf("Invalid handlers_size: %ud", handlers_size);
@@ -1051,7 +1071,7 @@
 }
 
 bool DexFileVerifier::CheckIntraStringDataItem() {
-  uint32_t size = DecodeUnsignedLeb128(&ptr_);
+  DECODE_UNSIGNED_CHECKED_FROM(ptr_, size);
   const uint8_t* file_end = begin_ + size_;
 
   for (uint32_t i = 0; i < size; i++) {
@@ -1137,15 +1157,15 @@
 }
 
 bool DexFileVerifier::CheckIntraDebugInfoItem() {
-  DecodeUnsignedLeb128(&ptr_);
-  uint32_t parameters_size = DecodeUnsignedLeb128(&ptr_);
+  DECODE_UNSIGNED_CHECKED_FROM(ptr_, dummy);
+  DECODE_UNSIGNED_CHECKED_FROM(ptr_, parameters_size);
   if (UNLIKELY(parameters_size > 65536)) {
     ErrorStringPrintf("Invalid parameters_size: %x", parameters_size);
     return false;
   }
 
   for (uint32_t j = 0; j < parameters_size; j++) {
-    uint32_t parameter_name = DecodeUnsignedLeb128(&ptr_);
+    DECODE_UNSIGNED_CHECKED_FROM(ptr_, parameter_name);
     if (parameter_name != 0) {
       parameter_name--;
       if (!CheckIndex(parameter_name, header_->string_ids_size_, "debug_info_item parameter_name")) {
@@ -1161,27 +1181,27 @@
         return true;
       }
       case DexFile::DBG_ADVANCE_PC: {
-        DecodeUnsignedLeb128(&ptr_);
+        DECODE_UNSIGNED_CHECKED_FROM(ptr_, advance_pc_dummy);
         break;
       }
       case DexFile::DBG_ADVANCE_LINE: {
-        DecodeSignedLeb128(&ptr_);
+        DECODE_SIGNED_CHECKED_FROM(ptr_, advance_line_dummy);
         break;
       }
       case DexFile::DBG_START_LOCAL: {
-        uint32_t reg_num = DecodeUnsignedLeb128(&ptr_);
+        DECODE_UNSIGNED_CHECKED_FROM(ptr_, reg_num);
         if (UNLIKELY(reg_num >= 65536)) {
           ErrorStringPrintf("Bad reg_num for opcode %x", opcode);
           return false;
         }
-        uint32_t name_idx = DecodeUnsignedLeb128(&ptr_);
+        DECODE_UNSIGNED_CHECKED_FROM(ptr_, name_idx);
         if (name_idx != 0) {
           name_idx--;
           if (!CheckIndex(name_idx, header_->string_ids_size_, "DBG_START_LOCAL name_idx")) {
             return false;
           }
         }
-        uint32_t type_idx = DecodeUnsignedLeb128(&ptr_);
+        DECODE_UNSIGNED_CHECKED_FROM(ptr_, type_idx);
         if (type_idx != 0) {
           type_idx--;
           if (!CheckIndex(type_idx, header_->type_ids_size_, "DBG_START_LOCAL type_idx")) {
@@ -1192,7 +1212,7 @@
       }
       case DexFile::DBG_END_LOCAL:
       case DexFile::DBG_RESTART_LOCAL: {
-        uint32_t reg_num = DecodeUnsignedLeb128(&ptr_);
+        DECODE_UNSIGNED_CHECKED_FROM(ptr_, reg_num);
         if (UNLIKELY(reg_num >= 65536)) {
           ErrorStringPrintf("Bad reg_num for opcode %x", opcode);
           return false;
@@ -1200,26 +1220,26 @@
         break;
       }
       case DexFile::DBG_START_LOCAL_EXTENDED: {
-        uint32_t reg_num = DecodeUnsignedLeb128(&ptr_);
+        DECODE_UNSIGNED_CHECKED_FROM(ptr_, reg_num);
         if (UNLIKELY(reg_num >= 65536)) {
           ErrorStringPrintf("Bad reg_num for opcode %x", opcode);
           return false;
         }
-        uint32_t name_idx = DecodeUnsignedLeb128(&ptr_);
+        DECODE_UNSIGNED_CHECKED_FROM(ptr_, name_idx);
         if (name_idx != 0) {
           name_idx--;
           if (!CheckIndex(name_idx, header_->string_ids_size_, "DBG_START_LOCAL_EXTENDED name_idx")) {
             return false;
           }
         }
-        uint32_t type_idx = DecodeUnsignedLeb128(&ptr_);
+        DECODE_UNSIGNED_CHECKED_FROM(ptr_, type_idx);
         if (type_idx != 0) {
           type_idx--;
           if (!CheckIndex(type_idx, header_->type_ids_size_, "DBG_START_LOCAL_EXTENDED type_idx")) {
             return false;
           }
         }
-        uint32_t sig_idx = DecodeUnsignedLeb128(&ptr_);
+        DECODE_UNSIGNED_CHECKED_FROM(ptr_, sig_idx);
         if (sig_idx != 0) {
           sig_idx--;
           if (!CheckIndex(sig_idx, header_->string_ids_size_, "DBG_START_LOCAL_EXTENDED sig_idx")) {
@@ -1229,7 +1249,7 @@
         break;
       }
       case DexFile::DBG_SET_FILE: {
-        uint32_t name_idx = DecodeUnsignedLeb128(&ptr_);
+        DECODE_UNSIGNED_CHECKED_FROM(ptr_, name_idx);
         if (name_idx != 0) {
           name_idx--;
           if (!CheckIndex(name_idx, header_->string_ids_size_, "DBG_SET_FILE name_idx")) {
@@ -2127,7 +2147,7 @@
     const DexFile::AnnotationItem* annotation =
         reinterpret_cast<const DexFile::AnnotationItem*>(begin_ + *offsets);
     const uint8_t* data = annotation->annotation_;
-    uint32_t idx = DecodeUnsignedLeb128(&data);
+    DECODE_UNSIGNED_CHECKED_FROM(data, idx);
 
     if (UNLIKELY(last_idx >= idx && i != 0)) {
       ErrorStringPrintf("Out-of-order entry types: %x then %x", last_idx, idx);
@@ -2442,7 +2462,10 @@
   // Assume that the data is OK at this point. String data has been checked at this point.
 
   const uint8_t* ptr = begin + string_id->string_data_off_;
-  DecodeUnsignedLeb128(&ptr);
+  uint32_t dummy;
+  if (!DecodeUnsignedLeb128Checked(&ptr, begin + header->file_size_, &dummy)) {
+    return "(error)";
+  }
   return reinterpret_cast<const char*>(ptr);
 }
 
@@ -2604,7 +2627,11 @@
     return false;
   }
   const uint8_t* str_data_ptr = begin + string_off;
-  DecodeUnsignedLeb128(&str_data_ptr);
+  uint32_t dummy;
+  if (!DecodeUnsignedLeb128Checked(&str_data_ptr, begin + header->file_size_, &dummy)) {
+    *error_msg = "String size out of bounds for method flags verification";
+    return false;
+  }
   *str = reinterpret_cast<const char*>(str_data_ptr);
   return true;
 }
diff --git a/runtime/dex_file_verifier_test.cc b/runtime/dex_file_verifier_test.cc
index e392870..3801c22 100644
--- a/runtime/dex_file_verifier_test.cc
+++ b/runtime/dex_file_verifier_test.cc
@@ -58,7 +58,7 @@
 
   void VerifyModification(const char* dex_file_base64_content,
                           const char* location,
-                          std::function<void(DexFile*)> f,
+                          const std::function<void(DexFile*)>& f,
                           const char* expected_error) {
     size_t length;
     std::unique_ptr<uint8_t[]> dex_bytes(DecodeBase64(dex_file_base64_content, &length));
diff --git a/runtime/dex_instruction.cc b/runtime/dex_instruction.cc
index c31d236..c766b54 100644
--- a/runtime/dex_instruction.cc
+++ b/runtime/dex_instruction.cc
@@ -209,7 +209,7 @@
         case NEW_INSTANCE:
           if (file != nullptr) {
             uint32_t type_idx = VRegB_21c();
-            os << opcode << " v" << static_cast<int>(VRegA_21c()) << ", " << PrettyType(type_idx, *file)
+            os << opcode << " v" << static_cast<int>(VRegA_21c()) << ", " << file->PrettyType(type_idx)
                << " // type@" << type_idx;
             break;
           }
@@ -223,7 +223,7 @@
         case SGET_SHORT:
           if (file != nullptr) {
             uint32_t field_idx = VRegB_21c();
-            os << opcode << "  v" << static_cast<int>(VRegA_21c()) << ", " << PrettyField(field_idx, *file, true)
+            os << opcode << "  v" << static_cast<int>(VRegA_21c()) << ", " << file->PrettyField(field_idx, true)
                << " // field@" << field_idx;
             break;
           }
@@ -237,7 +237,7 @@
         case SPUT_SHORT:
           if (file != nullptr) {
             uint32_t field_idx = VRegB_21c();
-            os << opcode << " v" << static_cast<int>(VRegA_21c()) << ", " << PrettyField(field_idx, *file, true)
+            os << opcode << " v" << static_cast<int>(VRegA_21c()) << ", " << file->PrettyField(field_idx, true)
                << " // field@" << field_idx;
             break;
           }
@@ -264,7 +264,7 @@
           if (file != nullptr) {
             uint32_t field_idx = VRegC_22c();
             os << opcode << " v" << static_cast<int>(VRegA_22c()) << ", v" << static_cast<int>(VRegB_22c()) << ", "
-               << PrettyField(field_idx, *file, true) << " // field@" << field_idx;
+               << file->PrettyField(field_idx, true) << " // field@" << field_idx;
             break;
           }
           FALLTHROUGH_INTENDED;
@@ -287,7 +287,7 @@
           if (file != nullptr) {
             uint32_t field_idx = VRegC_22c();
             os << opcode << " v" << static_cast<int>(VRegA_22c()) << ", v" << static_cast<int>(VRegB_22c()) << ", "
-               << PrettyField(field_idx, *file, true) << " // field@" << field_idx;
+               << file->PrettyField(field_idx, true) << " // field@" << field_idx;
             break;
           }
           FALLTHROUGH_INTENDED;
@@ -304,7 +304,7 @@
           if (file != nullptr) {
             uint32_t type_idx = VRegC_22c();
             os << opcode << " v" << static_cast<int>(VRegA_22c()) << ", v" << static_cast<int>(VRegB_22c()) << ", "
-               << PrettyType(type_idx, *file) << " // type@" << type_idx;
+               << file->PrettyType(type_idx) << " // type@" << type_idx;
             break;
           }
           FALLTHROUGH_INTENDED;
@@ -312,7 +312,7 @@
           if (file != nullptr) {
             uint32_t type_idx = VRegC_22c();
             os << opcode << " v" << static_cast<int>(VRegA_22c()) << ", v" << static_cast<int>(VRegB_22c()) << ", "
-               << PrettyType(type_idx, *file) << " // type@" << type_idx;
+               << file->PrettyType(type_idx) << " // type@" << type_idx;
             break;
           }
           FALLTHROUGH_INTENDED;
@@ -382,7 +382,7 @@
               }
               os << "v" << arg[i];
             }
-            os << "}, " << PrettyMethod(method_idx, *file) << " // method@" << method_idx;
+            os << "}, " << file->PrettyMethod(method_idx) << " // method@" << method_idx;
             break;
           }
           FALLTHROUGH_INTENDED;
@@ -417,7 +417,7 @@
           if (file != nullptr) {
             uint32_t method_idx = VRegB_3rc();
             os << StringPrintf("%s, {v%d .. v%d}, ", opcode, VRegC_3rc(), (VRegC_3rc() + VRegA_3rc() - 1))
-               << PrettyMethod(method_idx, *file) << " // method@" << method_idx;
+               << file->PrettyMethod(method_idx) << " // method@" << method_idx;
             break;
           }
           FALLTHROUGH_INTENDED;
diff --git a/runtime/dex_instruction.h b/runtime/dex_instruction.h
index f437fde..8eb1a79 100644
--- a/runtime/dex_instruction.h
+++ b/runtime/dex_instruction.h
@@ -126,13 +126,14 @@
 
   enum IndexType {
     kIndexUnknown = 0,
-    kIndexNone,          // has no index
-    kIndexTypeRef,       // type reference index
-    kIndexStringRef,     // string reference index
-    kIndexMethodRef,     // method reference index
-    kIndexFieldRef,      // field reference index
-    kIndexFieldOffset,   // field offset (for static linked fields)
-    kIndexVtableOffset   // vtable offset (for static linked methods)
+    kIndexNone,              // has no index
+    kIndexTypeRef,           // type reference index
+    kIndexStringRef,         // string reference index
+    kIndexMethodRef,         // method reference index
+    kIndexFieldRef,          // field reference index
+    kIndexFieldOffset,       // field offset (for static linked fields)
+    kIndexVtableOffset,      // vtable offset (for static linked methods)
+    kIndexMethodAndProtoRef  // method and a proto reference index (for invoke-polymorphic)
   };
 
   enum Flags {
diff --git a/runtime/dex_instruction_list.h b/runtime/dex_instruction_list.h
index 3194c1a..e537afe 100644
--- a/runtime/dex_instruction_list.h
+++ b/runtime/dex_instruction_list.h
@@ -269,8 +269,8 @@
   V(0xF7, UNUSED_F7, "unused-f7", k10x, kIndexUnknown, 0, kVerifyError) \
   V(0xF8, UNUSED_F8, "unused-f8", k10x, kIndexUnknown, 0, kVerifyError) \
   V(0xF9, UNUSED_F9, "unused-f9", k10x, kIndexUnknown, 0, kVerifyError) \
-  V(0xFA, INVOKE_POLYMORPHIC, "invoke-polymorphic", k45cc, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgNonZero | kExperimental) \
-  V(0xFB, INVOKE_POLYMORPHIC_RANGE, "invoke-polymorphic/range", k4rcc, kIndexMethodRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgRangeNonZero | kExperimental) \
+  V(0xFA, INVOKE_POLYMORPHIC, "invoke-polymorphic", k45cc, kIndexMethodAndProtoRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgNonZero | kExperimental) \
+  V(0xFB, INVOKE_POLYMORPHIC_RANGE, "invoke-polymorphic/range", k4rcc, kIndexMethodAndProtoRef, kContinue | kThrow | kInvoke, kVerifyRegBMethod | kVerifyVarArgRangeNonZero | kExperimental) \
   V(0xFC, UNUSED_FC, "unused-fc", k10x, kIndexUnknown, 0, kVerifyError) \
   V(0xFD, UNUSED_FD, "unused-fd", k10x, kIndexUnknown, 0, kVerifyError) \
   V(0xFE, UNUSED_FE, "unused-fe", k10x, kIndexUnknown, 0, kVerifyError) \
diff --git a/runtime/dex_method_iterator_test.cc b/runtime/dex_method_iterator_test.cc
index 9f28c8c..cd8c390 100644
--- a/runtime/dex_method_iterator_test.cc
+++ b/runtime/dex_method_iterator_test.cc
@@ -40,7 +40,7 @@
     InvokeType invoke_type = it.GetInvokeType();
     uint32_t method_idx = it.GetMemberIndex();
     if ((false)) {
-      LOG(INFO) << invoke_type << " " << PrettyMethod(method_idx, dex_file);
+      LOG(INFO) << invoke_type << " " << dex_file.PrettyMethod(method_idx);
     }
     it.Next();
   }
diff --git a/runtime/elf_file.cc b/runtime/elf_file.cc
index 096f003..2ea7bb6 100644
--- a/runtime/elf_file.cc
+++ b/runtime/elf_file.cc
@@ -36,8 +36,7 @@
 ElfFileImpl<ElfTypes>::ElfFileImpl(File* file, bool writable,
                                    bool program_header_only,
                                    uint8_t* requested_base)
-  : file_(file),
-    writable_(writable),
+  : writable_(writable),
     program_header_only_(program_header_only),
     header_(nullptr),
     base_address_(nullptr),
@@ -74,7 +73,7 @@
     prot = PROT_READ;
     flags = MAP_PRIVATE;
   }
-  if (!elf_file->Setup(prot, flags, low_4gb, error_msg)) {
+  if (!elf_file->Setup(file, prot, flags, low_4gb, error_msg)) {
     return nullptr;
   }
   return elf_file.release();
@@ -89,39 +88,44 @@
   std::unique_ptr<ElfFileImpl<ElfTypes>> elf_file(new ElfFileImpl<ElfTypes>
       (file, (prot & PROT_WRITE) == PROT_WRITE, /*program_header_only*/false,
       /*requested_base*/nullptr));
-  if (!elf_file->Setup(prot, flags, low_4gb, error_msg)) {
+  if (!elf_file->Setup(file, prot, flags, low_4gb, error_msg)) {
     return nullptr;
   }
   return elf_file.release();
 }
 
 template <typename ElfTypes>
-bool ElfFileImpl<ElfTypes>::Setup(int prot, int flags, bool low_4gb, std::string* error_msg) {
-  int64_t temp_file_length = file_->GetLength();
+bool ElfFileImpl<ElfTypes>::Setup(File* file,
+                                  int prot,
+                                  int flags,
+                                  bool low_4gb,
+                                  std::string* error_msg) {
+  int64_t temp_file_length = file->GetLength();
   if (temp_file_length < 0) {
     errno = -temp_file_length;
     *error_msg = StringPrintf("Failed to get length of file: '%s' fd=%d: %s",
-                              file_->GetPath().c_str(), file_->Fd(), strerror(errno));
+                              file->GetPath().c_str(), file->Fd(), strerror(errno));
     return false;
   }
   size_t file_length = static_cast<size_t>(temp_file_length);
   if (file_length < sizeof(Elf_Ehdr)) {
     *error_msg = StringPrintf("File size of %zd bytes not large enough to contain ELF header of "
                               "%zd bytes: '%s'", file_length, sizeof(Elf_Ehdr),
-                              file_->GetPath().c_str());
+                              file->GetPath().c_str());
     return false;
   }
 
   if (program_header_only_) {
     // first just map ELF header to get program header size information
     size_t elf_header_size = sizeof(Elf_Ehdr);
-    if (!SetMap(MemMap::MapFile(elf_header_size,
+    if (!SetMap(file,
+                MemMap::MapFile(elf_header_size,
                                 prot,
                                 flags,
-                                file_->Fd(),
+                                file->Fd(),
                                 0,
                                 low_4gb,
-                                file_->GetPath().c_str(),
+                                file->GetPath().c_str(),
                                 error_msg),
                 error_msg)) {
       return false;
@@ -131,16 +135,17 @@
     if (file_length < program_header_size) {
       *error_msg = StringPrintf("File size of %zd bytes not large enough to contain ELF program "
                                 "header of %zd bytes: '%s'", file_length,
-                                sizeof(Elf_Ehdr), file_->GetPath().c_str());
+                                sizeof(Elf_Ehdr), file->GetPath().c_str());
       return false;
     }
-    if (!SetMap(MemMap::MapFile(program_header_size,
+    if (!SetMap(file,
+                MemMap::MapFile(program_header_size,
                                 prot,
                                 flags,
-                                file_->Fd(),
+                                file->Fd(),
                                 0,
                                 low_4gb,
-                                file_->GetPath().c_str(),
+                                file->GetPath().c_str(),
                                 error_msg),
                 error_msg)) {
       *error_msg = StringPrintf("Failed to map ELF program headers: %s", error_msg->c_str());
@@ -148,13 +153,14 @@
     }
   } else {
     // otherwise map entire file
-    if (!SetMap(MemMap::MapFile(file_->GetLength(),
+    if (!SetMap(file,
+                MemMap::MapFile(file->GetLength(),
                                 prot,
                                 flags,
-                                file_->Fd(),
+                                file->Fd(),
                                 0,
                                 low_4gb,
-                                file_->GetPath().c_str(),
+                                file->GetPath().c_str(),
                                 error_msg),
                 error_msg)) {
       *error_msg = StringPrintf("Failed to map ELF file: %s", error_msg->c_str());
@@ -178,7 +184,7 @@
     Elf_Shdr* shstrtab_section_header = GetSectionNameStringSection();
     if (shstrtab_section_header == nullptr) {
       *error_msg = StringPrintf("Failed to find shstrtab section header in ELF file: '%s'",
-                                file_->GetPath().c_str());
+                                file->GetPath().c_str());
       return false;
     }
 
@@ -186,7 +192,7 @@
     dynamic_program_header_ = FindProgamHeaderByType(PT_DYNAMIC);
     if (dynamic_program_header_ == nullptr) {
       *error_msg = StringPrintf("Failed to find PT_DYNAMIC program header in ELF file: '%s'",
-                                file_->GetPath().c_str());
+                                file->GetPath().c_str());
       return false;
     }
 
@@ -200,7 +206,7 @@
       Elf_Shdr* section_header = GetSectionHeader(i);
       if (section_header == nullptr) {
         *error_msg = StringPrintf("Failed to find section header for section %d in ELF file: '%s'",
-                                  i, file_->GetPath().c_str());
+                                  i, file->GetPath().c_str());
         return false;
       }
       switch (section_header->sh_type) {
@@ -245,7 +251,7 @@
           if (reinterpret_cast<uint8_t*>(dynamic_section_start_) !=
               Begin() + section_header->sh_offset) {
             LOG(WARNING) << "Failed to find matching SHT_DYNAMIC for PT_DYNAMIC in "
-                         << file_->GetPath() << ": " << std::hex
+                         << file->GetPath() << ": " << std::hex
                          << reinterpret_cast<void*>(dynamic_section_start_)
                          << " != " << reinterpret_cast<void*>(Begin() + section_header->sh_offset);
             return false;
@@ -263,7 +269,7 @@
     }
 
     // Check for the existence of some sections.
-    if (!CheckSectionsExist(error_msg)) {
+    if (!CheckSectionsExist(file, error_msg)) {
       return false;
     }
   }
@@ -283,7 +289,7 @@
                                         uint8_t** target, std::string* error_msg) {
   if (Begin() + offset >= End()) {
     *error_msg = StringPrintf("Offset %d is out of range for %s in ELF file: '%s'", offset, label,
-                              file_->GetPath().c_str());
+                              file_path_.c_str());
     return false;
   }
   *target = Begin() + offset;
@@ -324,11 +330,11 @@
 }
 
 template <typename ElfTypes>
-bool ElfFileImpl<ElfTypes>::CheckSectionsExist(std::string* error_msg) const {
+  bool ElfFileImpl<ElfTypes>::CheckSectionsExist(File* file, std::string* error_msg) const {
   if (!program_header_only_) {
     // If in full mode, need section headers.
     if (section_headers_start_ == nullptr) {
-      *error_msg = StringPrintf("No section headers in ELF file: '%s'", file_->GetPath().c_str());
+      *error_msg = StringPrintf("No section headers in ELF file: '%s'", file->GetPath().c_str());
       return false;
     }
   }
@@ -336,14 +342,14 @@
   // This is redundant, but defensive.
   if (dynamic_program_header_ == nullptr) {
     *error_msg = StringPrintf("Failed to find PT_DYNAMIC program header in ELF file: '%s'",
-                              file_->GetPath().c_str());
+                              file->GetPath().c_str());
     return false;
   }
 
   // Need a dynamic section. This is redundant, but defensive.
   if (dynamic_section_start_ == nullptr) {
     *error_msg = StringPrintf("Failed to find dynamic section in ELF file: '%s'",
-                              file_->GetPath().c_str());
+                              file->GetPath().c_str());
     return false;
   }
 
@@ -352,7 +358,7 @@
   if (symtab_section_start_ != nullptr) {
     // When there's a symtab, there should be a strtab.
     if (strtab_section_start_ == nullptr) {
-      *error_msg = StringPrintf("No strtab for symtab in ELF file: '%s'", file_->GetPath().c_str());
+      *error_msg = StringPrintf("No strtab for symtab in ELF file: '%s'", file->GetPath().c_str());
       return false;
     }
 
@@ -360,25 +366,25 @@
     if (!CheckSectionsLinked(reinterpret_cast<const uint8_t*>(symtab_section_start_),
                              reinterpret_cast<const uint8_t*>(strtab_section_start_))) {
       *error_msg = StringPrintf("Symtab is not linked to the strtab in ELF file: '%s'",
-                                file_->GetPath().c_str());
+                                file->GetPath().c_str());
       return false;
     }
   }
 
   // We always need a dynstr & dynsym.
   if (dynstr_section_start_ == nullptr) {
-    *error_msg = StringPrintf("No dynstr in ELF file: '%s'", file_->GetPath().c_str());
+    *error_msg = StringPrintf("No dynstr in ELF file: '%s'", file->GetPath().c_str());
     return false;
   }
   if (dynsym_section_start_ == nullptr) {
-    *error_msg = StringPrintf("No dynsym in ELF file: '%s'", file_->GetPath().c_str());
+    *error_msg = StringPrintf("No dynsym in ELF file: '%s'", file->GetPath().c_str());
     return false;
   }
 
   // Need a hash section for dynamic symbol lookup.
   if (hash_section_start_ == nullptr) {
     *error_msg = StringPrintf("Failed to find hash section in ELF file: '%s'",
-                              file_->GetPath().c_str());
+                              file->GetPath().c_str());
     return false;
   }
 
@@ -386,7 +392,7 @@
   if (!CheckSectionsLinked(reinterpret_cast<const uint8_t*>(hash_section_start_),
                            reinterpret_cast<const uint8_t*>(dynsym_section_start_))) {
     *error_msg = StringPrintf("Hash section is not linked to the dynstr in ELF file: '%s'",
-                              file_->GetPath().c_str());
+                              file->GetPath().c_str());
     return false;
   }
 
@@ -397,9 +403,9 @@
     // It might not be mapped, but we can compare against the file size.
     int64_t offset = static_cast<int64_t>(GetHeader().e_shoff +
                                           (GetHeader().e_shstrndx * GetHeader().e_shentsize));
-    if (offset >= file_->GetLength()) {
+    if (offset >= file->GetLength()) {
       *error_msg = StringPrintf("Shstrtab is not in the mapped ELF file: '%s'",
-                                file_->GetPath().c_str());
+                                file->GetPath().c_str());
       return false;
     }
   }
@@ -408,15 +414,15 @@
 }
 
 template <typename ElfTypes>
-bool ElfFileImpl<ElfTypes>::SetMap(MemMap* map, std::string* error_msg) {
+bool ElfFileImpl<ElfTypes>::SetMap(File* file, MemMap* map, std::string* error_msg) {
   if (map == nullptr) {
     // MemMap::Open should have already set an error.
     DCHECK(!error_msg->empty());
     return false;
   }
   map_.reset(map);
-  CHECK(map_.get() != nullptr) << file_->GetPath();
-  CHECK(map_->Begin() != nullptr) << file_->GetPath();
+  CHECK(map_.get() != nullptr) << file->GetPath();
+  CHECK(map_->Begin() != nullptr) << file->GetPath();
 
   header_ = reinterpret_cast<Elf_Ehdr*>(map_->Begin());
   if ((ELFMAG0 != header_->e_ident[EI_MAG0])
@@ -425,7 +431,7 @@
       || (ELFMAG3 != header_->e_ident[EI_MAG3])) {
     *error_msg = StringPrintf("Failed to find ELF magic value %d %d %d %d in %s, found %d %d %d %d",
                               ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,
-                              file_->GetPath().c_str(),
+                              file->GetPath().c_str(),
                               header_->e_ident[EI_MAG0],
                               header_->e_ident[EI_MAG1],
                               header_->e_ident[EI_MAG2],
@@ -436,90 +442,90 @@
   if (elf_class != header_->e_ident[EI_CLASS]) {
     *error_msg = StringPrintf("Failed to find expected EI_CLASS value %d in %s, found %d",
                               elf_class,
-                              file_->GetPath().c_str(),
+                              file->GetPath().c_str(),
                               header_->e_ident[EI_CLASS]);
     return false;
   }
   if (ELFDATA2LSB != header_->e_ident[EI_DATA]) {
     *error_msg = StringPrintf("Failed to find expected EI_DATA value %d in %s, found %d",
                               ELFDATA2LSB,
-                              file_->GetPath().c_str(),
+                              file->GetPath().c_str(),
                               header_->e_ident[EI_CLASS]);
     return false;
   }
   if (EV_CURRENT != header_->e_ident[EI_VERSION]) {
     *error_msg = StringPrintf("Failed to find expected EI_VERSION value %d in %s, found %d",
                               EV_CURRENT,
-                              file_->GetPath().c_str(),
+                              file->GetPath().c_str(),
                               header_->e_ident[EI_CLASS]);
     return false;
   }
   if (ET_DYN != header_->e_type) {
     *error_msg = StringPrintf("Failed to find expected e_type value %d in %s, found %d",
                               ET_DYN,
-                              file_->GetPath().c_str(),
+                              file->GetPath().c_str(),
                               header_->e_type);
     return false;
   }
   if (EV_CURRENT != header_->e_version) {
     *error_msg = StringPrintf("Failed to find expected e_version value %d in %s, found %d",
                               EV_CURRENT,
-                              file_->GetPath().c_str(),
+                              file->GetPath().c_str(),
                               header_->e_version);
     return false;
   }
   if (0 != header_->e_entry) {
     *error_msg = StringPrintf("Failed to find expected e_entry value %d in %s, found %d",
                               0,
-                              file_->GetPath().c_str(),
+                              file->GetPath().c_str(),
                               static_cast<int32_t>(header_->e_entry));
     return false;
   }
   if (0 == header_->e_phoff) {
     *error_msg = StringPrintf("Failed to find non-zero e_phoff value in %s",
-                              file_->GetPath().c_str());
+                              file->GetPath().c_str());
     return false;
   }
   if (0 == header_->e_shoff) {
     *error_msg = StringPrintf("Failed to find non-zero e_shoff value in %s",
-                              file_->GetPath().c_str());
+                              file->GetPath().c_str());
     return false;
   }
   if (0 == header_->e_ehsize) {
     *error_msg = StringPrintf("Failed to find non-zero e_ehsize value in %s",
-                              file_->GetPath().c_str());
+                              file->GetPath().c_str());
     return false;
   }
   if (0 == header_->e_phentsize) {
     *error_msg = StringPrintf("Failed to find non-zero e_phentsize value in %s",
-                              file_->GetPath().c_str());
+                              file->GetPath().c_str());
     return false;
   }
   if (0 == header_->e_phnum) {
     *error_msg = StringPrintf("Failed to find non-zero e_phnum value in %s",
-                              file_->GetPath().c_str());
+                              file->GetPath().c_str());
     return false;
   }
   if (0 == header_->e_shentsize) {
     *error_msg = StringPrintf("Failed to find non-zero e_shentsize value in %s",
-                              file_->GetPath().c_str());
+                              file->GetPath().c_str());
     return false;
   }
   if (0 == header_->e_shnum) {
     *error_msg = StringPrintf("Failed to find non-zero e_shnum value in %s",
-                              file_->GetPath().c_str());
+                              file->GetPath().c_str());
     return false;
   }
   if (0 == header_->e_shstrndx) {
     *error_msg = StringPrintf("Failed to find non-zero e_shstrndx value in %s",
-                              file_->GetPath().c_str());
+                              file->GetPath().c_str());
     return false;
   }
   if (header_->e_shstrndx >= header_->e_shnum) {
     *error_msg = StringPrintf("Failed to find e_shnum value %d less than %d in %s",
                               header_->e_shstrndx,
                               header_->e_shnum,
-                              file_->GetPath().c_str());
+                              file->GetPath().c_str());
     return false;
   }
 
@@ -528,14 +534,14 @@
       *error_msg = StringPrintf("Failed to find e_phoff value %" PRIu64 " less than %zd in %s",
                                 static_cast<uint64_t>(header_->e_phoff),
                                 Size(),
-                                file_->GetPath().c_str());
+                                file->GetPath().c_str());
       return false;
     }
     if (header_->e_shoff >= Size()) {
       *error_msg = StringPrintf("Failed to find e_shoff value %" PRIu64 " less than %zd in %s",
                                 static_cast<uint64_t>(header_->e_shoff),
                                 Size(),
-                                file_->GetPath().c_str());
+                                file->GetPath().c_str());
       return false;
     }
   }
@@ -577,7 +583,7 @@
 template <typename ElfTypes>
 typename ElfTypes::Sym* ElfFileImpl<ElfTypes>::GetSymbolSectionStart(
     Elf_Word section_type) const {
-  CHECK(IsSymbolSectionType(section_type)) << file_->GetPath() << " " << section_type;
+  CHECK(IsSymbolSectionType(section_type)) << file_path_ << " " << section_type;
   switch (section_type) {
     case SHT_SYMTAB: {
       return symtab_section_start_;
@@ -597,7 +603,7 @@
 template <typename ElfTypes>
 const char* ElfFileImpl<ElfTypes>::GetStringSectionStart(
     Elf_Word section_type) const {
-  CHECK(IsSymbolSectionType(section_type)) << file_->GetPath() << " " << section_type;
+  CHECK(IsSymbolSectionType(section_type)) << file_path_ << " " << section_type;
   switch (section_type) {
     case SHT_SYMTAB: {
       return strtab_section_start_;
@@ -615,7 +621,7 @@
 template <typename ElfTypes>
 const char* ElfFileImpl<ElfTypes>::GetString(Elf_Word section_type,
                                              Elf_Word i) const {
-  CHECK(IsSymbolSectionType(section_type)) << file_->GetPath() << " " << section_type;
+  CHECK(IsSymbolSectionType(section_type)) << file_path_ << " " << section_type;
   if (i == 0) {
     return nullptr;
   }
@@ -673,7 +679,7 @@
 
 template <typename ElfTypes>
 typename ElfTypes::Phdr* ElfFileImpl<ElfTypes>::GetProgramHeader(Elf_Word i) const {
-  CHECK_LT(i, GetProgramHeaderNum()) << file_->GetPath();  // Sanity check for caller.
+  CHECK_LT(i, GetProgramHeaderNum()) << file_path_;  // Sanity check for caller.
   uint8_t* program_header = GetProgramHeadersStart() + (i * GetHeader().e_phentsize);
   if (program_header >= End()) {
     return nullptr;  // Failure condition.
@@ -701,7 +707,7 @@
 typename ElfTypes::Shdr* ElfFileImpl<ElfTypes>::GetSectionHeader(Elf_Word i) const {
   // Can only access arbitrary sections when we have the whole file, not just program header.
   // Even if we Load(), it doesn't bring in all the sections.
-  CHECK(!program_header_only_) << file_->GetPath();
+  CHECK(!program_header_only_) << file_path_;
   if (i >= GetSectionHeaderNum()) {
     return nullptr;  // Failure condition.
   }
@@ -716,7 +722,7 @@
 typename ElfTypes::Shdr* ElfFileImpl<ElfTypes>::FindSectionByType(Elf_Word type) const {
   // Can only access arbitrary sections when we have the whole file, not just program header.
   // We could change this to switch on known types if they were detected during loading.
-  CHECK(!program_header_only_) << file_->GetPath();
+  CHECK(!program_header_only_) << file_path_;
   for (Elf_Word i = 0; i < GetSectionHeaderNum(); i++) {
     Elf_Shdr* section_header = GetSectionHeader(i);
     if (section_header->sh_type == type) {
@@ -802,8 +808,8 @@
 template <typename ElfTypes>
 typename ElfTypes::Word ElfFileImpl<ElfTypes>::GetSymbolNum(Elf_Shdr& section_header) const {
   CHECK(IsSymbolSectionType(section_header.sh_type))
-      << file_->GetPath() << " " << section_header.sh_type;
-  CHECK_NE(0U, section_header.sh_entsize) << file_->GetPath();
+      << file_path_ << " " << section_header.sh_type;
+  CHECK_NE(0U, section_header.sh_entsize) << file_path_;
   return section_header.sh_size / section_header.sh_entsize;
 }
 
@@ -819,7 +825,7 @@
 template <typename ElfTypes>
 typename ElfFileImpl<ElfTypes>::SymbolTable**
 ElfFileImpl<ElfTypes>::GetSymbolTable(Elf_Word section_type) {
-  CHECK(IsSymbolSectionType(section_type)) << file_->GetPath() << " " << section_type;
+  CHECK(IsSymbolSectionType(section_type)) << file_path_ << " " << section_type;
   switch (section_type) {
     case SHT_SYMTAB: {
       return &symtab_symbol_table_;
@@ -837,8 +843,8 @@
 template <typename ElfTypes>
 typename ElfTypes::Sym* ElfFileImpl<ElfTypes>::FindSymbolByName(
     Elf_Word section_type, const std::string& symbol_name, bool build_map) {
-  CHECK(!program_header_only_) << file_->GetPath();
-  CHECK(IsSymbolSectionType(section_type)) << file_->GetPath() << " " << section_type;
+  CHECK(!program_header_only_) << file_path_;
+  CHECK(IsSymbolSectionType(section_type)) << file_path_ << " " << section_type;
 
   SymbolTable** symbol_table = GetSymbolTable(section_type);
   if (*symbol_table != nullptr || build_map) {
@@ -928,7 +934,7 @@
 template <typename ElfTypes>
 const char* ElfFileImpl<ElfTypes>::GetString(Elf_Shdr& string_section,
                                              Elf_Word i) const {
-  CHECK(!program_header_only_) << file_->GetPath();
+  CHECK(!program_header_only_) << file_path_;
   // TODO: remove this static_cast from enum when using -std=gnu++0x
   if (static_cast<Elf_Word>(SHT_STRTAB) != string_section.sh_type) {
     return nullptr;  // Failure condition.
@@ -954,7 +960,7 @@
 
 template <typename ElfTypes>
 typename ElfTypes::Dyn& ElfFileImpl<ElfTypes>::GetDynamic(Elf_Word i) const {
-  CHECK_LT(i, GetDynamicNum()) << file_->GetPath();
+  CHECK_LT(i, GetDynamicNum()) << file_path_;
   return *(GetDynamicSectionStart() + i);
 }
 
@@ -981,40 +987,40 @@
 
 template <typename ElfTypes>
 typename ElfTypes::Rel* ElfFileImpl<ElfTypes>::GetRelSectionStart(Elf_Shdr& section_header) const {
-  CHECK(SHT_REL == section_header.sh_type) << file_->GetPath() << " " << section_header.sh_type;
+  CHECK(SHT_REL == section_header.sh_type) << file_path_ << " " << section_header.sh_type;
   return reinterpret_cast<Elf_Rel*>(Begin() + section_header.sh_offset);
 }
 
 template <typename ElfTypes>
 typename ElfTypes::Word ElfFileImpl<ElfTypes>::GetRelNum(Elf_Shdr& section_header) const {
-  CHECK(SHT_REL == section_header.sh_type) << file_->GetPath() << " " << section_header.sh_type;
-  CHECK_NE(0U, section_header.sh_entsize) << file_->GetPath();
+  CHECK(SHT_REL == section_header.sh_type) << file_path_ << " " << section_header.sh_type;
+  CHECK_NE(0U, section_header.sh_entsize) << file_path_;
   return section_header.sh_size / section_header.sh_entsize;
 }
 
 template <typename ElfTypes>
 typename ElfTypes::Rel& ElfFileImpl<ElfTypes>::GetRel(Elf_Shdr& section_header, Elf_Word i) const {
-  CHECK(SHT_REL == section_header.sh_type) << file_->GetPath() << " " << section_header.sh_type;
-  CHECK_LT(i, GetRelNum(section_header)) << file_->GetPath();
+  CHECK(SHT_REL == section_header.sh_type) << file_path_ << " " << section_header.sh_type;
+  CHECK_LT(i, GetRelNum(section_header)) << file_path_;
   return *(GetRelSectionStart(section_header) + i);
 }
 
 template <typename ElfTypes>
 typename ElfTypes::Rela* ElfFileImpl<ElfTypes>::GetRelaSectionStart(Elf_Shdr& section_header) const {
-  CHECK(SHT_RELA == section_header.sh_type) << file_->GetPath() << " " << section_header.sh_type;
+  CHECK(SHT_RELA == section_header.sh_type) << file_path_ << " " << section_header.sh_type;
   return reinterpret_cast<Elf_Rela*>(Begin() + section_header.sh_offset);
 }
 
 template <typename ElfTypes>
 typename ElfTypes::Word ElfFileImpl<ElfTypes>::GetRelaNum(Elf_Shdr& section_header) const {
-  CHECK(SHT_RELA == section_header.sh_type) << file_->GetPath() << " " << section_header.sh_type;
+  CHECK(SHT_RELA == section_header.sh_type) << file_path_ << " " << section_header.sh_type;
   return section_header.sh_size / section_header.sh_entsize;
 }
 
 template <typename ElfTypes>
 typename ElfTypes::Rela& ElfFileImpl<ElfTypes>::GetRela(Elf_Shdr& section_header, Elf_Word i) const {
-  CHECK(SHT_RELA == section_header.sh_type) << file_->GetPath() << " " << section_header.sh_type;
-  CHECK_LT(i, GetRelaNum(section_header)) << file_->GetPath();
+  CHECK(SHT_RELA == section_header.sh_type) << file_path_ << " " << section_header.sh_type;
+  CHECK_LT(i, GetRelaNum(section_header)) << file_path_;
   return *(GetRelaSectionStart(section_header) + i);
 }
 
@@ -1037,7 +1043,7 @@
       std::ostringstream oss;
       oss << "Program header #" << i << " has overflow in p_vaddr+p_memsz: 0x" << std::hex
           << program_header->p_vaddr << "+0x" << program_header->p_memsz << "=0x" << end_vaddr
-          << " in ELF file \"" << file_->GetPath() << "\"";
+          << " in ELF file \"" << file_path_ << "\"";
       *error_msg = oss.str();
       *size = static_cast<size_t>(-1);
       return false;
@@ -1048,13 +1054,13 @@
   }
   min_vaddr = RoundDown(min_vaddr, kPageSize);
   max_vaddr = RoundUp(max_vaddr, kPageSize);
-  CHECK_LT(min_vaddr, max_vaddr) << file_->GetPath();
+  CHECK_LT(min_vaddr, max_vaddr) << file_path_;
   Elf_Addr loaded_size = max_vaddr - min_vaddr;
   // Check that the loaded_size fits in size_t.
   if (UNLIKELY(loaded_size > std::numeric_limits<size_t>::max())) {
     std::ostringstream oss;
     oss << "Loaded size is 0x" << std::hex << loaded_size << " but maximum size_t is 0x"
-        << std::numeric_limits<size_t>::max() << " for ELF file \"" << file_->GetPath() << "\"";
+        << std::numeric_limits<size_t>::max() << " for ELF file \"" << file_path_ << "\"";
     *error_msg = oss.str();
     *size = static_cast<size_t>(-1);
     return false;
@@ -1064,8 +1070,11 @@
 }
 
 template <typename ElfTypes>
-bool ElfFileImpl<ElfTypes>::Load(bool executable, bool low_4gb, std::string* error_msg) {
-  CHECK(program_header_only_) << file_->GetPath();
+bool ElfFileImpl<ElfTypes>::Load(File* file,
+                                 bool executable,
+                                 bool low_4gb,
+                                 std::string* error_msg) {
+  CHECK(program_header_only_) << file->GetPath();
 
   if (executable) {
     InstructionSet elf_ISA = GetInstructionSetFromELF(GetHeader().e_machine, GetHeader().e_flags);
@@ -1082,7 +1091,7 @@
     Elf_Phdr* program_header = GetProgramHeader(i);
     if (program_header == nullptr) {
       *error_msg = StringPrintf("No program header for entry %d in ELF file %s.",
-                                i, file_->GetPath().c_str());
+                                i, file->GetPath().c_str());
       return false;
     }
 
@@ -1106,11 +1115,11 @@
     // non-zero, the segments require the specific address specified,
     // which either was specified in the file because we already set
     // base_address_ after the first zero segment).
-    int64_t temp_file_length = file_->GetLength();
+    int64_t temp_file_length = file->GetLength();
     if (temp_file_length < 0) {
       errno = -temp_file_length;
       *error_msg = StringPrintf("Failed to get length of file: '%s' fd=%d: %s",
-                                file_->GetPath().c_str(), file_->Fd(), strerror(errno));
+                                file->GetPath().c_str(), file->Fd(), strerror(errno));
       return false;
     }
     size_t file_length = static_cast<size_t>(temp_file_length);
@@ -1122,7 +1131,7 @@
         reserve_base_override = requested_base_;
       }
       std::string reservation_name("ElfFile reservation for ");
-      reservation_name += file_->GetPath();
+      reservation_name += file->GetPath();
       size_t loaded_size;
       if (!GetLoadedSize(&loaded_size, error_msg)) {
         DCHECK(!error_msg->empty());
@@ -1178,7 +1187,7 @@
       *error_msg = StringPrintf("Invalid p_filesz > p_memsz (%" PRIu64 " > %" PRIu64 "): %s",
                                 static_cast<uint64_t>(program_header->p_filesz),
                                 static_cast<uint64_t>(program_header->p_memsz),
-                                file_->GetPath().c_str());
+                                file->GetPath().c_str());
       return false;
     }
     if (program_header->p_filesz < program_header->p_memsz &&
@@ -1187,14 +1196,14 @@
                                 " < %" PRIu64 "): %s",
                                 static_cast<uint64_t>(program_header->p_filesz),
                                 static_cast<uint64_t>(program_header->p_memsz),
-                                file_->GetPath().c_str());
+                                file->GetPath().c_str());
       return false;
     }
     if (file_length < (program_header->p_offset + program_header->p_filesz)) {
       *error_msg = StringPrintf("File size of %zd bytes not large enough to contain ELF segment "
                                 "%d of %" PRIu64 " bytes: '%s'", file_length, i,
                                 static_cast<uint64_t>(program_header->p_offset + program_header->p_filesz),
-                                file_->GetPath().c_str());
+                                file->GetPath().c_str());
       return false;
     }
     if (program_header->p_filesz != 0u) {
@@ -1203,28 +1212,28 @@
                                    program_header->p_filesz,
                                    prot,
                                    flags,
-                                   file_->Fd(),
+                                   file->Fd(),
                                    program_header->p_offset,
                                    /*low4_gb*/false,
                                    /*reuse*/true,  // implies MAP_FIXED
-                                   file_->GetPath().c_str(),
+                                   file->GetPath().c_str(),
                                    error_msg));
       if (segment.get() == nullptr) {
         *error_msg = StringPrintf("Failed to map ELF file segment %d from %s: %s",
-                                  i, file_->GetPath().c_str(), error_msg->c_str());
+                                  i, file->GetPath().c_str(), error_msg->c_str());
         return false;
       }
       if (segment->Begin() != p_vaddr) {
         *error_msg = StringPrintf("Failed to map ELF file segment %d from %s at expected address %p, "
                                   "instead mapped to %p",
-                                  i, file_->GetPath().c_str(), p_vaddr, segment->Begin());
+                                  i, file->GetPath().c_str(), p_vaddr, segment->Begin());
         return false;
       }
       segments_.push_back(segment.release());
     }
     if (program_header->p_filesz < program_header->p_memsz) {
       std::string name = StringPrintf("Zero-initialized segment %" PRIu64 " of ELF file %s",
-                                      static_cast<uint64_t>(i), file_->GetPath().c_str());
+                                      static_cast<uint64_t>(i), file->GetPath().c_str());
       std::unique_ptr<MemMap> segment(
           MemMap::MapAnonymous(name.c_str(),
                                p_vaddr + program_header->p_filesz,
@@ -1232,13 +1241,13 @@
                                prot, false, true /* reuse */, error_msg));
       if (segment == nullptr) {
         *error_msg = StringPrintf("Failed to map zero-initialized ELF file segment %d from %s: %s",
-                                  i, file_->GetPath().c_str(), error_msg->c_str());
+                                  i, file->GetPath().c_str(), error_msg->c_str());
         return false;
       }
       if (segment->Begin() != p_vaddr) {
         *error_msg = StringPrintf("Failed to map zero-initialized ELF file segment %d from %s "
                                   "at expected address %p, instead mapped to %p",
-                                  i, file_->GetPath().c_str(), p_vaddr, segment->Begin());
+                                  i, file->GetPath().c_str(), p_vaddr, segment->Begin());
         return false;
       }
       segments_.push_back(segment.release());
@@ -1249,7 +1258,7 @@
   uint8_t* dsptr = base_address_ + GetDynamicProgramHeader().p_vaddr;
   if ((dsptr < Begin() || dsptr >= End()) && !ValidPointer(dsptr)) {
     *error_msg = StringPrintf("dynamic section address invalid in ELF file %s",
-                              file_->GetPath().c_str());
+                              file->GetPath().c_str());
     return false;
   }
   dynamic_section_start_ = reinterpret_cast<Elf_Dyn*>(dsptr);
@@ -1261,7 +1270,7 @@
       case DT_HASH: {
         if (!ValidPointer(d_ptr)) {
           *error_msg = StringPrintf("DT_HASH value %p does not refer to a loaded ELF segment of %s",
-                                    d_ptr, file_->GetPath().c_str());
+                                    d_ptr, file->GetPath().c_str());
           return false;
         }
         hash_section_start_ = reinterpret_cast<Elf_Word*>(d_ptr);
@@ -1270,7 +1279,7 @@
       case DT_STRTAB: {
         if (!ValidPointer(d_ptr)) {
           *error_msg = StringPrintf("DT_HASH value %p does not refer to a loaded ELF segment of %s",
-                                    d_ptr, file_->GetPath().c_str());
+                                    d_ptr, file->GetPath().c_str());
           return false;
         }
         dynstr_section_start_ = reinterpret_cast<char*>(d_ptr);
@@ -1279,7 +1288,7 @@
       case DT_SYMTAB: {
         if (!ValidPointer(d_ptr)) {
           *error_msg = StringPrintf("DT_HASH value %p does not refer to a loaded ELF segment of %s",
-                                    d_ptr, file_->GetPath().c_str());
+                                    d_ptr, file->GetPath().c_str());
           return false;
         }
         dynsym_section_start_ = reinterpret_cast<Elf_Sym*>(d_ptr);
@@ -1289,7 +1298,7 @@
         if (GetDynamicNum() != i+1) {
           *error_msg = StringPrintf("DT_NULL found after %d .dynamic entries, "
                                     "expected %d as implied by size of PT_DYNAMIC segment in %s",
-                                    i + 1, GetDynamicNum(), file_->GetPath().c_str());
+                                    i + 1, GetDynamicNum(), file->GetPath().c_str());
           return false;
         }
         break;
@@ -1298,7 +1307,7 @@
   }
 
   // Check for the existence of some sections.
-  if (!CheckSectionsExist(error_msg)) {
+  if (!CheckSectionsExist(file, error_msg)) {
     return false;
   }
 
@@ -1392,7 +1401,7 @@
 }
 
 template <typename ElfTypes>
-bool ElfFileImpl<ElfTypes>::Strip(std::string* error_msg) {
+bool ElfFileImpl<ElfTypes>::Strip(File* file, std::string* error_msg) {
   // ELF files produced by MCLinker look roughly like this
   //
   // +------------+
@@ -1484,10 +1493,10 @@
 
   GetHeader().e_shnum = section_headers.size();
   GetHeader().e_shoff = shoff;
-  int result = ftruncate(file_->Fd(), offset);
+  int result = ftruncate(file->Fd(), offset);
   if (result != 0) {
     *error_msg = StringPrintf("Failed to truncate while stripping ELF file: '%s': %s",
-                              file_->GetPath().c_str(), strerror(errno));
+                              file->GetPath().c_str(), strerror(errno));
     return false;
   }
   return true;
@@ -1498,32 +1507,32 @@
 template <typename ElfTypes>
 bool ElfFileImpl<ElfTypes>::Fixup(Elf_Addr base_address) {
   if (!FixupDynamic(base_address)) {
-    LOG(WARNING) << "Failed to fixup .dynamic in " << file_->GetPath();
+    LOG(WARNING) << "Failed to fixup .dynamic in " << file_path_;
     return false;
   }
   if (!FixupSectionHeaders(base_address)) {
-    LOG(WARNING) << "Failed to fixup section headers in " << file_->GetPath();
+    LOG(WARNING) << "Failed to fixup section headers in " << file_path_;
     return false;
   }
   if (!FixupProgramHeaders(base_address)) {
-    LOG(WARNING) << "Failed to fixup program headers in " << file_->GetPath();
+    LOG(WARNING) << "Failed to fixup program headers in " << file_path_;
     return false;
   }
   if (!FixupSymbols(base_address, true)) {
-    LOG(WARNING) << "Failed to fixup .dynsym in " << file_->GetPath();
+    LOG(WARNING) << "Failed to fixup .dynsym in " << file_path_;
     return false;
   }
   if (!FixupSymbols(base_address, false)) {
-    LOG(WARNING) << "Failed to fixup .symtab in " << file_->GetPath();
+    LOG(WARNING) << "Failed to fixup .symtab in " << file_path_;
     return false;
   }
   if (!FixupRelocations(base_address)) {
-    LOG(WARNING) << "Failed to fixup .rel.dyn in " << file_->GetPath();
+    LOG(WARNING) << "Failed to fixup .rel.dyn in " << file_path_;
     return false;
   }
   static_assert(sizeof(Elf_Off) >= sizeof(base_address), "Potentially losing precision.");
   if (!FixupDebugSections(static_cast<Elf_Off>(base_address))) {
-    LOG(WARNING) << "Failed to fixup debug sections in " << file_->GetPath();
+    LOG(WARNING) << "Failed to fixup debug sections in " << file_path_;
     return false;
   }
   return true;
@@ -1538,7 +1547,7 @@
       Elf_Addr d_ptr = elf_dyn.d_un.d_ptr;
       if (DEBUG_FIXUP) {
         LOG(INFO) << StringPrintf("In %s moving Elf_Dyn[%d] from 0x%" PRIx64 " to 0x%" PRIx64,
-                                  GetFile().GetPath().c_str(), i,
+                                  file_path_.c_str(), i,
                                   static_cast<uint64_t>(d_ptr),
                                   static_cast<uint64_t>(d_ptr + base_address));
       }
@@ -1560,7 +1569,7 @@
     }
     if (DEBUG_FIXUP) {
       LOG(INFO) << StringPrintf("In %s moving Elf_Shdr[%d] from 0x%" PRIx64 " to 0x%" PRIx64,
-                                GetFile().GetPath().c_str(), i,
+                                file_path_.c_str(), i,
                                 static_cast<uint64_t>(sh->sh_addr),
                                 static_cast<uint64_t>(sh->sh_addr + base_address));
     }
@@ -1575,19 +1584,19 @@
   for (Elf_Word i = 0; i < GetProgramHeaderNum(); i++) {
     Elf_Phdr* ph = GetProgramHeader(i);
     CHECK(ph != nullptr);
-    CHECK_EQ(ph->p_vaddr, ph->p_paddr) << GetFile().GetPath() << " i=" << i;
+    CHECK_EQ(ph->p_vaddr, ph->p_paddr) << file_path_ << " i=" << i;
     CHECK((ph->p_align == 0) || (0 == ((ph->p_vaddr - ph->p_offset) & (ph->p_align - 1))))
-            << GetFile().GetPath() << " i=" << i;
+        << file_path_ << " i=" << i;
     if (DEBUG_FIXUP) {
       LOG(INFO) << StringPrintf("In %s moving Elf_Phdr[%d] from 0x%" PRIx64 " to 0x%" PRIx64,
-                                GetFile().GetPath().c_str(), i,
+                                file_path_.c_str(), i,
                                 static_cast<uint64_t>(ph->p_vaddr),
                                 static_cast<uint64_t>(ph->p_vaddr + base_address));
     }
     ph->p_vaddr += base_address;
     ph->p_paddr += base_address;
     CHECK((ph->p_align == 0) || (0 == ((ph->p_vaddr - ph->p_offset) & (ph->p_align - 1))))
-            << GetFile().GetPath() << " i=" << i;
+        << file_path_ << " i=" << i;
   }
   return true;
 }
@@ -1599,7 +1608,7 @@
   Elf_Shdr* symbol_section = FindSectionByType(section_type);
   if (symbol_section == nullptr) {
     // file is missing optional .symtab
-    CHECK(!dynamic) << GetFile().GetPath();
+    CHECK(!dynamic) << file_path_;
     return true;
   }
   for (uint32_t i = 0; i < GetSymbolNum(*symbol_section); i++) {
@@ -1608,7 +1617,7 @@
     if (symbol->st_value != 0) {
       if (DEBUG_FIXUP) {
         LOG(INFO) << StringPrintf("In %s moving Elf_Sym[%d] from 0x%" PRIx64 " to 0x%" PRIx64,
-                                  GetFile().GetPath().c_str(), i,
+                                  file_path_.c_str(), i,
                                   static_cast<uint64_t>(symbol->st_value),
                                   static_cast<uint64_t>(symbol->st_value + base_address));
       }
@@ -1628,7 +1637,7 @@
         Elf_Rel& rel = GetRel(*sh, j);
         if (DEBUG_FIXUP) {
           LOG(INFO) << StringPrintf("In %s moving Elf_Rel[%d] from 0x%" PRIx64 " to 0x%" PRIx64,
-                                    GetFile().GetPath().c_str(), j,
+                                    file_path_.c_str(), j,
                                     static_cast<uint64_t>(rel.r_offset),
                                     static_cast<uint64_t>(rel.r_offset + base_address));
         }
@@ -1639,7 +1648,7 @@
         Elf_Rela& rela = GetRela(*sh, j);
         if (DEBUG_FIXUP) {
           LOG(INFO) << StringPrintf("In %s moving Elf_Rela[%d] from 0x%" PRIx64 " to 0x%" PRIx64,
-                                    GetFile().GetPath().c_str(), j,
+                                    file_path_.c_str(), j,
                                     static_cast<uint64_t>(rela.r_offset),
                                     static_cast<uint64_t>(rela.r_offset + base_address));
         }
@@ -1695,8 +1704,9 @@
                                                        low_4gb,
                                                        error_msg,
                                                        requested_base);
-    if (elf_file_impl == nullptr)
+    if (elf_file_impl == nullptr) {
       return nullptr;
+    }
     return new ElfFile(elf_file_impl);
   } else if (header[EI_CLASS] == ELFCLASS32) {
     ElfFileImpl32* elf_file_impl = ElfFileImpl32::Open(file,
@@ -1775,8 +1785,8 @@
     return elf32_->func(__VA_ARGS__); \
   }
 
-bool ElfFile::Load(bool executable, bool low_4gb, std::string* error_msg) {
-  DELEGATE_TO_IMPL(Load, executable, low_4gb, error_msg);
+bool ElfFile::Load(File* file, bool executable, bool low_4gb, std::string* error_msg) {
+  DELEGATE_TO_IMPL(Load, file, executable, low_4gb, error_msg);
 }
 
 const uint8_t* ElfFile::FindDynamicSymbolAddress(const std::string& symbol_name) const {
@@ -1795,8 +1805,8 @@
   DELEGATE_TO_IMPL(End);
 }
 
-const File& ElfFile::GetFile() const {
-  DELEGATE_TO_IMPL(GetFile);
+const std::string& ElfFile::GetFilePath() const {
+  DELEGATE_TO_IMPL(GetFilePath);
 }
 
 bool ElfFile::GetSectionOffsetAndSize(const char* section_name, uint64_t* offset,
@@ -1854,10 +1864,11 @@
     return false;
   }
 
-  if (elf_file->elf64_.get() != nullptr)
-    return elf_file->elf64_->Strip(error_msg);
-  else
-    return elf_file->elf32_->Strip(error_msg);
+  if (elf_file->elf64_.get() != nullptr) {
+    return elf_file->elf64_->Strip(file, error_msg);
+  } else {
+    return elf_file->elf32_->Strip(file, error_msg);
+  }
 }
 
 bool ElfFile::Fixup(uint64_t base_address) {
diff --git a/runtime/elf_file.h b/runtime/elf_file.h
index c3616f7..b1c9395 100644
--- a/runtime/elf_file.h
+++ b/runtime/elf_file.h
@@ -53,7 +53,7 @@
   ~ElfFile();
 
   // Load segments into memory based on PT_LOAD program headers
-  bool Load(bool executable, bool low_4gb, std::string* error_msg);
+  bool Load(File* file, bool executable, bool low_4gb, std::string* error_msg);
 
   const uint8_t* FindDynamicSymbolAddress(const std::string& symbol_name) const;
 
@@ -65,7 +65,7 @@
   // The end of the memory map address range for this ELF file.
   uint8_t* End() const;
 
-  const File& GetFile() const;
+  const std::string& GetFilePath() const;
 
   bool GetSectionOffsetAndSize(const char* section_name, uint64_t* offset, uint64_t* size) const;
 
diff --git a/runtime/elf_file_impl.h b/runtime/elf_file_impl.h
index 1cdbedc..04c2243 100644
--- a/runtime/elf_file_impl.h
+++ b/runtime/elf_file_impl.h
@@ -61,8 +61,8 @@
                            std::string* error_msg);
   ~ElfFileImpl();
 
-  const File& GetFile() const {
-    return *file_;
+  const std::string& GetFilePath() const {
+    return file_path_;
   }
 
   uint8_t* Begin() const {
@@ -119,7 +119,7 @@
 
   // Load segments into memory based on PT_LOAD program headers.
   // executable is true at run time, false at compile time.
-  bool Load(bool executable, bool low_4gb, std::string* error_msg);
+  bool Load(File* file, bool executable, bool low_4gb, std::string* error_msg);
 
   bool Fixup(Elf_Addr base_address);
   bool FixupDynamic(Elf_Addr base_address);
@@ -132,14 +132,14 @@
   static void ApplyOatPatches(const uint8_t* patches, const uint8_t* patches_end, Elf_Addr delta,
                               uint8_t* to_patch, const uint8_t* to_patch_end);
 
-  bool Strip(std::string* error_msg);
+  bool Strip(File* file, std::string* error_msg);
 
  private:
   ElfFileImpl(File* file, bool writable, bool program_header_only, uint8_t* requested_base);
 
-  bool Setup(int prot, int flags, bool low_4gb, std::string* error_msg);
+  bool Setup(File* file, int prot, int flags, bool low_4gb, std::string* error_msg);
 
-  bool SetMap(MemMap* map, std::string* error_msg);
+  bool SetMap(File* file, MemMap* map, std::string* error_msg);
 
   uint8_t* GetProgramHeadersStart() const;
   uint8_t* GetSectionHeadersStart() const;
@@ -163,7 +163,7 @@
   const Elf_Sym* FindDynamicSymbol(const std::string& symbol_name) const;
 
   // Check that certain sections and their dependencies exist.
-  bool CheckSectionsExist(std::string* error_msg) const;
+  bool CheckSectionsExist(File* file, std::string* error_msg) const;
 
   // Check that the link of the first section links to the second section.
   bool CheckSectionsLinked(const uint8_t* source, const uint8_t* target) const;
@@ -191,7 +191,7 @@
   // Lookup a string by section type. Returns null for special 0 offset.
   const char* GetString(Elf_Word section_type, Elf_Word) const;
 
-  const File* const file_;
+  const std::string file_path_;
   const bool writable_;
   const bool program_header_only_;
 
diff --git a/runtime/entrypoints/entrypoint_utils-inl.h b/runtime/entrypoints/entrypoint_utils-inl.h
index e37db7d..ed60f59 100644
--- a/runtime/entrypoints/entrypoint_utils-inl.h
+++ b/runtime/entrypoints/entrypoint_utils-inl.h
@@ -148,12 +148,13 @@
   }
   if (kAccessCheck) {
     if (UNLIKELY(!klass->IsInstantiable())) {
-      self->ThrowNewException("Ljava/lang/InstantiationError;", PrettyDescriptor(klass).c_str());
+      self->ThrowNewException("Ljava/lang/InstantiationError;", klass->PrettyDescriptor().c_str());
       *slow_path = true;
       return nullptr;  // Failure
     }
     if (UNLIKELY(klass->IsClassClass())) {
-      ThrowIllegalAccessError(nullptr, "Class %s is inaccessible", PrettyDescriptor(klass).c_str());
+      ThrowIllegalAccessError(nullptr, "Class %s is inaccessible",
+                              klass->PrettyDescriptor().c_str());
       *slow_path = true;
       return nullptr;  // Failure
     }
@@ -231,10 +232,10 @@
     // CheckObjectAlloc can cause thread suspension which means we may now be instrumented.
     return klass->Alloc</*kInstrumented*/true>(
         self,
-        Runtime::Current()->GetHeap()->GetCurrentAllocator());
+        Runtime::Current()->GetHeap()->GetCurrentAllocator()).Ptr();
   }
   DCHECK(klass != nullptr);
-  return klass->Alloc<kInstrumented>(self, allocator_type);
+  return klass->Alloc<kInstrumented>(self, allocator_type).Ptr();
 }
 
 // Given the context of a calling Method and a resolved class, create an instance.
@@ -254,10 +255,10 @@
     // Pass in false since the object cannot be finalizable.
     // CheckClassInitializedForObjectAlloc can cause thread suspension which means we may now be
     // instrumented.
-    return klass->Alloc</*kInstrumented*/true, false>(self, heap->GetCurrentAllocator());
+    return klass->Alloc</*kInstrumented*/true, false>(self, heap->GetCurrentAllocator()).Ptr();
   }
   // Pass in false since the object cannot be finalizable.
-  return klass->Alloc<kInstrumented, false>(self, allocator_type);
+  return klass->Alloc<kInstrumented, false>(self, allocator_type).Ptr();
 }
 
 // Given the context of a calling Method and an initialized class, create an instance.
@@ -268,7 +269,7 @@
                                                       gc::AllocatorType allocator_type) {
   DCHECK(klass != nullptr);
   // Pass in false since the object cannot be finalizable.
-  return klass->Alloc<kInstrumented, false>(self, allocator_type);
+  return klass->Alloc<kInstrumented, false>(self, allocator_type).Ptr();
 }
 
 
@@ -293,7 +294,7 @@
       DCHECK(Thread::Current()->IsExceptionPending());
       return nullptr;  // Failure
     }
-    CHECK(klass->IsArrayClass()) << PrettyClass(klass);
+    CHECK(klass->IsArrayClass()) << klass->PrettyClass();
   }
   if (kAccessCheck) {
     mirror::Class* referrer = method->GetDeclaringClass();
@@ -433,7 +434,7 @@
                                  "Attempted read of %zd-bit %s on field '%s'",
                                  expected_size * (32 / sizeof(int32_t)),
                                  is_primitive ? "primitive" : "non-primitive",
-                                 PrettyField(resolved_field, true).c_str());
+                                 resolved_field->PrettyField(true).c_str());
         return nullptr;  // Failure.
       }
     }
@@ -482,15 +483,15 @@
 
 template<InvokeType type, bool access_check>
 inline ArtMethod* FindMethodFromCode(uint32_t method_idx,
-                                     mirror::Object** this_object,
+                                     ObjPtr<mirror::Object>* this_object,
                                      ArtMethod* referrer,
                                      Thread* self) {
   ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
   ArtMethod* resolved_method = class_linker->GetResolvedMethod(method_idx, referrer);
   if (resolved_method == nullptr) {
     StackHandleScope<1> hs(self);
-    mirror::Object* null_this = nullptr;
-    HandleWrapper<mirror::Object> h_this(
+    ObjPtr<mirror::Object> null_this = nullptr;
+    HandleWrapperObjPtr<mirror::Object> h_this(
         hs.NewHandleWrapper(type == kStatic ? &null_this : this_object));
     constexpr ClassLinker::ResolveMode resolve_mode =
         access_check ? ClassLinker::kForceICCECheck
@@ -549,7 +550,7 @@
                                resolved_method->GetName(), resolved_method->GetSignature());
         return nullptr;  // Failure.
       }
-      DCHECK(klass->HasVTable()) << PrettyClass(klass);
+      DCHECK(klass->HasVTable()) << klass->PrettyClass();
       return klass->GetVTableEntry(vtable_index, class_linker->GetImagePointerSize());
     }
     case kSuper: {
@@ -559,7 +560,7 @@
       //    defaults. What we actually need is a GetContainingClass that says which classes virtuals
       //    this method is coming from.
       StackHandleScope<2> hs2(self);
-      HandleWrapper<mirror::Object> h_this(hs2.NewHandleWrapper(this_object));
+      HandleWrapperObjPtr<mirror::Object> h_this(hs2.NewHandleWrapper(this_object));
       Handle<mirror::Class> h_referring_class(hs2.NewHandle(referrer->GetDeclaringClass()));
       const uint16_t method_type_idx =
           h_referring_class->GetDexFile().GetMethodId(method_idx).class_idx_;
@@ -624,9 +625,10 @@
           mirror::Class* klass = (*this_object)->GetClass();
           ArtMethod* method = klass->FindVirtualMethodForInterface(
               resolved_method, class_linker->GetImagePointerSize());
-          CHECK_EQ(imt_method, method) << PrettyMethod(resolved_method) << " / " <<
-              PrettyMethod(imt_method) << " / " << PrettyMethod(method) << " / " <<
-              PrettyClass(klass);
+          CHECK_EQ(imt_method, method) << ArtMethod::PrettyMethod(resolved_method) << " / "
+                                       << imt_method->PrettyMethod() << " / "
+                                       << ArtMethod::PrettyMethod(method) << " / "
+                                       << klass->PrettyClass();
         }
         return imt_method;
       } else {
@@ -650,7 +652,7 @@
 #define EXPLICIT_FIND_METHOD_FROM_CODE_TEMPLATE_DECL(_type, _access_check)                 \
   template REQUIRES_SHARED(Locks::mutator_lock_) ALWAYS_INLINE                       \
   ArtMethod* FindMethodFromCode<_type, _access_check>(uint32_t method_idx,         \
-                                                      mirror::Object** this_object, \
+                                                      ObjPtr<mirror::Object>* this_object, \
                                                       ArtMethod* referrer, \
                                                       Thread* self)
 #define EXPLICIT_FIND_METHOD_FROM_CODE_TYPED_TEMPLATE_DECL(_type) \
@@ -720,8 +722,11 @@
 }
 
 // Fast path method resolution that can't throw exceptions.
-inline ArtMethod* FindMethodFast(uint32_t method_idx, mirror::Object* this_object,
-                                 ArtMethod* referrer, bool access_check, InvokeType type) {
+inline ArtMethod* FindMethodFast(uint32_t method_idx,
+                                 ObjPtr<mirror::Object> this_object,
+                                 ArtMethod* referrer,
+                                 bool access_check,
+                                 InvokeType type) {
   ScopedAssertNoThreadSuspension ants(__FUNCTION__);
   if (UNLIKELY(this_object == nullptr && type != kStatic)) {
     return nullptr;
diff --git a/runtime/entrypoints/entrypoint_utils.cc b/runtime/entrypoints/entrypoint_utils.cc
index f8deb8f..1ccb4b0 100644
--- a/runtime/entrypoints/entrypoint_utils.cc
+++ b/runtime/entrypoints/entrypoint_utils.cc
@@ -61,12 +61,12 @@
   if (UNLIKELY(klass->IsPrimitive() && !klass->IsPrimitiveInt())) {
     if (klass->IsPrimitiveLong() || klass->IsPrimitiveDouble()) {
       ThrowRuntimeException("Bad filled array request for type %s",
-                            PrettyDescriptor(klass).c_str());
+                            klass->PrettyDescriptor().c_str());
     } else {
       self->ThrowNewExceptionF(
           "Ljava/lang/InternalError;",
           "Found type %s; filled-new-array not implemented for anything but 'int'",
-          PrettyDescriptor(klass).c_str());
+          klass->PrettyDescriptor().c_str());
     }
     return nullptr;  // Failure
   }
@@ -77,7 +77,7 @@
       return nullptr;  // Failure
     }
   }
-  DCHECK(klass->IsArrayClass()) << PrettyClass(klass);
+  DCHECK(klass->IsArrayClass()) << klass->PrettyClass();
   return klass;
 }
 
@@ -131,8 +131,8 @@
   if (!o->InstanceOf(return_type)) {
     Runtime::Current()->GetJavaVM()->JniAbortF(nullptr,
                                                "attempt to return an instance of %s from %s",
-                                               PrettyTypeOf(o.Get()).c_str(),
-                                               PrettyMethod(method).c_str());
+                                               o->PrettyTypeOf().c_str(),
+                                               method->PrettyMethod().c_str());
   }
 }
 
@@ -237,7 +237,7 @@
   }
 }
 
-bool FillArrayData(mirror::Object* obj, const Instruction::ArrayDataPayload* payload) {
+bool FillArrayData(ObjPtr<mirror::Object> obj, const Instruction::ArrayDataPayload* payload) {
   DCHECK_EQ(payload->ident, static_cast<uint16_t>(Instruction::kArrayDataSignature));
   if (UNLIKELY(obj == nullptr)) {
     ThrowNullPointerException("null array in FILL_ARRAY_DATA");
diff --git a/runtime/entrypoints/entrypoint_utils.h b/runtime/entrypoints/entrypoint_utils.h
index 20c8401..bcddfb0 100644
--- a/runtime/entrypoints/entrypoint_utils.h
+++ b/runtime/entrypoints/entrypoint_utils.h
@@ -156,7 +156,7 @@
 
 template<InvokeType type, bool access_check>
 inline ArtMethod* FindMethodFromCode(uint32_t method_idx,
-                                     mirror::Object** this_object,
+                                     ObjPtr<mirror::Object>* this_object,
                                      ArtMethod* referrer,
                                      Thread* self)
     REQUIRES_SHARED(Locks::mutator_lock_)
@@ -171,7 +171,7 @@
 
 // Fast path method resolution that can't throw exceptions.
 inline ArtMethod* FindMethodFast(uint32_t method_idx,
-                                 mirror::Object* this_object,
+                                 ObjPtr<mirror::Object> this_object,
                                  ArtMethod* referrer,
                                  bool access_check,
                                  InvokeType type)
@@ -203,7 +203,7 @@
     REQUIRES_SHARED(Locks::mutator_lock_)
     REQUIRES(!Roles::uninterruptible_);
 
-bool FillArrayData(mirror::Object* obj, const Instruction::ArrayDataPayload* payload)
+bool FillArrayData(ObjPtr<mirror::Object> obj, const Instruction::ArrayDataPayload* payload)
     REQUIRES_SHARED(Locks::mutator_lock_)
     REQUIRES(!Roles::uninterruptible_);
 
diff --git a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
index dc5fd07..515fcbf 100644
--- a/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_alloc_entrypoints.cc
@@ -43,11 +43,8 @@
         obj = self->AllocTlab(byte_count); \
         DCHECK(obj != nullptr) << "AllocTlab can't fail"; \
         obj->SetClass(klass); \
-        if (kUseBakerOrBrooksReadBarrier) { \
-          if (kUseBrooksReadBarrier) { \
-            obj->SetReadBarrierPointer(obj); \
-          } \
-          obj->AssertReadBarrierPointer(); \
+        if (kUseBakerReadBarrier) { \
+          obj->AssertReadBarrierState(); \
         } \
         QuasiAtomic::ThreadFenceForConstructor(); \
         return obj; \
@@ -69,11 +66,8 @@
         obj = self->AllocTlab(byte_count); \
         DCHECK(obj != nullptr) << "AllocTlab can't fail"; \
         obj->SetClass(klass); \
-        if (kUseBakerOrBrooksReadBarrier) { \
-          if (kUseBrooksReadBarrier) { \
-            obj->SetReadBarrierPointer(obj); \
-          } \
-          obj->AssertReadBarrierPointer(); \
+        if (kUseBakerReadBarrier) { \
+          obj->AssertReadBarrierState(); \
         } \
         QuasiAtomic::ThreadFenceForConstructor(); \
         return obj; \
@@ -94,11 +88,8 @@
       obj = self->AllocTlab(byte_count); \
       DCHECK(obj != nullptr) << "AllocTlab can't fail"; \
       obj->SetClass(klass); \
-      if (kUseBakerOrBrooksReadBarrier) { \
-        if (kUseBrooksReadBarrier) { \
-          obj->SetReadBarrierPointer(obj); \
-        } \
-        obj->AssertReadBarrierPointer(); \
+      if (kUseBakerReadBarrier) { \
+        obj->AssertReadBarrierState(); \
       } \
       QuasiAtomic::ThreadFenceForConstructor(); \
       return obj; \
diff --git a/runtime/entrypoints/quick/quick_cast_entrypoints.cc b/runtime/entrypoints/quick/quick_cast_entrypoints.cc
index 2732d68..083d578 100644
--- a/runtime/entrypoints/quick/quick_cast_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_cast_entrypoints.cc
@@ -27,4 +27,12 @@
   return klass->IsAssignableFrom(ref_class) ? 1 : 0;
 }
 
+// Is assignable test for code, won't throw.  Null and equality test already performed.
+extern "C" size_t artInstanceOfFromCode(mirror::Object* obj, mirror::Class* ref_class)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  DCHECK(obj != nullptr);
+  DCHECK(ref_class != nullptr);
+  return obj->InstanceOf(ref_class) ? 1 : 0;
+}
+
 }  // namespace art
diff --git a/runtime/entrypoints/quick/quick_default_externs.h b/runtime/entrypoints/quick/quick_default_externs.h
index cfa5325..64030f3 100644
--- a/runtime/entrypoints/quick/quick_default_externs.h
+++ b/runtime/entrypoints/quick/quick_default_externs.h
@@ -31,7 +31,7 @@
 // These are extern declarations of assembly stubs with common names.
 
 // Cast entrypoints.
-extern "C" void art_quick_check_cast(const art::mirror::Class*, const art::mirror::Class*);
+extern "C" void art_quick_check_instance_of(art::mirror::Object*, art::mirror::Class*);
 
 // DexCache entrypoints.
 extern "C" void* art_quick_initialize_static_storage(uint32_t);
diff --git a/runtime/entrypoints/quick/quick_default_init_entrypoints.h b/runtime/entrypoints/quick/quick_default_init_entrypoints.h
index 1ee1f81..df23f94 100644
--- a/runtime/entrypoints/quick/quick_default_init_entrypoints.h
+++ b/runtime/entrypoints/quick/quick_default_init_entrypoints.h
@@ -78,6 +78,7 @@
   qpoints->pJniMethodEnd = JniMethodEnd;
   qpoints->pJniMethodEndSynchronized = JniMethodEndSynchronized;
   qpoints->pJniMethodEndWithReference = JniMethodEndWithReference;
+  qpoints->pJniMethodFastEndWithReference = JniMethodFastEndWithReference;
   qpoints->pJniMethodEndWithReferenceSynchronized = JniMethodEndWithReferenceSynchronized;
   qpoints->pJniMethodFastEnd = JniMethodFastEnd;
   qpoints->pQuickGenericJniTrampoline = art_quick_generic_jni_trampoline;
diff --git a/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc b/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc
index 4311d19..d438418 100644
--- a/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_dexcache_entrypoints.cc
@@ -60,15 +60,25 @@
 extern "C" mirror::String* artResolveStringFromCode(int32_t string_idx, Thread* self)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   ScopedQuickEntrypointChecks sqec(self);
-  auto* caller = GetCalleeSaveMethodCaller(self, Runtime::kSaveRefsOnly);
+  auto* caller = GetCalleeSaveMethodCaller(
+      self,
+      // TODO: Change art_quick_resolve_string on MIPS and MIPS64 to kSaveEverything.
+      (kRuntimeISA == kMips || kRuntimeISA == kMips64) ? Runtime::kSaveRefsOnly
+                                                       : Runtime::kSaveEverything);
   mirror::String* result = ResolveStringFromCode(caller, string_idx);
   if (LIKELY(result != nullptr)) {
-    // For AOT code, we need a write barrier for the dex cache that holds the GC roots in the .bss.
+    // For AOT code, we need a write barrier for the class loader that holds
+    // the GC roots in the .bss.
     const DexFile* dex_file = caller->GetDexFile();
     if (dex_file != nullptr &&
         dex_file->GetOatDexFile() != nullptr &&
         !dex_file->GetOatDexFile()->GetOatFile()->GetBssGcRoots().empty()) {
       mirror::ClassLoader* class_loader = caller->GetDeclaringClass()->GetClassLoader();
+      DCHECK(class_loader != nullptr);  // We do not use .bss GC roots for boot image.
+      DCHECK(
+          !class_loader->GetClassTable()->InsertOatFile(dex_file->GetOatDexFile()->GetOatFile()))
+          << "Oat file with .bss GC roots was not registered in class table: "
+          << dex_file->GetOatDexFile()->GetOatFile()->GetLocation();
       // Note that we emit the barrier before the compiled code stores the string as GC root.
       // This is OK as there is no suspend point point in between.
       Runtime::Current()->GetHeap()->WriteBarrierEveryFieldOf(class_loader);
diff --git a/runtime/entrypoints/quick/quick_entrypoints.h b/runtime/entrypoints/quick/quick_entrypoints.h
index 89712a3..915f18e 100644
--- a/runtime/entrypoints/quick/quick_entrypoints.h
+++ b/runtime/entrypoints/quick/quick_entrypoints.h
@@ -65,6 +65,11 @@
 extern mirror::Object* JniMethodEndWithReference(jobject result, uint32_t saved_local_ref_cookie,
                                                  Thread* self)
     NO_THREAD_SAFETY_ANALYSIS HOT_ATTR;
+extern mirror::Object* JniMethodFastEndWithReference(jobject result,
+                                                     uint32_t saved_local_ref_cookie,
+                                                     Thread* self)
+    NO_THREAD_SAFETY_ANALYSIS HOT_ATTR;
+
 
 extern mirror::Object* JniMethodEndWithReferenceSynchronized(jobject result,
                                                              uint32_t saved_local_ref_cookie,
diff --git a/runtime/entrypoints/quick/quick_entrypoints_list.h b/runtime/entrypoints/quick/quick_entrypoints_list.h
index e402919..a1c5082 100644
--- a/runtime/entrypoints/quick/quick_entrypoints_list.h
+++ b/runtime/entrypoints/quick/quick_entrypoints_list.h
@@ -33,8 +33,8 @@
   V(AllocStringFromChars, void*, int32_t, int32_t, void*) \
   V(AllocStringFromString, void*, void*) \
 \
-  V(InstanceofNonTrivial, size_t, const mirror::Class*, const mirror::Class*) \
-  V(CheckCast, void, const mirror::Class*, const mirror::Class*) \
+  V(InstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*) \
+  V(CheckInstanceOf, void, mirror::Object*, mirror::Class*) \
 \
   V(InitializeStaticStorage, void*, uint32_t) \
   V(InitializeTypeAndVerifyAccess, void*, uint32_t) \
@@ -78,6 +78,7 @@
   V(JniMethodFastEnd, void, uint32_t, Thread*) \
   V(JniMethodEndSynchronized, void, uint32_t, jobject, Thread*) \
   V(JniMethodEndWithReference, mirror::Object*, jobject, uint32_t, Thread*) \
+  V(JniMethodFastEndWithReference, mirror::Object*, jobject, uint32_t, Thread*) \
   V(JniMethodEndWithReferenceSynchronized, mirror::Object*, jobject, uint32_t, jobject, Thread*) \
   V(QuickGenericJniTrampoline, void, ArtMethod*) \
 \
diff --git a/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc b/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc
index fec7373..aa547bf 100644
--- a/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_instrumentation_entrypoints.cc
@@ -44,7 +44,7 @@
   bool interpreter_entry = (result == GetQuickToInterpreterBridge());
   instrumentation->PushInstrumentationStackFrame(self, method->IsStatic() ? nullptr : this_object,
                                                  method, lr, interpreter_entry);
-  CHECK(result != nullptr) << PrettyMethod(method);
+  CHECK(result != nullptr) << method->PrettyMethod();
   return result;
 }
 
diff --git a/runtime/entrypoints/quick/quick_jni_entrypoints.cc b/runtime/entrypoints/quick/quick_jni_entrypoints.cc
index 446e343..670dadc 100644
--- a/runtime/entrypoints/quick/quick_jni_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_jni_entrypoints.cc
@@ -15,15 +15,32 @@
  */
 
 #include "art_method-inl.h"
+#include "base/casts.h"
 #include "entrypoints/entrypoint_utils-inl.h"
+#include "indirect_reference_table.h"
 #include "mirror/object-inl.h"
 #include "thread-inl.h"
 #include "verify_object-inl.h"
 
 namespace art {
 
+static_assert(sizeof(IRTSegmentState) == sizeof(uint32_t), "IRTSegmentState size unexpected");
+static_assert(std::is_trivial<IRTSegmentState>::value, "IRTSegmentState not trivial");
+
+template <bool kDynamicFast>
+static inline void GoToRunnableFast(Thread* self) NO_THREAD_SAFETY_ANALYSIS;
+
 extern void ReadBarrierJni(mirror::CompressedReference<mirror::Object>* handle_on_stack,
                            Thread* self ATTRIBUTE_UNUSED) {
+  DCHECK(kUseReadBarrier);
+  if (kUseBakerReadBarrier) {
+    DCHECK(handle_on_stack->AsMirrorPtr() != nullptr)
+        << "The class of a static jni call must not be null";
+    // Check the mark bit and return early if it's already marked.
+    if (LIKELY(handle_on_stack->AsMirrorPtr()->GetMarkBit() != 0)) {
+      return;
+    }
+  }
   // Call the read barrier and update the handle.
   mirror::Object* to_ref = ReadBarrier::BarrierForRoot(handle_on_stack);
   handle_on_stack->Assign(to_ref);
@@ -33,12 +50,12 @@
 extern uint32_t JniMethodFastStart(Thread* self) {
   JNIEnvExt* env = self->GetJniEnv();
   DCHECK(env != nullptr);
-  uint32_t saved_local_ref_cookie = env->local_ref_cookie;
+  uint32_t saved_local_ref_cookie = bit_cast<uint32_t>(env->local_ref_cookie);
   env->local_ref_cookie = env->locals.GetSegmentState();
 
   if (kIsDebugBuild) {
     ArtMethod* native_method = *self->GetManagedStack()->GetTopQuickFrame();
-    CHECK(native_method->IsAnnotatedWithFastNative()) << PrettyMethod(native_method);
+    CHECK(native_method->IsAnnotatedWithFastNative()) << native_method->PrettyMethod();
   }
 
   return saved_local_ref_cookie;
@@ -48,7 +65,7 @@
 extern uint32_t JniMethodStart(Thread* self) {
   JNIEnvExt* env = self->GetJniEnv();
   DCHECK(env != nullptr);
-  uint32_t saved_local_ref_cookie = env->local_ref_cookie;
+  uint32_t saved_local_ref_cookie = bit_cast<uint32_t>(env->local_ref_cookie);
   env->local_ref_cookie = env->locals.GetSegmentState();
   ArtMethod* native_method = *self->GetManagedStack()->GetTopQuickFrame();
   if (!native_method->IsFastNative()) {
@@ -69,7 +86,28 @@
   bool is_fast = native_method->IsFastNative();
   if (!is_fast) {
     self->TransitionFromSuspendedToRunnable();
-  } else if (UNLIKELY(self->TestAllFlags())) {
+  } else {
+    GoToRunnableFast</*kDynamicFast*/true>(self);
+  }
+}
+
+// TODO: NO_THREAD_SAFETY_ANALYSIS due to different control paths depending on fast JNI.
+template <bool kDynamicFast>
+ALWAYS_INLINE static inline void GoToRunnableFast(Thread* self) NO_THREAD_SAFETY_ANALYSIS {
+  if (kIsDebugBuild) {
+    // Should only enter here if the method is !Fast JNI or @FastNative.
+    ArtMethod* native_method = *self->GetManagedStack()->GetTopQuickFrame();
+
+    if (kDynamicFast) {
+      CHECK(native_method->IsFastNative()) << native_method->PrettyMethod();
+    } else {
+      CHECK(native_method->IsAnnotatedWithFastNative()) << native_method->PrettyMethod();
+    }
+  }
+
+  // When we are in "fast" JNI or @FastNative, we are already Runnable.
+  // Only do a suspend check on the way out of JNI.
+  if (UNLIKELY(self->TestAllFlags())) {
     // In fast JNI mode we never transitioned out of runnable. Perform a suspend check if there
     // is a flag raised.
     DCHECK(Locks::mutator_lock_->IsSharedHeld(self));
@@ -84,7 +122,7 @@
     env->CheckNoHeldMonitors();
   }
   env->locals.SetSegmentState(env->local_ref_cookie);
-  env->local_ref_cookie = saved_local_ref_cookie;
+  env->local_ref_cookie = bit_cast<IRTSegmentState>(saved_local_ref_cookie);
   self->PopHandleScope();
 }
 
@@ -97,20 +135,7 @@
 }
 
 extern void JniMethodFastEnd(uint32_t saved_local_ref_cookie, Thread* self) {
-  // inlined fast version of GoToRunnable(self);
-
-  if (kIsDebugBuild) {
-    ArtMethod* native_method = *self->GetManagedStack()->GetTopQuickFrame();
-    CHECK(native_method->IsAnnotatedWithFastNative()) << PrettyMethod(native_method);
-  }
-
-  if (UNLIKELY(self->TestAllFlags())) {
-    // In fast JNI mode we never transitioned out of runnable. Perform a suspend check if there
-    // is a flag raised.
-    DCHECK(Locks::mutator_lock_->IsSharedHeld(self));
-    self->CheckSuspend();
-  }
-
+  GoToRunnableFast</*kDynamicFast*/false>(self);
   PopLocalReferences(saved_local_ref_cookie, self);
 }
 
@@ -122,27 +147,33 @@
   PopLocalReferences(saved_local_ref_cookie, self);
 }
 
-// TODO: JniMethodFastEndWithReference
-// (Probably don't need to have a synchronized variant since
-// it already has to do atomic operations)
-
 // Common result handling for EndWithReference.
 static mirror::Object* JniMethodEndWithReferenceHandleResult(jobject result,
                                                              uint32_t saved_local_ref_cookie,
                                                              Thread* self)
     NO_THREAD_SAFETY_ANALYSIS {
   // Must decode before pop. The 'result' may not be valid in case of an exception, though.
-  mirror::Object* o = self->IsExceptionPending() ? nullptr : self->DecodeJObject(result);
+  ObjPtr<mirror::Object> o;
+  if (!self->IsExceptionPending()) {
+    o = self->DecodeJObject(result);
+  }
   PopLocalReferences(saved_local_ref_cookie, self);
   // Process result.
   if (UNLIKELY(self->GetJniEnv()->check_jni)) {
     // CheckReferenceResult can resolve types.
     StackHandleScope<1> hs(self);
-    HandleWrapper<mirror::Object> h_obj(hs.NewHandleWrapper(&o));
+    HandleWrapperObjPtr<mirror::Object> h_obj(hs.NewHandleWrapper(&o));
     CheckReferenceResult(h_obj, self);
   }
   VerifyObject(o);
-  return o;
+  return o.Ptr();
+}
+
+extern mirror::Object* JniMethodFastEndWithReference(jobject result,
+                                                     uint32_t saved_local_ref_cookie,
+                                                     Thread* self) {
+  GoToRunnableFast</*kDynamicFast*/false>(self);
+  return JniMethodEndWithReferenceHandleResult(result, saved_local_ref_cookie, self);
 }
 
 extern mirror::Object* JniMethodEndWithReference(jobject result,
diff --git a/runtime/entrypoints/quick/quick_throw_entrypoints.cc b/runtime/entrypoints/quick/quick_throw_entrypoints.cc
index a205b17..c8ee99a 100644
--- a/runtime/entrypoints/quick/quick_throw_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_throw_entrypoints.cc
@@ -111,6 +111,14 @@
   self->QuickDeliverException();
 }
 
+extern "C" NO_RETURN void artThrowClassCastExceptionForObject(mirror::Object* obj,
+                                                              mirror::Class* dest_type,
+                                                              Thread* self)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  DCHECK(obj != nullptr);
+  artThrowClassCastException(dest_type, obj->GetClass(), self);
+}
+
 extern "C" NO_RETURN void artThrowArrayStoreException(mirror::Object* array, mirror::Object* value,
                                                       Thread* self)
     REQUIRES_SHARED(Locks::mutator_lock_) {
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
index 126e26c..fe82878 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints.cc
@@ -660,11 +660,11 @@
       StackedShadowFrameType::kDeoptimizationShadowFrame, false);
   ManagedStack fragment;
 
-  DCHECK(!method->IsNative()) << PrettyMethod(method);
+  DCHECK(!method->IsNative()) << method->PrettyMethod();
   uint32_t shorty_len = 0;
   ArtMethod* non_proxy_method = method->GetInterfaceMethodIfProxy(kRuntimePointerSize);
   const DexFile::CodeItem* code_item = non_proxy_method->GetCodeItem();
-  DCHECK(code_item != nullptr) << PrettyMethod(method);
+  DCHECK(code_item != nullptr) << method->PrettyMethod();
   const char* shorty = non_proxy_method->GetShorty(&shorty_len);
 
   JValue result;
@@ -679,8 +679,8 @@
       while (linked->GetLink() != nullptr) {
         linked = linked->GetLink();
       }
-      CHECK_EQ(method, linked->GetMethod()) << PrettyMethod(method) << " "
-          << PrettyMethod(linked->GetMethod());
+      CHECK_EQ(method, linked->GetMethod()) << method->PrettyMethod() << " "
+          << ArtMethod::PrettyMethod(linked->GetMethod());
     }
 
     if (VLOG_IS_ON(deopt)) {
@@ -743,7 +743,8 @@
       StackHandleScope<1> hs(self);
       Handle<mirror::Class> h_class(hs.NewHandle(shadow_frame->GetMethod()->GetDeclaringClass()));
       if (!Runtime::Current()->GetClassLinker()->EnsureInitialized(self, h_class, true, true)) {
-        DCHECK(Thread::Current()->IsExceptionPending()) << PrettyMethod(shadow_frame->GetMethod());
+        DCHECK(Thread::Current()->IsExceptionPending())
+            << shadow_frame->GetMethod()->PrettyMethod();
         self->PopManagedStackFragment(fragment);
         return 0;
       }
@@ -834,7 +835,7 @@
 void BuildQuickArgumentVisitor::FixupReferences() {
   // Fixup any references which may have changed.
   for (const auto& pair : references_) {
-    pair.second->Assign(soa_->Decode<mirror::Object>(pair.first).Ptr());
+    pair.second->Assign(soa_->Decode<mirror::Object>(pair.first));
     soa_->Env()->DeleteLocalRef(pair.first);
   }
 }
@@ -846,13 +847,13 @@
 extern "C" uint64_t artQuickProxyInvokeHandler(
     ArtMethod* proxy_method, mirror::Object* receiver, Thread* self, ArtMethod** sp)
     REQUIRES_SHARED(Locks::mutator_lock_) {
-  DCHECK(proxy_method->IsProxyMethod()) << PrettyMethod(proxy_method);
-  DCHECK(receiver->GetClass()->IsProxyClass()) << PrettyMethod(proxy_method);
+  DCHECK(proxy_method->IsProxyMethod()) << proxy_method->PrettyMethod();
+  DCHECK(receiver->GetClass()->IsProxyClass()) << proxy_method->PrettyMethod();
   // Ensure we don't get thread suspension until the object arguments are safely in jobjects.
   const char* old_cause =
       self->StartAssertNoThreadSuspension("Adding to IRT proxy object arguments");
   // Register the top of the managed stack, making stack crawlable.
-  DCHECK_EQ((*sp), proxy_method) << PrettyMethod(proxy_method);
+  DCHECK_EQ((*sp), proxy_method) << proxy_method->PrettyMethod();
   self->VerifyStack();
   // Start new JNI local reference state.
   JNIEnvExt* env = self->GetJniEnv();
@@ -863,21 +864,21 @@
 
   // Placing arguments into args vector and remove the receiver.
   ArtMethod* non_proxy_method = proxy_method->GetInterfaceMethodIfProxy(kRuntimePointerSize);
-  CHECK(!non_proxy_method->IsStatic()) << PrettyMethod(proxy_method) << " "
-                                       << PrettyMethod(non_proxy_method);
+  CHECK(!non_proxy_method->IsStatic()) << proxy_method->PrettyMethod() << " "
+                                       << non_proxy_method->PrettyMethod();
   std::vector<jvalue> args;
   uint32_t shorty_len = 0;
   const char* shorty = non_proxy_method->GetShorty(&shorty_len);
   BuildQuickArgumentVisitor local_ref_visitor(sp, false, shorty, shorty_len, &soa, &args);
 
   local_ref_visitor.VisitArguments();
-  DCHECK_GT(args.size(), 0U) << PrettyMethod(proxy_method);
+  DCHECK_GT(args.size(), 0U) << proxy_method->PrettyMethod();
   args.erase(args.begin());
 
   // Convert proxy method into expected interface method.
   ArtMethod* interface_method = proxy_method->FindOverriddenMethod(kRuntimePointerSize);
-  DCHECK(interface_method != nullptr) << PrettyMethod(proxy_method);
-  DCHECK(!interface_method->IsProxyMethod()) << PrettyMethod(interface_method);
+  DCHECK(interface_method != nullptr) << proxy_method->PrettyMethod();
+  DCHECK(!interface_method->IsProxyMethod()) << interface_method->PrettyMethod();
   self->EndAssertNoThreadSuspension(old_cause);
   DCHECK_EQ(Runtime::Current()->GetClassLinker()->GetImagePointerSize(), kRuntimePointerSize);
   DCHECK(!Runtime::Current()->IsActiveTransaction());
@@ -926,7 +927,7 @@
 void RememberForGcArgumentVisitor::FixupReferences() {
   // Fixup any references which may have changed.
   for (const auto& pair : references_) {
-    pair.second->Assign(soa_->Decode<mirror::Object>(pair.first).Ptr());
+    pair.second->Assign(soa_->Decode<mirror::Object>(pair.first));
     soa_->Env()->DeleteLocalRef(pair.first);
   }
 }
@@ -1034,7 +1035,7 @@
   if (LIKELY(!self->IsExceptionPending())) {
     // Incompatible class change should have been handled in resolve method.
     CHECK(!called->CheckIncompatibleClassChange(invoke_type))
-        << PrettyMethod(called) << " " << invoke_type;
+        << called->PrettyMethod() << " " << invoke_type;
     if (virtual_or_interface || invoke_type == kSuper) {
       // Refine called method based on receiver for kVirtual/kInterface, and
       // caller for kSuper.
@@ -1064,8 +1065,8 @@
         }
       }
 
-      CHECK(called != nullptr) << PrettyMethod(orig_called) << " "
-                               << PrettyTypeOf(receiver) << " "
+      CHECK(called != nullptr) << orig_called->PrettyMethod() << " "
+                               << mirror::Object::PrettyTypeOf(receiver) << " "
                                << invoke_type << " " << orig_called->GetVtableIndex();
 
       // We came here because of sharpening. Ensure the dex cache is up-to-date on the method index
@@ -1999,7 +2000,7 @@
 extern "C" TwoWordReturn artQuickGenericJniTrampoline(Thread* self, ArtMethod** sp)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   ArtMethod* called = *sp;
-  DCHECK(called->IsNative()) << PrettyMethod(called, true);
+  DCHECK(called->IsNative()) << called->PrettyMethod(true);
   uint32_t shorty_len = 0;
   const char* shorty = called->GetShorty(&shorty_len);
   bool critical_native = called->IsAnnotatedWithCriticalNative();
@@ -2120,7 +2121,9 @@
 // to hold the mutator lock (see REQUIRES_SHARED(Locks::mutator_lock_) annotations).
 
 template<InvokeType type, bool access_check>
-static TwoWordReturn artInvokeCommon(uint32_t method_idx, mirror::Object* this_object, Thread* self,
+static TwoWordReturn artInvokeCommon(uint32_t method_idx,
+                                     ObjPtr<mirror::Object> this_object,
+                                     Thread* self,
                                      ArtMethod** sp) {
   ScopedQuickEntrypointChecks sqec(self);
   DCHECK_EQ(*sp, Runtime::Current()->GetCalleeSaveMethod(Runtime::kSaveRefsAndArgs));
@@ -2135,7 +2138,9 @@
       ScopedObjectAccessUnchecked soa(self->GetJniEnv());
       RememberForGcArgumentVisitor visitor(sp, type == kStatic, shorty, shorty_len, &soa);
       visitor.VisitArguments();
-      method = FindMethodFromCode<type, access_check>(method_idx, &this_object, caller_method,
+      method = FindMethodFromCode<type, access_check>(method_idx,
+                                                      &this_object,
+                                                      caller_method,
                                                       self);
       visitor.FixupReferences();
     }
@@ -2149,7 +2154,7 @@
   const void* code = method->GetEntryPointFromQuickCompiledCode();
 
   // When we return, the caller will branch to this address, so it had better not be 0!
-  DCHECK(code != nullptr) << "Code was null in method: " << PrettyMethod(method)
+  DCHECK(code != nullptr) << "Code was null in method: " << method->PrettyMethod()
                           << " location: "
                           << method->GetDexFile()->GetLocation();
 
@@ -2161,7 +2166,7 @@
 #define EXPLICIT_INVOKE_COMMON_TEMPLATE_DECL(type, access_check)                                \
   template REQUIRES_SHARED(Locks::mutator_lock_)                                          \
   TwoWordReturn artInvokeCommon<type, access_check>(                                            \
-      uint32_t method_idx, mirror::Object* this_object, Thread* self, ArtMethod** sp)
+      uint32_t method_idx, ObjPtr<mirror::Object> his_object, Thread* self, ArtMethod** sp)
 
 EXPLICIT_INVOKE_COMMON_TEMPLATE_DECL(kVirtual, false);
 EXPLICIT_INVOKE_COMMON_TEMPLATE_DECL(kVirtual, true);
@@ -2189,9 +2194,13 @@
 }
 
 extern "C" TwoWordReturn artInvokeStaticTrampolineWithAccessCheck(
-    uint32_t method_idx, mirror::Object* this_object, Thread* self, ArtMethod** sp)
-    REQUIRES_SHARED(Locks::mutator_lock_) {
-  return artInvokeCommon<kStatic, true>(method_idx, this_object, self, sp);
+    uint32_t method_idx,
+    mirror::Object* this_object ATTRIBUTE_UNUSED,
+    Thread* self,
+    ArtMethod** sp) REQUIRES_SHARED(Locks::mutator_lock_) {
+  // For static, this_object is not required and may be random garbage. Don't pass it down so that
+  // it doesn't cause ObjPtr alignment failure check.
+  return artInvokeCommon<kStatic, true>(method_idx, nullptr, self, sp);
 }
 
 extern "C" TwoWordReturn artInvokeSuperTrampolineWithAccessCheck(
@@ -2210,10 +2219,11 @@
 // is there for consistency but should not be used, as some architectures overwrite it
 // in the assembly trampoline.
 extern "C" TwoWordReturn artInvokeInterfaceTrampoline(uint32_t deadbeef ATTRIBUTE_UNUSED,
-                                                      mirror::Object* this_object,
+                                                      mirror::Object* raw_this_object,
                                                       Thread* self,
                                                       ArtMethod** sp)
     REQUIRES_SHARED(Locks::mutator_lock_) {
+  ObjPtr<mirror::Object> this_object(raw_this_object);
   ScopedQuickEntrypointChecks sqec(self);
   StackHandleScope<1> hs(self);
   Handle<mirror::Class> cls(hs.NewHandle(this_object->GetClass()));
@@ -2240,7 +2250,7 @@
 
   ArtMethod* interface_method = caller_method->GetDexCacheResolvedMethod(
       dex_method_idx, kRuntimePointerSize);
-  DCHECK(interface_method != nullptr) << dex_method_idx << " " << PrettyMethod(caller_method);
+  DCHECK(interface_method != nullptr) << dex_method_idx << " " << caller_method->PrettyMethod();
   ArtMethod* method = nullptr;
   ImTable* imt = cls->GetImt(kRuntimePointerSize);
 
@@ -2284,7 +2294,9 @@
       ScopedObjectAccessUnchecked soa(self->GetJniEnv());
       RememberForGcArgumentVisitor visitor(sp, false, shorty, shorty_len, &soa);
       visitor.VisitArguments();
-      method = FindMethodFromCode<kInterface, false>(dex_method_idx, &this_object, caller_method,
+      method = FindMethodFromCode<kInterface, false>(dex_method_idx,
+                                                     &this_object,
+                                                     caller_method,
                                                      self);
       visitor.FixupReferences();
     }
@@ -2321,7 +2333,7 @@
   const void* code = method->GetEntryPointFromQuickCompiledCode();
 
   // When we return, the caller will branch to this address, so it had better not be 0!
-  DCHECK(code != nullptr) << "Code was null in method: " << PrettyMethod(method)
+  DCHECK(code != nullptr) << "Code was null in method: " << method->PrettyMethod()
                           << " location: " << method->GetDexFile()->GetLocation();
 
   return GetTwoWordSuccessValue(reinterpret_cast<uintptr_t>(code),
diff --git a/runtime/entrypoints/quick/quick_trampoline_entrypoints_test.cc b/runtime/entrypoints/quick/quick_trampoline_entrypoints_test.cc
index 553c092..1cd641b 100644
--- a/runtime/entrypoints/quick/quick_trampoline_entrypoints_test.cc
+++ b/runtime/entrypoints/quick/quick_trampoline_entrypoints_test.cc
@@ -94,6 +94,7 @@
   CHECK_FRAME_SIZE(kArm);
   CHECK_FRAME_SIZE(kArm64);
   CHECK_FRAME_SIZE(kMips);
+  CHECK_FRAME_SIZE(kMips64);
   CHECK_FRAME_SIZE(kX86);
   CHECK_FRAME_SIZE(kX86_64);
 }
@@ -104,6 +105,7 @@
   EXPECT_EQ(GetInstructionSetPointerSize(kArm), GetConstExprPointerSize(kArm));
   EXPECT_EQ(GetInstructionSetPointerSize(kArm64), GetConstExprPointerSize(kArm64));
   EXPECT_EQ(GetInstructionSetPointerSize(kMips), GetConstExprPointerSize(kMips));
+  EXPECT_EQ(GetInstructionSetPointerSize(kMips64), GetConstExprPointerSize(kMips64));
   EXPECT_EQ(GetInstructionSetPointerSize(kX86), GetConstExprPointerSize(kX86));
   EXPECT_EQ(GetInstructionSetPointerSize(kX86_64), GetConstExprPointerSize(kX86_64));
 }
diff --git a/runtime/entrypoints_order_test.cc b/runtime/entrypoints_order_test.cc
index 03254ab..b0463d7 100644
--- a/runtime/entrypoints_order_test.cc
+++ b/runtime/entrypoints_order_test.cc
@@ -122,9 +122,9 @@
 
     // Skip across the entrypoints structures.
 
-    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_start, thread_local_pos, sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_pos, thread_local_end, sizeof(void*));
-    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_end, thread_local_objects, sizeof(void*));
+    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_end, thread_local_start, sizeof(void*));
+    EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_start, thread_local_objects, sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, thread_local_objects, mterp_current_ibase, sizeof(size_t));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, mterp_current_ibase, mterp_default_ibase, sizeof(void*));
     EXPECT_OFFSET_DIFFP(Thread, tlsPtr_, mterp_default_ibase, mterp_alt_ibase, sizeof(void*));
@@ -174,8 +174,9 @@
                          sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pAllocStringFromString, pInstanceofNonTrivial,
                          sizeof(void*));
-    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pInstanceofNonTrivial, pCheckCast, sizeof(void*));
-    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pCheckCast, pInitializeStaticStorage, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pInstanceofNonTrivial, pCheckInstanceOf, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pCheckInstanceOf, pInitializeStaticStorage,
+                         sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pInitializeStaticStorage, pInitializeTypeAndVerifyAccess,
                          sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pInitializeTypeAndVerifyAccess, pInitializeType,
@@ -223,6 +224,8 @@
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodEndSynchronized, pJniMethodEndWithReference,
                          sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodEndWithReference,
+                         pJniMethodFastEndWithReference, sizeof(void*));
+    EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodFastEndWithReference,
                          pJniMethodEndWithReferenceSynchronized, sizeof(void*));
     EXPECT_OFFSET_DIFFNP(QuickEntryPoints, pJniMethodEndWithReferenceSynchronized,
                          pQuickGenericJniTrampoline, sizeof(void*));
diff --git a/runtime/gc/accounting/card_table-inl.h b/runtime/gc/accounting/card_table-inl.h
index f72f219..6ff5359 100644
--- a/runtime/gc/accounting/card_table-inl.h
+++ b/runtime/gc/accounting/card_table-inl.h
@@ -50,13 +50,17 @@
 }
 
 template <bool kClearCard, typename Visitor>
-inline size_t CardTable::Scan(ContinuousSpaceBitmap* bitmap, uint8_t* scan_begin, uint8_t* scan_end,
-                              const Visitor& visitor, const uint8_t minimum_age) const {
+inline size_t CardTable::Scan(ContinuousSpaceBitmap* bitmap,
+                              uint8_t* const scan_begin,
+                              uint8_t* const scan_end,
+                              const Visitor& visitor,
+                              const uint8_t minimum_age) {
   DCHECK_GE(scan_begin, reinterpret_cast<uint8_t*>(bitmap->HeapBegin()));
   // scan_end is the byte after the last byte we scan.
   DCHECK_LE(scan_end, reinterpret_cast<uint8_t*>(bitmap->HeapLimit()));
-  uint8_t* card_cur = CardFromAddr(scan_begin);
-  uint8_t* card_end = CardFromAddr(AlignUp(scan_end, kCardSize));
+  uint8_t* const card_begin = CardFromAddr(scan_begin);
+  uint8_t* const card_end = CardFromAddr(AlignUp(scan_end, kCardSize));
+  uint8_t* card_cur = card_begin;
   CheckCardValid(card_cur);
   CheckCardValid(card_end);
   size_t cards_scanned = 0;
@@ -67,9 +71,6 @@
       uintptr_t start = reinterpret_cast<uintptr_t>(AddrFromCard(card_cur));
       bitmap->VisitMarkedRange(start, start + kCardSize, visitor);
       ++cards_scanned;
-      if (kClearCard) {
-        *card_cur = 0;
-      }
     }
     ++card_cur;
   }
@@ -99,9 +100,6 @@
             << "card " << static_cast<size_t>(*card) << " intptr_t " << (start_word & 0xFF);
         bitmap->VisitMarkedRange(start, start + kCardSize, visitor);
         ++cards_scanned;
-        if (kClearCard) {
-          *card = 0;
-        }
       }
       start_word >>= 8;
       start += kCardSize;
@@ -116,13 +114,14 @@
       uintptr_t start = reinterpret_cast<uintptr_t>(AddrFromCard(card_cur));
       bitmap->VisitMarkedRange(start, start + kCardSize, visitor);
       ++cards_scanned;
-      if (kClearCard) {
-        *card_cur = 0;
-      }
     }
     ++card_cur;
   }
 
+  if (kClearCard) {
+    ClearCardRange(scan_begin, scan_end);
+  }
+
   return cards_scanned;
 }
 
@@ -135,7 +134,9 @@
  * us to know which cards got cleared.
  */
 template <typename Visitor, typename ModifiedVisitor>
-inline void CardTable::ModifyCardsAtomic(uint8_t* scan_begin, uint8_t* scan_end, const Visitor& visitor,
+inline void CardTable::ModifyCardsAtomic(uint8_t* scan_begin,
+                                         uint8_t* scan_end,
+                                         const Visitor& visitor,
                                          const ModifiedVisitor& modified) {
   uint8_t* card_cur = CardFromAddr(scan_begin);
   uint8_t* card_end = CardFromAddr(AlignUp(scan_end, kCardSize));
diff --git a/runtime/gc/accounting/card_table.cc b/runtime/gc/accounting/card_table.cc
index 121da37..4506597 100644
--- a/runtime/gc/accounting/card_table.cc
+++ b/runtime/gc/accounting/card_table.cc
@@ -97,36 +97,18 @@
   // Destroys MemMap via std::unique_ptr<>.
 }
 
-void CardTable::ClearSpaceCards(space::ContinuousSpace* space) {
-  // TODO: clear just the range of the table that has been modified
-  uint8_t* card_start = CardFromAddr(space->Begin());
-  uint8_t* card_end = CardFromAddr(space->End());  // Make sure to round up.
-  memset(reinterpret_cast<void*>(card_start), kCardClean, card_end - card_start);
-}
-
 void CardTable::ClearCardTable() {
   static_assert(kCardClean == 0, "kCardClean must be 0");
   mem_map_->MadviseDontNeedAndZero();
 }
 
 void CardTable::ClearCardRange(uint8_t* start, uint8_t* end) {
-  if (!kMadviseZeroes) {
-    memset(start, 0, end - start);
-    return;
-  }
   CHECK_ALIGNED(reinterpret_cast<uintptr_t>(start), kCardSize);
   CHECK_ALIGNED(reinterpret_cast<uintptr_t>(end), kCardSize);
   static_assert(kCardClean == 0, "kCardClean must be 0");
   uint8_t* start_card = CardFromAddr(start);
   uint8_t* end_card = CardFromAddr(end);
-  uint8_t* round_start = AlignUp(start_card, kPageSize);
-  uint8_t* round_end = AlignDown(end_card, kPageSize);
-  if (round_start < round_end) {
-    madvise(round_start, round_end - round_start, MADV_DONTNEED);
-  }
-  // Handle unaligned regions at start / end.
-  memset(start_card, 0, std::min(round_start, end_card) - start_card);
-  memset(std::max(round_end, start_card), 0, end_card - std::max(round_end, start_card));
+  ZeroAndReleasePages(start_card, end_card - start_card);
 }
 
 bool CardTable::AddrIsInCardTable(const void* addr) const {
diff --git a/runtime/gc/accounting/card_table.h b/runtime/gc/accounting/card_table.h
index 969bfb7..68ef15d 100644
--- a/runtime/gc/accounting/card_table.h
+++ b/runtime/gc/accounting/card_table.h
@@ -98,15 +98,19 @@
    * us to know which cards got cleared.
    */
   template <typename Visitor, typename ModifiedVisitor>
-  void ModifyCardsAtomic(uint8_t* scan_begin, uint8_t* scan_end, const Visitor& visitor,
+  void ModifyCardsAtomic(uint8_t* scan_begin,
+                         uint8_t* scan_end,
+                         const Visitor& visitor,
                          const ModifiedVisitor& modified);
 
   // For every dirty at least minumum age between begin and end invoke the visitor with the
   // specified argument. Returns how many cards the visitor was run on.
   template <bool kClearCard, typename Visitor>
-  size_t Scan(SpaceBitmap<kObjectAlignment>* bitmap, uint8_t* scan_begin, uint8_t* scan_end,
+  size_t Scan(SpaceBitmap<kObjectAlignment>* bitmap,
+              uint8_t* scan_begin,
+              uint8_t* scan_end,
               const Visitor& visitor,
-              const uint8_t minimum_age = kCardDirty) const
+              const uint8_t minimum_age = kCardDirty)
       REQUIRES(Locks::heap_bitmap_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
@@ -119,9 +123,6 @@
   // Clear a range of cards that covers start to end, start and end must be aligned to kCardSize.
   void ClearCardRange(uint8_t* start, uint8_t* end);
 
-  // Resets all of the bytes in the card table which do not map to the image space.
-  void ClearSpaceCards(space::ContinuousSpace* space);
-
   // Returns the first address in the heap which maps to this card.
   void* AddrFromCard(const uint8_t *card_addr) const ALWAYS_INLINE;
 
diff --git a/runtime/gc/accounting/mod_union_table.cc b/runtime/gc/accounting/mod_union_table.cc
index 3b6750e..0325535 100644
--- a/runtime/gc/accounting/mod_union_table.cc
+++ b/runtime/gc/accounting/mod_union_table.cc
@@ -168,7 +168,7 @@
   bool* const contains_reference_to_other_space_;
 };
 
-void ModUnionTableReferenceCache::ClearCards() {
+void ModUnionTableReferenceCache::ProcessCards() {
   CardTable* card_table = GetHeap()->GetCardTable();
   ModUnionAddToCardSetVisitor visitor(&cleared_cards_);
   // Clear dirty cards in the this space and update the corresponding mod-union bits.
@@ -276,8 +276,9 @@
       Heap* heap = mod_union_table_->GetHeap();
       space::ContinuousSpace* from_space = heap->FindContinuousSpaceFromObject(obj, false);
       space::ContinuousSpace* to_space = heap->FindContinuousSpaceFromObject(ref, false);
-      LOG(INFO) << "Object " << reinterpret_cast<const void*>(obj) << "(" << PrettyTypeOf(obj)
-          << ")" << "References " << reinterpret_cast<const void*>(ref) << "(" << PrettyTypeOf(ref)
+      LOG(INFO) << "Object " << reinterpret_cast<const void*>(obj) << "(" << obj->PrettyTypeOf()
+                << ")" << "References "
+                << reinterpret_cast<const void*>(ref) << "(" << mirror::Object::PrettyTypeOf(ref)
           << ") without being in mod-union table";
       LOG(INFO) << "FromSpace " << from_space->GetName() << " type "
           << from_space->GetGcRetentionPolicy();
@@ -524,7 +525,7 @@
   ModUnionTable::CardBitmap* const card_bitmap_;
 };
 
-void ModUnionTableCardCache::ClearCards() {
+void ModUnionTableCardCache::ProcessCards() {
   CardTable* const card_table = GetHeap()->GetCardTable();
   ModUnionAddToCardBitmapVisitor visitor(card_bitmap_.get(), card_table);
   // Clear dirty cards in the this space and update the corresponding mod-union bits.
diff --git a/runtime/gc/accounting/mod_union_table.h b/runtime/gc/accounting/mod_union_table.h
index b6792c4..591365f 100644
--- a/runtime/gc/accounting/mod_union_table.h
+++ b/runtime/gc/accounting/mod_union_table.h
@@ -55,10 +55,10 @@
 
   virtual ~ModUnionTable() {}
 
-  // Clear cards which map to a memory range of a space. This doesn't immediately update the
-  // mod-union table, as updating the mod-union table may have an associated cost, such as
-  // determining references to track.
-  virtual void ClearCards() = 0;
+  // Process cards for a memory range of a space. This doesn't immediately update the mod-union
+  // table, as updating the mod-union table may have an associated cost, such as determining
+  // references to track.
+  virtual void ProcessCards() = 0;
 
   // Set all the cards.
   virtual void SetCards() = 0;
@@ -66,9 +66,9 @@
   // Clear all of the table.
   virtual void ClearTable() = 0;
 
-  // Update the mod-union table using data stored by ClearCards. There may be multiple ClearCards
-  // before a call to update, for example, back-to-back sticky GCs. Also mark references to other
-  // spaces which are stored in the mod-union table.
+  // Update the mod-union table using data stored by ProcessCards. There may be multiple
+  // ProcessCards before a call to update, for example, back-to-back sticky GCs. Also mark
+  // references to other spaces which are stored in the mod-union table.
   virtual void UpdateAndMarkReferences(MarkObjectVisitor* visitor) = 0;
 
   // Visit all of the objects that may contain references to other spaces.
@@ -117,7 +117,7 @@
   virtual ~ModUnionTableReferenceCache() {}
 
   // Clear and store cards for a space.
-  void ClearCards() OVERRIDE;
+  void ProcessCards() OVERRIDE;
 
   // Update table based on cleared cards and mark all references to the other spaces.
   void UpdateAndMarkReferences(MarkObjectVisitor* visitor) OVERRIDE
@@ -164,7 +164,7 @@
   virtual ~ModUnionTableCardCache() {}
 
   // Clear and store cards for a space.
-  virtual void ClearCards() OVERRIDE;
+  virtual void ProcessCards() OVERRIDE;
 
   // Mark all references to the alloc space(s).
   virtual void UpdateAndMarkReferences(MarkObjectVisitor* visitor) OVERRIDE
diff --git a/runtime/gc/accounting/mod_union_table_test.cc b/runtime/gc/accounting/mod_union_table_test.cc
index 2810f58..cf63b30 100644
--- a/runtime/gc/accounting/mod_union_table_test.cc
+++ b/runtime/gc/accounting/mod_union_table_test.cc
@@ -214,7 +214,7 @@
   ASSERT_TRUE(other_space_ref2 != nullptr);
   obj1->Set(1, other_space_ref1);
   obj2->Set(3, other_space_ref2);
-  table->ClearCards();
+  table->ProcessCards();
   std::set<mirror::Object*> visited_before;
   CollectVisitedVisitor collector_before(&visited_before);
   table->UpdateAndMarkReferences(&collector_before);
diff --git a/runtime/gc/accounting/remembered_set.cc b/runtime/gc/accounting/remembered_set.cc
index 7229f76..29bab01 100644
--- a/runtime/gc/accounting/remembered_set.cc
+++ b/runtime/gc/accounting/remembered_set.cc
@@ -66,7 +66,9 @@
       : collector_(collector), target_space_(target_space),
         contains_reference_to_target_space_(contains_reference_to_target_space) {}
 
-  void operator()(mirror::Object* obj, MemberOffset offset, bool is_static ATTRIBUTE_UNUSED) const
+  void operator()(ObjPtr<mirror::Object> obj,
+                  MemberOffset offset,
+                  bool is_static ATTRIBUTE_UNUSED) const
       REQUIRES_SHARED(Locks::mutator_lock_) {
     DCHECK(obj != nullptr);
     mirror::HeapReference<mirror::Object>* ref_ptr = obj->GetFieldObjectReferenceAddr(offset);
@@ -77,7 +79,7 @@
     }
   }
 
-  void operator()(mirror::Class* klass, mirror::Reference* ref) const
+  void operator()(ObjPtr<mirror::Class> klass, ObjPtr<mirror::Reference> ref) const
       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_) {
     if (target_space_->HasAddress(ref->GetReferent())) {
       *contains_reference_to_target_space_ = true;
@@ -115,7 +117,7 @@
       : collector_(collector), target_space_(target_space),
         contains_reference_to_target_space_(contains_reference_to_target_space) {}
 
-  void operator()(mirror::Object* obj) const REQUIRES(Locks::heap_bitmap_lock_)
+  void operator()(ObjPtr<mirror::Object> obj) const REQUIRES(Locks::heap_bitmap_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_) {
     RememberedSetReferenceVisitor visitor(target_space_, contains_reference_to_target_space_,
                                           collector_);
diff --git a/runtime/gc/accounting/space_bitmap.cc b/runtime/gc/accounting/space_bitmap.cc
index a968343..f4d0bc7 100644
--- a/runtime/gc/accounting/space_bitmap.cc
+++ b/runtime/gc/accounting/space_bitmap.cc
@@ -104,6 +104,25 @@
 }
 
 template<size_t kAlignment>
+void SpaceBitmap<kAlignment>::ClearRange(const mirror::Object* begin, const mirror::Object* end) {
+  uintptr_t begin_offset = reinterpret_cast<uintptr_t>(begin) - heap_begin_;
+  uintptr_t end_offset = reinterpret_cast<uintptr_t>(end) - heap_begin_;
+  // Align begin and end to word boundaries.
+  while (begin_offset < end_offset && OffsetBitIndex(begin_offset) != 0) {
+    Clear(reinterpret_cast<mirror::Object*>(heap_begin_ + begin_offset));
+    begin_offset += kAlignment;
+  }
+  while (begin_offset < end_offset && OffsetBitIndex(end_offset) != 0) {
+    end_offset -= kAlignment;
+    Clear(reinterpret_cast<mirror::Object*>(heap_begin_ + end_offset));
+  }
+  const uintptr_t start_index = OffsetToIndex(begin_offset);
+  const uintptr_t end_index = OffsetToIndex(end_offset);
+  ZeroAndReleasePages(reinterpret_cast<uint8_t*>(&bitmap_begin_[start_index]),
+                      (end_index - start_index) * sizeof(*bitmap_begin_));
+}
+
+template<size_t kAlignment>
 void SpaceBitmap<kAlignment>::CopyFrom(SpaceBitmap* source_bitmap) {
   DCHECK_EQ(Size(), source_bitmap->Size());
   const size_t count = source_bitmap->Size() / sizeof(intptr_t);
diff --git a/runtime/gc/accounting/space_bitmap.h b/runtime/gc/accounting/space_bitmap.h
index 296663a..b136488 100644
--- a/runtime/gc/accounting/space_bitmap.h
+++ b/runtime/gc/accounting/space_bitmap.h
@@ -68,9 +68,13 @@
     return static_cast<T>(index * kAlignment * kBitsPerIntPtrT);
   }
 
+  ALWAYS_INLINE static constexpr uintptr_t OffsetBitIndex(uintptr_t offset) {
+    return (offset / kAlignment) % kBitsPerIntPtrT;
+  }
+
   // Bits are packed in the obvious way.
   static constexpr uintptr_t OffsetToMask(uintptr_t offset) {
-    return (static_cast<size_t>(1)) << ((offset / kAlignment) % kBitsPerIntPtrT);
+    return static_cast<size_t>(1) << OffsetBitIndex(offset);
   }
 
   bool Set(const mirror::Object* obj) ALWAYS_INLINE {
@@ -87,6 +91,9 @@
   // Fill the bitmap with zeroes.  Returns the bitmap's memory to the system as a side-effect.
   void Clear();
 
+  // Clear a covered by the bitmap using madvise if possible.
+  void ClearRange(const mirror::Object* begin, const mirror::Object* end);
+
   bool Test(const mirror::Object* obj) const;
 
   // Return true iff <obj> is within the range of pointers that this bitmap could potentially cover,
diff --git a/runtime/gc/accounting/space_bitmap_test.cc b/runtime/gc/accounting/space_bitmap_test.cc
index edb08ef..8c06cfd 100644
--- a/runtime/gc/accounting/space_bitmap_test.cc
+++ b/runtime/gc/accounting/space_bitmap_test.cc
@@ -62,7 +62,7 @@
 
   std::unique_ptr<ContinuousSpaceBitmap> space_bitmap(
       ContinuousSpaceBitmap::Create("test bitmap", heap_begin, heap_capacity));
-  EXPECT_TRUE(space_bitmap.get() != nullptr);
+  EXPECT_TRUE(space_bitmap != nullptr);
 
   // Set all the odd bits in the first BitsPerIntPtrT * 3 to one.
   for (size_t j = 0; j < kBitsPerIntPtrT * 3; ++j) {
@@ -87,6 +87,48 @@
   }
 }
 
+TEST_F(SpaceBitmapTest, ClearRange) {
+  uint8_t* heap_begin = reinterpret_cast<uint8_t*>(0x10000000);
+  size_t heap_capacity = 16 * MB;
+
+  std::unique_ptr<ContinuousSpaceBitmap> bitmap(
+      ContinuousSpaceBitmap::Create("test bitmap", heap_begin, heap_capacity));
+  EXPECT_TRUE(bitmap != nullptr);
+
+  // Set all of the bits in the bitmap.
+  for (size_t j = 0; j < heap_capacity; j += kObjectAlignment) {
+    const mirror::Object* obj = reinterpret_cast<mirror::Object*>(heap_begin + j);
+    bitmap->Set(obj);
+  }
+
+  std::vector<std::pair<uintptr_t, uintptr_t>> ranges = {
+      {0, 10 * KB + kObjectAlignment},
+      {kObjectAlignment, kObjectAlignment},
+      {kObjectAlignment, 2 * kObjectAlignment},
+      {kObjectAlignment, 5 * kObjectAlignment},
+      {1 * KB + kObjectAlignment, 2 * KB + 5 * kObjectAlignment},
+  };
+  // Try clearing a few ranges.
+  for (const std::pair<uintptr_t, uintptr_t>& range : ranges) {
+    const mirror::Object* obj_begin = reinterpret_cast<mirror::Object*>(heap_begin + range.first);
+    const mirror::Object* obj_end = reinterpret_cast<mirror::Object*>(heap_begin + range.second);
+    bitmap->ClearRange(obj_begin, obj_end);
+    // Boundaries should still be marked.
+    for (uintptr_t i = 0; i < range.first; i += kObjectAlignment) {
+      EXPECT_TRUE(bitmap->Test(reinterpret_cast<mirror::Object*>(heap_begin + i)));
+    }
+    for (uintptr_t i = range.second; i < range.second + kPageSize; i += kObjectAlignment) {
+      EXPECT_TRUE(bitmap->Test(reinterpret_cast<mirror::Object*>(heap_begin + i)));
+    }
+    // Everything inside should be cleared.
+    for (uintptr_t i = range.first; i < range.second; i += kObjectAlignment) {
+      EXPECT_FALSE(bitmap->Test(reinterpret_cast<mirror::Object*>(heap_begin + i)));
+      bitmap->Set(reinterpret_cast<mirror::Object*>(heap_begin + i));
+    }
+  }
+}
+
+
 class SimpleCounter {
  public:
   explicit SimpleCounter(size_t* counter) : count_(counter) {}
diff --git a/runtime/gc/allocation_listener.h b/runtime/gc/allocation_listener.h
new file mode 100644
index 0000000..f60bc0c
--- /dev/null
+++ b/runtime/gc/allocation_listener.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_GC_ALLOCATION_LISTENER_H_
+#define ART_RUNTIME_GC_ALLOCATION_LISTENER_H_
+
+#include <list>
+#include <memory>
+
+#include "base/macros.h"
+#include "base/mutex.h"
+#include "obj_ptr.h"
+#include "object_callbacks.h"
+#include "gc_root.h"
+
+namespace art {
+
+namespace mirror {
+  class Object;
+}
+
+class Thread;
+
+namespace gc {
+
+class AllocationListener {
+ public:
+  virtual ~AllocationListener() {}
+
+  virtual void ObjectAllocated(Thread* self, ObjPtr<mirror::Object>* obj, size_t byte_count)
+      REQUIRES_SHARED(Locks::mutator_lock_) = 0;
+};
+
+}  // namespace gc
+}  // namespace art
+
+#endif  // ART_RUNTIME_GC_ALLOCATION_LISTENER_H_
diff --git a/runtime/gc/allocation_record.cc b/runtime/gc/allocation_record.cc
index 13ebb27..e18a955 100644
--- a/runtime/gc/allocation_record.cc
+++ b/runtime/gc/allocation_record.cc
@@ -19,6 +19,7 @@
 #include "art_method-inl.h"
 #include "base/enums.h"
 #include "base/stl_util.h"
+#include "obj_ptr-inl.h"
 #include "stack.h"
 
 #ifdef ART_TARGET_ANDROID
@@ -180,7 +181,6 @@
 }
 
 void AllocRecordObjectMap::BroadcastForNewAllocationRecords() {
-  CHECK(kUseReadBarrier);
   new_record_condition_.Broadcast(Thread::Current());
 }
 
@@ -263,7 +263,7 @@
 }
 
 void AllocRecordObjectMap::RecordAllocation(Thread* self,
-                                            mirror::Object** obj,
+                                            ObjPtr<mirror::Object>* obj,
                                             size_t byte_count) {
   // Get stack trace outside of lock in case there are allocations during the stack walk.
   // b/27858645.
@@ -290,6 +290,9 @@
   // Wait for GC's sweeping to complete and allow new records
   while (UNLIKELY((!kUseReadBarrier && !allow_new_record_) ||
                   (kUseReadBarrier && !self->GetWeakRefAccessEnabled()))) {
+    // Check and run the empty checkpoint before blocking so the empty checkpoint will work in the
+    // presence of threads blocking for weak ref access.
+    self->CheckEmptyCheckpoint();
     new_record_condition_.WaitHoldingLocks(self);
   }
 
@@ -305,7 +308,7 @@
   trace.SetTid(self->GetTid());
 
   // Add the record.
-  Put(*obj, AllocRecord(byte_count, (*obj)->GetClass(), std::move(trace)));
+  Put(obj->Ptr(), AllocRecord(byte_count, (*obj)->GetClass(), std::move(trace)));
   DCHECK_LE(Size(), alloc_record_max_);
 }
 
diff --git a/runtime/gc/allocation_record.h b/runtime/gc/allocation_record.h
index f1f013b..90cff6a 100644
--- a/runtime/gc/allocation_record.h
+++ b/runtime/gc/allocation_record.h
@@ -21,6 +21,7 @@
 #include <memory>
 
 #include "base/mutex.h"
+#include "obj_ptr.h"
 #include "object_callbacks.h"
 #include "gc_root.h"
 
@@ -210,7 +211,7 @@
   // Caller needs to check that it is enabled before calling since we read the stack trace before
   // checking the enabled boolean.
   void RecordAllocation(Thread* self,
-                        mirror::Object** obj,
+                        ObjPtr<mirror::Object>* obj,
                         size_t byte_count)
       REQUIRES(!Locks::alloc_tracker_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
@@ -260,7 +261,6 @@
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(Locks::alloc_tracker_lock_);
   void BroadcastForNewAllocationRecords()
-      REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(Locks::alloc_tracker_lock_);
 
   // TODO: Is there a better way to hide the entries_'s type?
diff --git a/runtime/gc/allocator/rosalloc.cc b/runtime/gc/allocator/rosalloc.cc
index a7f2aa0..40186f8 100644
--- a/runtime/gc/allocator/rosalloc.cc
+++ b/runtime/gc/allocator/rosalloc.cc
@@ -1966,7 +1966,7 @@
       CHECK_LE(obj_size + memory_tool_modifier, kLargeSizeThreshold)
           << "A run slot contains a large object " << Dump();
       CHECK_EQ(SizeToIndex(obj_size + memory_tool_modifier), idx)
-          << PrettyTypeOf(obj) << " "
+          << obj->PrettyTypeOf() << " "
           << "obj_size=" << obj_size << "(" << obj_size + memory_tool_modifier << "), idx=" << idx
           << " A run slot contains an object with wrong size " << Dump();
     }
diff --git a/runtime/gc/collector/concurrent_copying-inl.h b/runtime/gc/collector/concurrent_copying-inl.h
index 76f500c..7c64952 100644
--- a/runtime/gc/collector/concurrent_copying-inl.h
+++ b/runtime/gc/collector/concurrent_copying-inl.h
@@ -32,7 +32,7 @@
     mirror::Object* ref, accounting::ContinuousSpaceBitmap* bitmap) {
   // For the Baker-style RB, in a rare case, we could incorrectly change the object from white
   // to gray even though the object has already been marked through. This happens if a mutator
-  // thread gets preempted before the AtomicSetReadBarrierPointer below, GC marks through the
+  // thread gets preempted before the AtomicSetReadBarrierState below, GC marks through the
   // object (changes it from white to gray and back to white), and the thread runs and
   // incorrectly changes it from white to gray. If this happens, the object will get added to the
   // mark stack again and get changed back to white after it is processed.
@@ -50,14 +50,14 @@
     // we can avoid an expensive CAS.
     // For the baker case, an object is marked if either the mark bit marked or the bitmap bit is
     // set.
-    success = ref->AtomicSetReadBarrierPointer(ReadBarrier::WhitePtr(), ReadBarrier::GrayPtr());
+    success = ref->AtomicSetReadBarrierState(ReadBarrier::WhiteState(), ReadBarrier::GrayState());
   } else {
     success = !bitmap->AtomicTestAndSet(ref);
   }
   if (success) {
     // Newly marked.
     if (kUseBakerReadBarrier) {
-      DCHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::GrayPtr());
+      DCHECK_EQ(ref->GetReadBarrierState(), ReadBarrier::GrayState());
     }
     PushOntoMarkStack(ref);
   }
@@ -84,8 +84,8 @@
       return ref;
     }
     // This may or may not succeed, which is ok because the object may already be gray.
-    bool success = ref->AtomicSetReadBarrierPointer(ReadBarrier::WhitePtr(),
-                                                    ReadBarrier::GrayPtr());
+    bool success = ref->AtomicSetReadBarrierState(ReadBarrier::WhiteState(),
+                                                  ReadBarrier::GrayState());
     if (success) {
       MutexLock mu(Thread::Current(), immune_gray_stack_lock_);
       immune_gray_stack_.push_back(ref);
@@ -125,10 +125,6 @@
       return from_ref;
     case space::RegionSpace::RegionType::kRegionTypeFromSpace: {
       mirror::Object* to_ref = GetFwdPtr(from_ref);
-      if (kUseBakerReadBarrier) {
-        DCHECK_NE(to_ref, ReadBarrier::GrayPtr())
-            << "from_ref=" << from_ref << " to_ref=" << to_ref;
-      }
       if (to_ref == nullptr) {
         // It isn't marked yet. Mark it by copying it to the to-space.
         to_ref = Copy(from_ref);
@@ -153,9 +149,7 @@
 
 inline mirror::Object* ConcurrentCopying::MarkFromReadBarrier(mirror::Object* from_ref) {
   mirror::Object* ret;
-  // TODO: Delete GetMarkBit check when all of the callers properly check the bit. Remaining caller
-  // is array allocations.
-  if (from_ref == nullptr || from_ref->GetMarkBit()) {
+  if (from_ref == nullptr) {
     return from_ref;
   }
   // TODO: Consider removing this check when we are done investigating slow paths. b/30162165
@@ -192,9 +186,9 @@
 
 inline bool ConcurrentCopying::IsMarkedInUnevacFromSpace(mirror::Object* from_ref) {
   // Use load acquire on the read barrier pointer to ensure that we never see a white read barrier
-  // pointer with an unmarked bit due to reordering.
+  // state with an unmarked bit due to reordering.
   DCHECK(region_space_->IsInUnevacFromSpace(from_ref));
-  if (kUseBakerReadBarrier && from_ref->GetReadBarrierPointerAcquire() == ReadBarrier::GrayPtr()) {
+  if (kUseBakerReadBarrier && from_ref->GetReadBarrierStateAcquire() == ReadBarrier::GrayState()) {
     return true;
   }
   return region_space_bitmap_->Test(from_ref);
diff --git a/runtime/gc/collector/concurrent_copying.cc b/runtime/gc/collector/concurrent_copying.cc
index 8b91075..8353b26 100644
--- a/runtime/gc/collector/concurrent_copying.cc
+++ b/runtime/gc/collector/concurrent_copying.cc
@@ -170,10 +170,10 @@
       CHECK(space->IsZygoteSpace() || space->IsImageSpace());
       immune_spaces_.AddSpace(space);
     } else if (space == region_space_) {
-      accounting::ContinuousSpaceBitmap* bitmap =
-          accounting::ContinuousSpaceBitmap::Create("cc region space bitmap",
-                                                    space->Begin(), space->Capacity());
-      region_space_bitmap_ = bitmap;
+      // It is OK to clear the bitmap with mutators running since the only place it is read is
+      // VisitObjects which has exclusion with CC.
+      region_space_bitmap_ = region_space_->GetMarkBitmap();
+      region_space_bitmap_->Clear();
     }
   }
 }
@@ -246,7 +246,7 @@
     Thread* self = Thread::Current();
     CHECK(thread == self || thread->IsSuspended() || thread->GetState() == kWaitingPerformingGc)
         << thread->GetState() << " thread " << thread << " self " << self;
-    thread->SetIsGcMarking(true);
+    thread->SetIsGcMarkingAndUpdateEntrypoints(true);
     if (use_tlab_ && thread->HasTlab()) {
       if (ConcurrentCopying::kEnableFromSpaceAccountingCheck) {
         // This must come before the revoke.
@@ -354,14 +354,14 @@
   explicit VerifyGrayImmuneObjectsVisitor(ConcurrentCopying* collector)
       : collector_(collector) {}
 
-  void operator()(mirror::Object* obj, MemberOffset offset, bool /* is_static */)
+  void operator()(ObjPtr<mirror::Object> obj, MemberOffset offset, bool /* is_static */)
       const ALWAYS_INLINE REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES_SHARED(Locks::heap_bitmap_lock_) {
     CheckReference(obj->GetFieldObject<mirror::Object, kVerifyNone, kWithoutReadBarrier>(offset),
                    obj, offset);
   }
 
-  void operator()(mirror::Class* klass, mirror::Reference* ref) const
+  void operator()(ObjPtr<mirror::Class> klass, ObjPtr<mirror::Reference> ref) const
       REQUIRES_SHARED(Locks::mutator_lock_) ALWAYS_INLINE {
     CHECK(klass->IsTypeOfReferenceClass());
     CheckReference(ref->GetReferent<kWithoutReadBarrier>(),
@@ -386,16 +386,18 @@
  private:
   ConcurrentCopying* const collector_;
 
-  void CheckReference(mirror::Object* ref, mirror::Object* holder, MemberOffset offset) const
+  void CheckReference(ObjPtr<mirror::Object> ref,
+                      ObjPtr<mirror::Object> holder,
+                      MemberOffset offset) const
       REQUIRES_SHARED(Locks::mutator_lock_) {
     if (ref != nullptr) {
-      if (!collector_->immune_spaces_.ContainsObject(ref)) {
+      if (!collector_->immune_spaces_.ContainsObject(ref.Ptr())) {
         // Not immune, must be a zygote large object.
         CHECK(Runtime::Current()->GetHeap()->GetLargeObjectsSpace()->IsZygoteLargeObject(
-            Thread::Current(), ref))
+            Thread::Current(), ref.Ptr()))
             << "Non gray object references non immune, non zygote large object "<< ref << " "
-            << PrettyTypeOf(ref) << " in holder " << holder << " " << PrettyTypeOf(holder)
-            << " offset=" << offset.Uint32Value();
+            << mirror::Object::PrettyTypeOf(ref) << " in holder " << holder << " "
+            << mirror::Object::PrettyTypeOf(holder) << " offset=" << offset.Uint32Value();
       } else {
         // Make sure the large object class is immune since we will never scan the large object.
         CHECK(collector_->immune_spaces_.ContainsObject(
@@ -416,7 +418,7 @@
                                   [&visitor](mirror::Object* obj)
         REQUIRES_SHARED(Locks::mutator_lock_) {
       // If an object is not gray, it should only have references to things in the immune spaces.
-      if (obj->GetReadBarrierPointer() != ReadBarrier::GrayPtr()) {
+      if (obj->GetReadBarrierState() != ReadBarrier::GrayState()) {
         obj->VisitReferences</*kVisitNativeRoots*/true,
                              kDefaultVerifyFlags,
                              kWithoutReadBarrier>(visitor, visitor);
@@ -461,7 +463,7 @@
       if (kIsDebugBuild) {
         Locks::mutator_lock_->AssertExclusiveHeld(Thread::Current());
       }
-      obj->SetReadBarrierPointer(ReadBarrier::GrayPtr());
+      obj->SetReadBarrierState(ReadBarrier::GrayState());
     }
   }
 
@@ -484,9 +486,14 @@
     // Table is non null for boot image and zygote spaces. It is only null for application image
     // spaces.
     if (table != nullptr) {
-      // TODO: Add preclean outside the pause.
-      table->ClearCards();
+      // TODO: Consider adding precleaning outside the pause.
+      table->ProcessCards();
       table->VisitObjects(GrayImmuneObjectVisitor::Callback, &visitor);
+      // Since the cards are recorded in the mod-union table and this is paused, we can clear
+      // the cards for the space (to madvise).
+      TimingLogger::ScopedTiming split2("(Paused)ClearCards", GetTimings());
+      card_table->ClearCardRange(space->Begin(),
+                                 AlignDown(space->End(), accounting::CardTable::kCardSize));
     } else {
       // TODO: Consider having a mark bitmap for app image spaces and avoid scanning during the
       // pause because app image spaces are all dirty pages anyways.
@@ -507,26 +514,6 @@
   live_stack_freeze_size_ = heap_->GetLiveStack()->Size();
 }
 
-class EmptyCheckpoint : public Closure {
- public:
-  explicit EmptyCheckpoint(ConcurrentCopying* concurrent_copying)
-      : concurrent_copying_(concurrent_copying) {
-  }
-
-  virtual void Run(Thread* thread) OVERRIDE NO_THREAD_SAFETY_ANALYSIS {
-    // Note: self is not necessarily equal to thread since thread may be suspended.
-    Thread* self = Thread::Current();
-    CHECK(thread == self || thread->IsSuspended() || thread->GetState() == kWaitingPerformingGc)
-        << thread->GetState() << " thread " << thread << " self " << self;
-    // If thread is a running mutator, then act on behalf of the garbage collector.
-    // See the code in ThreadList::RunCheckpoint.
-    concurrent_copying_->GetBarrier().Pass(self);
-  }
-
- private:
-  ConcurrentCopying* const concurrent_copying_;
-};
-
 // Used to visit objects in the immune spaces.
 inline void ConcurrentCopying::ScanImmuneObject(mirror::Object* obj) {
   DCHECK(obj != nullptr);
@@ -542,11 +529,11 @@
 
   ALWAYS_INLINE void operator()(mirror::Object* obj) const REQUIRES_SHARED(Locks::mutator_lock_) {
     if (kUseBakerReadBarrier && kGrayDirtyImmuneObjects) {
-      if (obj->GetReadBarrierPointer() == ReadBarrier::GrayPtr()) {
+      if (obj->GetReadBarrierState() == ReadBarrier::GrayState()) {
         collector_->ScanImmuneObject(obj);
         // Done scanning the object, go back to white.
-        bool success = obj->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(),
-                                                        ReadBarrier::WhitePtr());
+        bool success = obj->AtomicSetReadBarrierState(ReadBarrier::GrayState(),
+                                                      ReadBarrier::WhiteState());
         CHECK(success);
       }
     } else {
@@ -613,9 +600,9 @@
       LOG(INFO) << "immune gray stack size=" << immune_gray_stack_.size();
     }
     for (mirror::Object* obj : immune_gray_stack_) {
-      DCHECK(obj->GetReadBarrierPointer() == ReadBarrier::GrayPtr());
-      bool success = obj->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(),
-                                                      ReadBarrier::WhitePtr());
+      DCHECK(obj->GetReadBarrierState() == ReadBarrier::GrayState());
+      bool success = obj->AtomicSetReadBarrierState(ReadBarrier::GrayState(),
+                                                    ReadBarrier::WhiteState());
       DCHECK(success);
     }
     immune_gray_stack_.clear();
@@ -739,7 +726,7 @@
     // Disable the thread-local is_gc_marking flag.
     // Note a thread that has just started right before this checkpoint may have already this flag
     // set to false, which is ok.
-    thread->SetIsGcMarking(false);
+    thread->SetIsGcMarkingAndUpdateEntrypoints(false);
     // If thread is a running mutator, then act on behalf of the garbage collector.
     // See the code in ThreadList::RunCheckpoint.
     concurrent_copying_->GetBarrier().Pass(self);
@@ -814,11 +801,11 @@
   for (mirror::Object* obj : false_gray_stack_) {
     DCHECK(IsMarked(obj));
     // The object could be white here if a thread got preempted after a success at the
-    // AtomicSetReadBarrierPointer in Mark(), GC started marking through it (but not finished so
+    // AtomicSetReadBarrierState in Mark(), GC started marking through it (but not finished so
     // still gray), and the thread ran to register it onto the false gray stack.
-    if (obj->GetReadBarrierPointer() == ReadBarrier::GrayPtr()) {
-      bool success = obj->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(),
-                                                      ReadBarrier::WhitePtr());
+    if (obj->GetReadBarrierState() == ReadBarrier::GrayState()) {
+      bool success = obj->AtomicSetReadBarrierState(ReadBarrier::GrayState(),
+                                                    ReadBarrier::WhiteState());
       DCHECK(success);
     }
   }
@@ -828,10 +815,10 @@
 
 void ConcurrentCopying::IssueEmptyCheckpoint() {
   Thread* self = Thread::Current();
-  EmptyCheckpoint check_point(this);
   ThreadList* thread_list = Runtime::Current()->GetThreadList();
-  gc_barrier_->Init(self, 0);
-  size_t barrier_count = thread_list->RunCheckpoint(&check_point);
+  Barrier* barrier = thread_list->EmptyCheckpointBarrier();
+  barrier->Init(self, 0);
+  size_t barrier_count = thread_list->RunEmptyCheckpoint();
   // If there are no threads to wait which implys that all the checkpoint functions are finished,
   // then no need to release the mutator lock.
   if (barrier_count == 0) {
@@ -841,7 +828,7 @@
   Locks::mutator_lock_->SharedUnlock(self);
   {
     ScopedThreadStateChange tsc(self, kWaitingForCheckPointsToRun);
-    gc_barrier_->Increment(self, barrier_count);
+    barrier->Increment(self, barrier_count);
   }
   Locks::mutator_lock_->SharedLock(self);
 }
@@ -860,7 +847,7 @@
 
 void ConcurrentCopying::PushOntoMarkStack(mirror::Object* to_ref) {
   CHECK_EQ(is_mark_stack_push_disallowed_.LoadRelaxed(), 0)
-      << " " << to_ref << " " << PrettyTypeOf(to_ref);
+      << " " << to_ref << " " << mirror::Object::PrettyTypeOf(to_ref);
   Thread* self = Thread::Current();  // TODO: pass self as an argument from call sites?
   CHECK(thread_running_gc_ != nullptr);
   MarkStackMode mark_stack_mode = mark_stack_mode_.LoadRelaxed();
@@ -948,9 +935,9 @@
     }
     collector_->AssertToSpaceInvariant(nullptr, MemberOffset(0), ref);
     if (kUseBakerReadBarrier) {
-      CHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::WhitePtr())
-          << "Ref " << ref << " " << PrettyTypeOf(ref)
-          << " has non-white rb_ptr ";
+      CHECK_EQ(ref->GetReadBarrierState(), ReadBarrier::WhiteState())
+          << "Ref " << ref << " " << ref->PrettyTypeOf()
+          << " has non-white rb_state ";
     }
   }
 
@@ -969,14 +956,17 @@
   explicit VerifyNoFromSpaceRefsFieldVisitor(ConcurrentCopying* collector)
       : collector_(collector) {}
 
-  void operator()(mirror::Object* obj, MemberOffset offset, bool is_static ATTRIBUTE_UNUSED) const
+  void operator()(ObjPtr<mirror::Object> obj,
+                  MemberOffset offset,
+                  bool is_static ATTRIBUTE_UNUSED) const
       REQUIRES_SHARED(Locks::mutator_lock_) ALWAYS_INLINE {
     mirror::Object* ref =
         obj->GetFieldObject<mirror::Object, kDefaultVerifyFlags, kWithoutReadBarrier>(offset);
     VerifyNoFromSpaceRefsVisitor visitor(collector_);
     visitor(ref);
   }
-  void operator()(mirror::Class* klass, mirror::Reference* ref) const
+  void operator()(ObjPtr<mirror::Class> klass,
+                  ObjPtr<mirror::Reference> ref) const
       REQUIRES_SHARED(Locks::mutator_lock_) ALWAYS_INLINE {
     CHECK(klass->IsTypeOfReferenceClass());
     this->operator()(ref, mirror::Reference::ReferentOffset(), false);
@@ -1016,8 +1006,8 @@
     VerifyNoFromSpaceRefsFieldVisitor visitor(collector);
     obj->VisitReferences(visitor, visitor);
     if (kUseBakerReadBarrier) {
-      CHECK_EQ(obj->GetReadBarrierPointer(), ReadBarrier::WhitePtr())
-          << "obj=" << obj << " non-white rb_ptr " << obj->GetReadBarrierPointer();
+      CHECK_EQ(obj->GetReadBarrierState(), ReadBarrier::WhiteState())
+          << "obj=" << obj << " non-white rb_state " << obj->GetReadBarrierState();
     }
   }
 
@@ -1091,14 +1081,16 @@
   explicit AssertToSpaceInvariantFieldVisitor(ConcurrentCopying* collector)
       : collector_(collector) {}
 
-  void operator()(mirror::Object* obj, MemberOffset offset, bool is_static ATTRIBUTE_UNUSED) const
+  void operator()(ObjPtr<mirror::Object> obj,
+                  MemberOffset offset,
+                  bool is_static ATTRIBUTE_UNUSED) const
       REQUIRES_SHARED(Locks::mutator_lock_) ALWAYS_INLINE {
     mirror::Object* ref =
         obj->GetFieldObject<mirror::Object, kDefaultVerifyFlags, kWithoutReadBarrier>(offset);
     AssertToSpaceInvariantRefsVisitor visitor(collector_);
     visitor(ref);
   }
-  void operator()(mirror::Class* klass, mirror::Reference* ref ATTRIBUTE_UNUSED) const
+  void operator()(ObjPtr<mirror::Class> klass, ObjPtr<mirror::Reference> ref ATTRIBUTE_UNUSED) const
       REQUIRES_SHARED(Locks::mutator_lock_) ALWAYS_INLINE {
     CHECK(klass->IsTypeOfReferenceClass());
   }
@@ -1241,6 +1233,10 @@
     }
     gc_mark_stack_->Reset();
   } else if (mark_stack_mode == kMarkStackModeShared) {
+    // Do an empty checkpoint to avoid a race with a mutator preempted in the middle of a read
+    // barrier but before pushing onto the mark stack. b/32508093. Note the weak ref access is
+    // disabled at this point.
+    IssueEmptyCheckpoint();
     // Process the shared GC mark stack with a lock.
     {
       MutexLock mu(self, mark_stack_lock_);
@@ -1321,8 +1317,8 @@
 inline void ConcurrentCopying::ProcessMarkStackRef(mirror::Object* to_ref) {
   DCHECK(!region_space_->IsInFromSpace(to_ref));
   if (kUseBakerReadBarrier) {
-    DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr())
-        << " " << to_ref << " " << to_ref->GetReadBarrierPointer()
+    DCHECK(to_ref->GetReadBarrierState() == ReadBarrier::GrayState())
+        << " " << to_ref << " " << to_ref->GetReadBarrierState()
         << " is_marked=" << IsMarked(to_ref);
   }
   bool add_to_live_bytes = false;
@@ -1339,8 +1335,8 @@
     Scan(to_ref);
   }
   if (kUseBakerReadBarrier) {
-    DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr())
-        << " " << to_ref << " " << to_ref->GetReadBarrierPointer()
+    DCHECK(to_ref->GetReadBarrierState() == ReadBarrier::GrayState())
+        << " " << to_ref << " " << to_ref->GetReadBarrierState()
         << " is_marked=" << IsMarked(to_ref);
   }
 #ifdef USE_BAKER_OR_BROOKS_READ_BARRIER
@@ -1356,9 +1352,9 @@
     // above IsInToSpace() evaluates to true and we change the color from gray to white here in this
     // else block.
     if (kUseBakerReadBarrier) {
-      bool success = to_ref->AtomicSetReadBarrierPointer</*kCasRelease*/true>(
-          ReadBarrier::GrayPtr(),
-          ReadBarrier::WhitePtr());
+      bool success = to_ref->AtomicSetReadBarrierState</*kCasRelease*/true>(
+          ReadBarrier::GrayState(),
+          ReadBarrier::WhiteState());
       DCHECK(success) << "Must succeed as we won the race.";
     }
   }
@@ -1446,11 +1442,11 @@
         while (!mark_stack->IsEmpty()) {
           mirror::Object* obj = mark_stack->PopBack();
           if (kUseBakerReadBarrier) {
-            mirror::Object* rb_ptr = obj->GetReadBarrierPointer();
-            LOG(INFO) << "On mark queue : " << obj << " " << PrettyTypeOf(obj) << " rb_ptr=" << rb_ptr
-                      << " is_marked=" << IsMarked(obj);
+            uint32_t rb_state = obj->GetReadBarrierState();
+            LOG(INFO) << "On mark queue : " << obj << " " << obj->PrettyTypeOf() << " rb_state="
+                      << rb_state << " is_marked=" << IsMarked(obj);
           } else {
-            LOG(INFO) << "On mark queue : " << obj << " " << PrettyTypeOf(obj)
+            LOG(INFO) << "On mark queue : " << obj << " " << obj->PrettyTypeOf()
                       << " is_marked=" << IsMarked(obj);
           }
         }
@@ -1506,8 +1502,9 @@
   accounting::LargeObjectBitmap* const live_bitmap = los->GetLiveBitmap();
   accounting::LargeObjectBitmap* const mark_bitmap = los->GetMarkBitmap();
   // Walk through all of the objects and explicitly mark the zygote ones so they don't get swept.
-  live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(los->Begin()),
-                                reinterpret_cast<uintptr_t>(los->End()),
+  std::pair<uint8_t*, uint8_t*> range = los->GetBeginEndAtomic();
+  live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(range.first),
+                                reinterpret_cast<uintptr_t>(range.second),
                                 [mark_bitmap, los, self](mirror::Object* obj)
       REQUIRES(Locks::heap_bitmap_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_) {
@@ -1594,9 +1591,8 @@
     SwapBitmaps();
     heap_->UnBindBitmaps();
 
-    // Delete the region bitmap.
+    // The bitmap was cleared at the start of the GC, there is nothing we need to do here.
     DCHECK(region_space_bitmap_ != nullptr);
-    delete region_space_bitmap_;
     region_space_bitmap_ = nullptr;
   }
 
@@ -1623,7 +1619,7 @@
         LogFromSpaceRefHolder(obj, offset);
       }
       ref->GetLockWord(false).Dump(LOG_STREAM(FATAL_WITHOUT_ABORT));
-      CHECK(false) << "Found from-space ref " << ref << " " << PrettyTypeOf(ref);
+      CHECK(false) << "Found from-space ref " << ref << " " << ref->PrettyTypeOf();
     } else {
       AssertToSpaceInvariantInNonMovingSpace(obj, ref);
     }
@@ -1670,12 +1666,14 @@
         // No info.
       } else if (gc_root_source->HasArtField()) {
         ArtField* field = gc_root_source->GetArtField();
-        LOG(FATAL_WITHOUT_ABORT) << "gc root in field " << field << " " << PrettyField(field);
+        LOG(FATAL_WITHOUT_ABORT) << "gc root in field " << field << " "
+                                 << ArtField::PrettyField(field);
         RootPrinter root_printer;
         field->VisitRoots(root_printer);
       } else if (gc_root_source->HasArtMethod()) {
         ArtMethod* method = gc_root_source->GetArtMethod();
-        LOG(FATAL_WITHOUT_ABORT) << "gc root in method " << method << " " << PrettyMethod(method);
+        LOG(FATAL_WITHOUT_ABORT) << "gc root in method " << method << " "
+                                 << ArtMethod::PrettyMethod(method);
         RootPrinter root_printer;
         method->VisitRoots(root_printer, kRuntimePointerSize);
       }
@@ -1683,7 +1681,7 @@
       region_space_->DumpNonFreeRegions(LOG_STREAM(FATAL_WITHOUT_ABORT));
       PrintFileToLog("/proc/self/maps", LogSeverity::FATAL_WITHOUT_ABORT);
       MemMap::DumpMaps(LOG_STREAM(FATAL_WITHOUT_ABORT), true);
-      CHECK(false) << "Found from-space ref " << ref << " " << PrettyTypeOf(ref);
+      CHECK(false) << "Found from-space ref " << ref << " " << ref->PrettyTypeOf();
     } else {
       AssertToSpaceInvariantInNonMovingSpace(nullptr, ref);
     }
@@ -1692,10 +1690,10 @@
 
 void ConcurrentCopying::LogFromSpaceRefHolder(mirror::Object* obj, MemberOffset offset) {
   if (kUseBakerReadBarrier) {
-    LOG(INFO) << "holder=" << obj << " " << PrettyTypeOf(obj)
-              << " holder rb_ptr=" << obj->GetReadBarrierPointer();
+    LOG(INFO) << "holder=" << obj << " " << obj->PrettyTypeOf()
+              << " holder rb_state=" << obj->GetReadBarrierState();
   } else {
-    LOG(INFO) << "holder=" << obj << " " << PrettyTypeOf(obj);
+    LOG(INFO) << "holder=" << obj << " " << obj->PrettyTypeOf();
   }
   if (region_space_->IsInFromSpace(obj)) {
     LOG(INFO) << "holder is in the from-space.";
@@ -1748,10 +1746,10 @@
         return;
       }
       bool updated_all_immune_objects = updated_all_immune_objects_.LoadSequentiallyConsistent();
-      CHECK(updated_all_immune_objects || ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr())
-          << "Unmarked immune space ref. obj=" << obj << " rb_ptr="
-          << (obj != nullptr ? obj->GetReadBarrierPointer() : nullptr)
-          << " ref=" << ref << " ref rb_ptr=" << ref->GetReadBarrierPointer()
+      CHECK(updated_all_immune_objects || ref->GetReadBarrierState() == ReadBarrier::GrayState())
+          << "Unmarked immune space ref. obj=" << obj << " rb_state="
+          << (obj != nullptr ? obj->GetReadBarrierState() : 0U)
+          << " ref=" << ref << " ref rb_state=" << ref->GetReadBarrierState()
           << " updated_all_immune_objects=" << updated_all_immune_objects;
     }
   } else {
@@ -1780,13 +1778,13 @@
   explicit RefFieldsVisitor(ConcurrentCopying* collector)
       : collector_(collector) {}
 
-  void operator()(mirror::Object* obj, MemberOffset offset, bool /* is_static */)
+  void operator()(ObjPtr<mirror::Object> obj, MemberOffset offset, bool /* is_static */)
       const ALWAYS_INLINE REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES_SHARED(Locks::heap_bitmap_lock_) {
-    collector_->Process(obj, offset);
+    collector_->Process(obj.Ptr(), offset);
   }
 
-  void operator()(mirror::Class* klass, mirror::Reference* ref) const
+  void operator()(ObjPtr<mirror::Class> klass, ObjPtr<mirror::Reference> ref) const
       REQUIRES_SHARED(Locks::mutator_lock_) ALWAYS_INLINE {
     CHECK(klass->IsTypeOfReferenceClass());
     collector_->DelayReferenceReferent(klass, ref);
@@ -1949,10 +1947,11 @@
   size_t data_offset = mirror::Array::DataOffset(component_size).SizeValue();
   if (data_offset > byte_size) {
     // An int array is too big. Use java.lang.Object.
-    mirror::Class* java_lang_Object = WellKnownClasses::ToClass(WellKnownClasses::java_lang_Object);
-    AssertToSpaceInvariant(nullptr, MemberOffset(0), java_lang_Object);
+    ObjPtr<mirror::Class> java_lang_Object =
+        WellKnownClasses::ToClass(WellKnownClasses::java_lang_Object);
+    AssertToSpaceInvariant(nullptr, MemberOffset(0), java_lang_Object.Ptr());
     CHECK_EQ(byte_size, (java_lang_Object->GetObjectSize<kVerifyNone, kWithoutReadBarrier>()));
-    dummy_obj->SetClass(java_lang_Object);
+    dummy_obj->SetClass(java_lang_Object.Ptr());
     CHECK_EQ(byte_size, (dummy_obj->SizeOf<kVerifyNone, kWithoutReadBarrier>()));
   } else {
     // Use an int array.
@@ -2143,7 +2142,7 @@
     to_ref->SetLockWord(old_lock_word, false);
     // Set the gray ptr.
     if (kUseBakerReadBarrier) {
-      to_ref->SetReadBarrierPointer(ReadBarrier::GrayPtr());
+      to_ref->SetReadBarrierState(ReadBarrier::GrayState());
     }
 
     LockWord new_lock_word = LockWord::FromForwardingAddress(reinterpret_cast<size_t>(to_ref));
@@ -2161,7 +2160,7 @@
         DCHECK_EQ(bytes_allocated, non_moving_space_bytes_allocated);
       }
       if (kUseBakerReadBarrier) {
-        DCHECK(to_ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr());
+        DCHECK(to_ref->GetReadBarrierState() == ReadBarrier::GrayState());
       }
       DCHECK(GetFwdPtr(from_ref) == to_ref);
       CHECK_NE(to_ref->GetLockWord(false).GetState(), LockWord::kForwardingAddress);
@@ -2247,14 +2246,14 @@
   if (!is_los && mark_bitmap->Test(ref)) {
     // Already marked.
     if (kUseBakerReadBarrier) {
-      DCHECK(ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr() ||
-             ref->GetReadBarrierPointer() == ReadBarrier::WhitePtr());
+      DCHECK(ref->GetReadBarrierState() == ReadBarrier::GrayState() ||
+             ref->GetReadBarrierState() == ReadBarrier::WhiteState());
     }
   } else if (is_los && los_bitmap->Test(ref)) {
     // Already marked in LOS.
     if (kUseBakerReadBarrier) {
-      DCHECK(ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr() ||
-             ref->GetReadBarrierPointer() == ReadBarrier::WhitePtr());
+      DCHECK(ref->GetReadBarrierState() == ReadBarrier::GrayState() ||
+             ref->GetReadBarrierState() == ReadBarrier::WhiteState());
     }
   } else {
     // Not marked.
@@ -2267,7 +2266,7 @@
         DCHECK(!los_bitmap->Test(ref));
       }
       if (kUseBakerReadBarrier) {
-        DCHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::WhitePtr());
+        DCHECK_EQ(ref->GetReadBarrierState(), ReadBarrier::WhiteState());
       }
     } else {
       // For the baker-style RB, we need to handle 'false-gray' cases. See the
@@ -2283,25 +2282,25 @@
       // This may or may not succeed, which is ok.
       bool cas_success = false;
       if (kUseBakerReadBarrier) {
-        cas_success = ref->AtomicSetReadBarrierPointer(ReadBarrier::WhitePtr(),
-                                                       ReadBarrier::GrayPtr());
+        cas_success = ref->AtomicSetReadBarrierState(ReadBarrier::WhiteState(),
+                                                     ReadBarrier::GrayState());
       }
       if (!is_los && mark_bitmap->AtomicTestAndSet(ref)) {
         // Already marked.
         if (kUseBakerReadBarrier && cas_success &&
-            ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr()) {
+            ref->GetReadBarrierState() == ReadBarrier::GrayState()) {
           PushOntoFalseGrayStack(ref);
         }
       } else if (is_los && los_bitmap->AtomicTestAndSet(ref)) {
         // Already marked in LOS.
         if (kUseBakerReadBarrier && cas_success &&
-            ref->GetReadBarrierPointer() == ReadBarrier::GrayPtr()) {
+            ref->GetReadBarrierState() == ReadBarrier::GrayState()) {
           PushOntoFalseGrayStack(ref);
         }
       } else {
         // Newly marked.
         if (kUseBakerReadBarrier) {
-          DCHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::GrayPtr());
+          DCHECK_EQ(ref->GetReadBarrierState(), ReadBarrier::GrayState());
         }
         PushOntoMarkStack(ref);
       }
@@ -2316,9 +2315,13 @@
     MutexLock mu(self, mark_stack_lock_);
     CHECK_EQ(pooled_mark_stacks_.size(), kMarkStackPoolSize);
   }
-  region_space_ = nullptr;
   {
-    MutexLock mu(Thread::Current(), skipped_blocks_lock_);
+    TimingLogger::ScopedTiming split("ClearRegionSpaceCards", GetTimings());
+    // We do not currently use the region space cards at all, madvise them away to save ram.
+    heap_->GetCardTable()->ClearCardRange(region_space_->Begin(), region_space_->Limit());
+  }
+  {
+    MutexLock mu(self, skipped_blocks_lock_);
     skipped_blocks_map_.clear();
   }
   {
@@ -2330,10 +2333,9 @@
     if (kUseBakerReadBarrier && kFilterModUnionCards) {
       TimingLogger::ScopedTiming split("FilterModUnionCards", GetTimings());
       ReaderMutexLock mu2(self, *Locks::heap_bitmap_lock_);
-      gc::Heap* const heap = Runtime::Current()->GetHeap();
       for (space::ContinuousSpace* space : immune_spaces_.GetSpaces()) {
         DCHECK(space->IsImageSpace() || space->IsZygoteSpace());
-        accounting::ModUnionTable* table = heap->FindModUnionTableFromSpace(space);
+        accounting::ModUnionTable* table = heap_->FindModUnionTableFromSpace(space);
         // Filter out cards that don't need to be set.
         if (table != nullptr) {
           table->FilterCards();
@@ -2342,7 +2344,7 @@
     }
     if (kUseBakerReadBarrier) {
       TimingLogger::ScopedTiming split("EmptyRBMarkBitStack", GetTimings());
-      DCHECK(rb_mark_bit_stack_.get() != nullptr);
+      DCHECK(rb_mark_bit_stack_ != nullptr);
       const auto* limit = rb_mark_bit_stack_->End();
       for (StackReference<mirror::Object>* it = rb_mark_bit_stack_->Begin(); it != limit; ++it) {
         CHECK(it->AsMirrorPtr()->AtomicSetMarkBit(1, 0));
@@ -2376,7 +2378,8 @@
   return Mark(from_ref);
 }
 
-void ConcurrentCopying::DelayReferenceReferent(mirror::Class* klass, mirror::Reference* reference) {
+void ConcurrentCopying::DelayReferenceReferent(ObjPtr<mirror::Class> klass,
+                                               ObjPtr<mirror::Reference> reference) {
   heap_->GetReferenceProcessor()->DelayReferenceReferent(klass, reference, this);
 }
 
diff --git a/runtime/gc/collector/concurrent_copying.h b/runtime/gc/collector/concurrent_copying.h
index 81ffbc5..5b8a557 100644
--- a/runtime/gc/collector/concurrent_copying.h
+++ b/runtime/gc/collector/concurrent_copying.h
@@ -169,7 +169,8 @@
   void SwitchToSharedMarkStackMode() REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!mark_stack_lock_);
   void SwitchToGcExclusiveMarkStackMode() REQUIRES_SHARED(Locks::mutator_lock_);
-  virtual void DelayReferenceReferent(mirror::Class* klass, mirror::Reference* reference) OVERRIDE
+  virtual void DelayReferenceReferent(ObjPtr<mirror::Class> klass,
+                                      ObjPtr<mirror::Reference> reference) OVERRIDE
       REQUIRES_SHARED(Locks::mutator_lock_);
   void ProcessReferences(Thread* self) REQUIRES_SHARED(Locks::mutator_lock_);
   virtual mirror::Object* MarkObject(mirror::Object* from_ref) OVERRIDE
diff --git a/runtime/gc/collector/garbage_collector.cc b/runtime/gc/collector/garbage_collector.cc
index 18c4adf..ed16854 100644
--- a/runtime/gc/collector/garbage_collector.cc
+++ b/runtime/gc/collector/garbage_collector.cc
@@ -25,6 +25,8 @@
 #include "base/systrace.h"
 #include "base/time_utils.h"
 #include "gc/accounting/heap_bitmap.h"
+#include "gc/gc_pause_listener.h"
+#include "gc/heap.h"
 #include "gc/space/large_object_space.h"
 #include "gc/space/space-inl.h"
 #include "thread-inl.h"
@@ -156,12 +158,22 @@
 
 GarbageCollector::ScopedPause::ScopedPause(GarbageCollector* collector)
     : start_time_(NanoTime()), collector_(collector) {
-  Runtime::Current()->GetThreadList()->SuspendAll(__FUNCTION__);
+  Runtime* runtime = Runtime::Current();
+  runtime->GetThreadList()->SuspendAll(__FUNCTION__);
+  GcPauseListener* pause_listener = runtime->GetHeap()->GetGcPauseListener();
+  if (pause_listener != nullptr) {
+    pause_listener->StartPause();
+  }
 }
 
 GarbageCollector::ScopedPause::~ScopedPause() {
   collector_->RegisterPause(NanoTime() - start_time_);
-  Runtime::Current()->GetThreadList()->ResumeAll();
+  Runtime* runtime = Runtime::Current();
+  GcPauseListener* pause_listener = runtime->GetHeap()->GetGcPauseListener();
+  if (pause_listener != nullptr) {
+    pause_listener->EndPause();
+  }
+  runtime->GetThreadList()->ResumeAll();
 }
 
 // Returns the current GC iteration and assocated info.
diff --git a/runtime/gc/collector/garbage_collector.h b/runtime/gc/collector/garbage_collector.h
index 4ffa254..5b51399 100644
--- a/runtime/gc/collector/garbage_collector.h
+++ b/runtime/gc/collector/garbage_collector.h
@@ -196,7 +196,8 @@
       REQUIRES_SHARED(Locks::mutator_lock_) = 0;
   virtual void MarkHeapReference(mirror::HeapReference<mirror::Object>* obj)
       REQUIRES_SHARED(Locks::mutator_lock_) = 0;
-  virtual void DelayReferenceReferent(mirror::Class* klass, mirror::Reference* reference)
+  virtual void DelayReferenceReferent(ObjPtr<mirror::Class> klass,
+                                      ObjPtr<mirror::Reference> reference)
       REQUIRES_SHARED(Locks::mutator_lock_) = 0;
 
  protected:
diff --git a/runtime/gc/collector/mark_compact.cc b/runtime/gc/collector/mark_compact.cc
index 6d2f009..ddcb6c0 100644
--- a/runtime/gc/collector/mark_compact.cc
+++ b/runtime/gc/collector/mark_compact.cc
@@ -124,9 +124,9 @@
   if (obj == nullptr) {
     return nullptr;
   }
-  if (kUseBakerOrBrooksReadBarrier) {
-    // Verify all the objects have the correct forward pointer installed.
-    obj->AssertReadBarrierPointer();
+  if (kUseBakerReadBarrier) {
+    // Verify all the objects have the correct forward state installed.
+    obj->AssertReadBarrierState();
   }
   if (!immune_spaces_.IsInImmuneRegion(obj)) {
     if (objects_before_forwarding_->HasAddress(obj)) {
@@ -418,7 +418,7 @@
     collector_->UpdateHeapReference(obj->GetFieldObjectReferenceAddr<kVerifyNone>(offset));
   }
 
-  void operator()(mirror::Class* /*klass*/, mirror::Reference* ref) const
+  void operator()(ObjPtr<mirror::Class> /*klass*/, mirror::Reference* ref) const
       REQUIRES(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
     collector_->UpdateHeapReference(
         ref->GetFieldObjectReferenceAddr<kVerifyNone>(mirror::Reference::ReferentOffset()));
@@ -543,7 +543,8 @@
 
 // Process the "referent" field in a java.lang.ref.Reference.  If the referent has not yet been
 // marked, put it on the appropriate list in the heap for later processing.
-void MarkCompact::DelayReferenceReferent(mirror::Class* klass, mirror::Reference* reference) {
+void MarkCompact::DelayReferenceReferent(ObjPtr<mirror::Class> klass,
+                                         ObjPtr<mirror::Reference> reference) {
   heap_->GetReferenceProcessor()->DelayReferenceReferent(klass, reference, this);
 }
 
@@ -551,13 +552,16 @@
  public:
   explicit MarkObjectVisitor(MarkCompact* collector) : collector_(collector) {}
 
-  void operator()(mirror::Object* obj, MemberOffset offset, bool /*is_static*/) const ALWAYS_INLINE
+  void operator()(ObjPtr<mirror::Object> obj,
+                  MemberOffset offset,
+                  bool /*is_static*/) const ALWAYS_INLINE
       REQUIRES(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
     // Object was already verified when we scanned it.
     collector_->MarkObject(obj->GetFieldObject<mirror::Object, kVerifyNone>(offset));
   }
 
-  void operator()(mirror::Class* klass, mirror::Reference* ref) const
+  void operator()(ObjPtr<mirror::Class> klass,
+                  ObjPtr<mirror::Reference> ref) const
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(Locks::heap_bitmap_lock_) {
     collector_->DelayReferenceReferent(klass, ref);
diff --git a/runtime/gc/collector/mark_compact.h b/runtime/gc/collector/mark_compact.h
index a61646c..564f85b 100644
--- a/runtime/gc/collector/mark_compact.h
+++ b/runtime/gc/collector/mark_compact.h
@@ -122,7 +122,7 @@
       OVERRIDE REQUIRES(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
 
   // Schedules an unmarked object for reference processing.
-  void DelayReferenceReferent(mirror::Class* klass, mirror::Reference* reference)
+  void DelayReferenceReferent(ObjPtr<mirror::Class> klass, ObjPtr<mirror::Reference> reference)
       REQUIRES_SHARED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
  protected:
diff --git a/runtime/gc/collector/mark_sweep.cc b/runtime/gc/collector/mark_sweep.cc
index b89d99c..673a97e 100644
--- a/runtime/gc/collector/mark_sweep.cc
+++ b/runtime/gc/collector/mark_sweep.cc
@@ -272,7 +272,7 @@
   explicit ScanObjectVisitor(MarkSweep* const mark_sweep) ALWAYS_INLINE
       : mark_sweep_(mark_sweep) {}
 
-  void operator()(mirror::Object* obj) const
+  void operator()(ObjPtr<mirror::Object> obj) const
       ALWAYS_INLINE
       REQUIRES(Locks::heap_bitmap_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_) {
@@ -280,7 +280,7 @@
       Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
       Locks::heap_bitmap_lock_->AssertExclusiveHeld(Thread::Current());
     }
-    mark_sweep_->ScanObject(obj);
+    mark_sweep_->ScanObject(obj.Ptr());
   }
 
  private:
@@ -423,7 +423,7 @@
                             << (mark_sweep_->GetHeap()->IsLiveObjectLocked(holder_)
                                 ? "alive" : "dead")
                             << " holder_size=" << holder_size
-                            << " holder_type=" << PrettyTypeOf(holder_)
+                            << " holder_type=" << holder_->PrettyTypeOf()
                             << " offset=" << offset_.Uint32Value()
                             << " field=" << (field != nullptr ? field->GetName() : "nullptr")
                             << " field_type="
@@ -462,9 +462,9 @@
                                          mirror::Object* holder,
                                          MemberOffset offset) {
   DCHECK(obj != nullptr);
-  if (kUseBakerOrBrooksReadBarrier) {
-    // Verify all the objects have the correct pointer installed.
-    obj->AssertReadBarrierPointer();
+  if (kUseBakerReadBarrier) {
+    // Verify all the objects have the correct state installed.
+    obj->AssertReadBarrierState();
   }
   if (immune_spaces_.IsInImmuneRegion(obj)) {
     if (kCountMarkedObjects) {
@@ -503,9 +503,9 @@
 
 inline bool MarkSweep::MarkObjectParallel(mirror::Object* obj) {
   DCHECK(obj != nullptr);
-  if (kUseBakerOrBrooksReadBarrier) {
-    // Verify all the objects have the correct pointer installed.
-    obj->AssertReadBarrierPointer();
+  if (kUseBakerReadBarrier) {
+    // Verify all the objects have the correct state installed.
+    obj->AssertReadBarrierState();
   }
   if (immune_spaces_.IsInImmuneRegion(obj)) {
     DCHECK(IsMarked(obj) != nullptr);
@@ -608,15 +608,14 @@
 void MarkSweep::MarkConcurrentRoots(VisitRootFlags flags) {
   TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
   // Visit all runtime roots and clear dirty flags.
-  Runtime::Current()->VisitConcurrentRoots(
-      this, static_cast<VisitRootFlags>(flags | kVisitRootFlagNonMoving));
+  Runtime::Current()->VisitConcurrentRoots(this, flags);
 }
 
 class MarkSweep::DelayReferenceReferentVisitor {
  public:
   explicit DelayReferenceReferentVisitor(MarkSweep* collector) : collector_(collector) {}
 
-  void operator()(mirror::Class* klass, mirror::Reference* ref) const
+  void operator()(ObjPtr<mirror::Class> klass, ObjPtr<mirror::Reference> ref) const
       REQUIRES(Locks::heap_bitmap_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_) {
     collector_->DelayReferenceReferent(klass, ref);
@@ -1297,9 +1296,9 @@
   }
 }
 
-// Process the "referent" field in a java.lang.ref.Reference.  If the referent has not yet been
+// Process the "referent" field lin a java.lang.ref.Reference.  If the referent has not yet been
 // marked, put it on the appropriate list in the heap for later processing.
-void MarkSweep::DelayReferenceReferent(mirror::Class* klass, mirror::Reference* ref) {
+void MarkSweep::DelayReferenceReferent(ObjPtr<mirror::Class> klass, ObjPtr<mirror::Reference> ref) {
   heap_->GetReferenceProcessor()->DelayReferenceReferent(klass, ref, this);
 }
 
diff --git a/runtime/gc/collector/mark_sweep.h b/runtime/gc/collector/mark_sweep.h
index bbac9da..a94cb27 100644
--- a/runtime/gc/collector/mark_sweep.h
+++ b/runtime/gc/collector/mark_sweep.h
@@ -98,7 +98,7 @@
       REQUIRES(!mark_stack_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  void MarkConcurrentRoots(VisitRootFlags flags)
+  virtual void MarkConcurrentRoots(VisitRootFlags flags)
       REQUIRES(Locks::heap_bitmap_lock_)
       REQUIRES(!mark_stack_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
@@ -225,7 +225,7 @@
   }
 
   // Schedules an unmarked object for reference processing.
-  void DelayReferenceReferent(mirror::Class* klass, mirror::Reference* reference)
+  void DelayReferenceReferent(ObjPtr<mirror::Class> klass, ObjPtr<mirror::Reference> reference)
       REQUIRES_SHARED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
  protected:
diff --git a/runtime/gc/collector/semi_space.cc b/runtime/gc/collector/semi_space.cc
index 2e97172..a815b83 100644
--- a/runtime/gc/collector/semi_space.cc
+++ b/runtime/gc/collector/semi_space.cc
@@ -296,7 +296,6 @@
       REQUIRES_SHARED(Locks::mutator_lock_) ALWAYS_INLINE {
     mirror::Object* ref = obj->GetFieldObject<mirror::Object>(offset);
     if (from_space_->HasAddress(ref)) {
-      Runtime::Current()->GetHeap()->DumpObject(LOG_STREAM(INFO), obj);
       LOG(FATAL) << ref << " found in from space";
     }
   }
@@ -409,8 +408,9 @@
     // classes (primitive array classes) that could move though they
     // don't contain any other references.
     accounting::LargeObjectBitmap* large_live_bitmap = los->GetLiveBitmap();
-    large_live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(los->Begin()),
-                                        reinterpret_cast<uintptr_t>(los->End()),
+    std::pair<uint8_t*, uint8_t*> range = los->GetBeginEndAtomic();
+    large_live_bitmap->VisitMarkedRange(reinterpret_cast<uintptr_t>(range.first),
+                                        reinterpret_cast<uintptr_t>(range.second),
                                         [this](mirror::Object* obj)
         REQUIRES(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
       ScanObject(obj);
@@ -589,13 +589,9 @@
   // references.
   saved_bytes_ +=
       CopyAvoidingDirtyingPages(reinterpret_cast<void*>(forward_address), obj, object_size);
-  if (kUseBakerOrBrooksReadBarrier) {
-    obj->AssertReadBarrierPointer();
-    if (kUseBrooksReadBarrier) {
-      DCHECK_EQ(forward_address->GetReadBarrierPointer(), obj);
-      forward_address->SetReadBarrierPointer(forward_address);
-    }
-    forward_address->AssertReadBarrierPointer();
+  if (kUseBakerReadBarrier) {
+    obj->AssertReadBarrierState();
+    forward_address->AssertReadBarrierState();
   }
   DCHECK(to_space_->HasAddress(forward_address) ||
          fallback_space_->HasAddress(forward_address) ||
@@ -680,7 +676,8 @@
 
 // Process the "referent" field in a java.lang.ref.Reference.  If the referent has not yet been
 // marked, put it on the appropriate list in the heap for later processing.
-void SemiSpace::DelayReferenceReferent(mirror::Class* klass, mirror::Reference* reference) {
+void SemiSpace::DelayReferenceReferent(ObjPtr<mirror::Class> klass,
+                                       ObjPtr<mirror::Reference> reference) {
   heap_->GetReferenceProcessor()->DelayReferenceReferent(klass, reference, this);
 }
 
@@ -688,13 +685,13 @@
  public:
   explicit MarkObjectVisitor(SemiSpace* collector) : collector_(collector) {}
 
-  void operator()(Object* obj, MemberOffset offset, bool /* is_static */) const ALWAYS_INLINE
+  void operator()(ObjPtr<Object> obj, MemberOffset offset, bool /* is_static */) const ALWAYS_INLINE
       REQUIRES(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
     // Object was already verified when we scanned it.
     collector_->MarkObject(obj->GetFieldObjectReferenceAddr<kVerifyNone>(offset));
   }
 
-  void operator()(mirror::Class* klass, mirror::Reference* ref) const
+  void operator()(ObjPtr<mirror::Class> klass, ObjPtr<mirror::Reference> ref) const
       REQUIRES(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
     collector_->DelayReferenceReferent(klass, ref);
   }
diff --git a/runtime/gc/collector/semi_space.h b/runtime/gc/collector/semi_space.h
index 4b63d9b..4cebcc3 100644
--- a/runtime/gc/collector/semi_space.h
+++ b/runtime/gc/collector/semi_space.h
@@ -156,7 +156,7 @@
       REQUIRES(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
   // Schedules an unmarked object for reference processing.
-  void DelayReferenceReferent(mirror::Class* klass, mirror::Reference* reference)
+  void DelayReferenceReferent(ObjPtr<mirror::Class> klass, ObjPtr<mirror::Reference> reference)
       REQUIRES_SHARED(Locks::heap_bitmap_lock_, Locks::mutator_lock_);
 
  protected:
diff --git a/runtime/gc/collector/sticky_mark_sweep.cc b/runtime/gc/collector/sticky_mark_sweep.cc
index bb7e854..a2dbe3f 100644
--- a/runtime/gc/collector/sticky_mark_sweep.cc
+++ b/runtime/gc/collector/sticky_mark_sweep.cc
@@ -56,6 +56,19 @@
   RecursiveMarkDirtyObjects(false, accounting::CardTable::kCardDirty - 1);
 }
 
+void StickyMarkSweep::MarkConcurrentRoots(VisitRootFlags flags) {
+  TimingLogger::ScopedTiming t(__FUNCTION__, GetTimings());
+  // Visit all runtime roots and clear dirty flags including class loader. This is done to prevent
+  // incorrect class unloading since the GC does not card mark when storing store the class during
+  // object allocation. Doing this for each allocation would be slow.
+  // Since the card is not dirty, it means the object may not get scanned. This can cause class
+  // unloading to occur even though the class and class loader are reachable through the object's
+  // class.
+  Runtime::Current()->VisitConcurrentRoots(
+      this,
+      static_cast<VisitRootFlags>(flags | kVisitRootFlagClassLoader));
+}
+
 void StickyMarkSweep::Sweep(bool swap_bitmaps ATTRIBUTE_UNUSED) {
   SweepArray(GetHeap()->GetLiveStack(), false);
 }
diff --git a/runtime/gc/collector/sticky_mark_sweep.h b/runtime/gc/collector/sticky_mark_sweep.h
index 100ca64..45f912f 100644
--- a/runtime/gc/collector/sticky_mark_sweep.h
+++ b/runtime/gc/collector/sticky_mark_sweep.h
@@ -33,6 +33,12 @@
   StickyMarkSweep(Heap* heap, bool is_concurrent, const std::string& name_prefix = "");
   ~StickyMarkSweep() {}
 
+  virtual void MarkConcurrentRoots(VisitRootFlags flags)
+      OVERRIDE
+      REQUIRES(Locks::heap_bitmap_lock_)
+      REQUIRES(!mark_stack_lock_)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
  protected:
   // Bind the live bits to the mark bits of bitmaps for all spaces, all spaces other than the
   // alloc space will be marked as immune.
diff --git a/runtime/gc/gc_cause.cc b/runtime/gc/gc_cause.cc
index 1d377a4..7ff845d 100644
--- a/runtime/gc/gc_cause.cc
+++ b/runtime/gc/gc_cause.cc
@@ -38,10 +38,10 @@
     case kGcCauseDebugger: return "Debugger";
     case kGcCauseClassLinker: return "ClassLinker";
     case kGcCauseJitCodeCache: return "JitCodeCache";
-    default:
-      LOG(FATAL) << "Unreachable";
-      UNREACHABLE();
+    case kGcCauseAddRemoveSystemWeakHolder: return "SystemWeakHolder";
   }
+  LOG(FATAL) << "Unreachable";
+  UNREACHABLE();
 }
 
 std::ostream& operator<<(std::ostream& os, const GcCause& gc_cause) {
diff --git a/test/562-no-intermediate/src/Main.java b/runtime/gc/gc_pause_listener.h
similarity index 64%
copy from test/562-no-intermediate/src/Main.java
copy to runtime/gc/gc_pause_listener.h
index 3b74d6f..da35d2a 100644
--- a/test/562-no-intermediate/src/Main.java
+++ b/runtime/gc/gc_pause_listener.h
@@ -14,14 +14,21 @@
  * limitations under the License.
  */
 
-public class Main {
+#ifndef ART_RUNTIME_GC_GC_PAUSE_LISTENER_H_
+#define ART_RUNTIME_GC_GC_PAUSE_LISTENER_H_
 
-  /// CHECK-START-ARM64: int Main.main(String[]) register_allocator (after)
-  /// CHECK-NOT: IntermediateAddress
-  public static void main(String[] args) {
-    array[index] += Math.cos(42);
-  }
+namespace art {
+namespace gc {
 
-  static int index = 0;
-  static double[] array = new double[2];
-}
+class GcPauseListener {
+ public:
+  virtual ~GcPauseListener() {}
+
+  virtual void StartPause() = 0;
+  virtual void EndPause() = 0;
+};
+
+}  // namespace gc
+}  // namespace art
+
+#endif  // ART_RUNTIME_GC_GC_PAUSE_LISTENER_H_
diff --git a/runtime/gc/heap-inl.h b/runtime/gc/heap-inl.h
index 6d61c64..97129e8 100644
--- a/runtime/gc/heap-inl.h
+++ b/runtime/gc/heap-inl.h
@@ -19,6 +19,7 @@
 
 #include "heap.h"
 
+#include "allocation_listener.h"
 #include "base/time_utils.h"
 #include "gc/accounting/card_table-inl.h"
 #include "gc/allocation_record.h"
@@ -28,6 +29,7 @@
 #include "gc/space/large_object_space.h"
 #include "gc/space/region_space-inl.h"
 #include "gc/space/rosalloc_space-inl.h"
+#include "obj_ptr-inl.h"
 #include "runtime.h"
 #include "handle_scope-inl.h"
 #include "thread-inl.h"
@@ -39,7 +41,7 @@
 
 template <bool kInstrumented, bool kCheckLargeObject, typename PreFenceVisitor>
 inline mirror::Object* Heap::AllocObjectWithAllocator(Thread* self,
-                                                      mirror::Class* klass,
+                                                      ObjPtr<mirror::Class> klass,
                                                       size_t byte_count,
                                                       AllocatorType allocator,
                                                       const PreFenceVisitor& pre_fence_visitor) {
@@ -50,16 +52,19 @@
     CHECK_EQ(self->GetState(), kRunnable);
     self->AssertThreadSuspensionIsAllowable();
     self->AssertNoPendingException();
+    // Make sure to preserve klass.
+    StackHandleScope<1> hs(self);
+    HandleWrapperObjPtr<mirror::Class> h = hs.NewHandleWrapper(&klass);
     self->PoisonObjectPointers();
   }
   // Need to check that we arent the large object allocator since the large object allocation code
   // path this function. If we didn't check we would have an infinite loop.
-  mirror::Object* obj;
+  ObjPtr<mirror::Object> obj;
   if (kCheckLargeObject && UNLIKELY(ShouldAllocLargeObject(klass, byte_count))) {
     obj = AllocLargeObject<kInstrumented, PreFenceVisitor>(self, &klass, byte_count,
                                                            pre_fence_visitor);
     if (obj != nullptr) {
-      return obj;
+      return obj.Ptr();
     } else {
       // There should be an OOM exception, since we are retrying, clear it.
       self->ClearException();
@@ -81,26 +86,21 @@
     obj = self->AllocTlab(byte_count);
     DCHECK(obj != nullptr) << "AllocTlab can't fail";
     obj->SetClass(klass);
-    if (kUseBakerOrBrooksReadBarrier) {
-      if (kUseBrooksReadBarrier) {
-        obj->SetReadBarrierPointer(obj);
-      }
-      obj->AssertReadBarrierPointer();
+    if (kUseBakerReadBarrier) {
+      obj->AssertReadBarrierState();
     }
     bytes_allocated = byte_count;
     usable_size = bytes_allocated;
     pre_fence_visitor(obj, usable_size);
     QuasiAtomic::ThreadFenceForConstructor();
-  } else if (!kInstrumented && allocator == kAllocatorTypeRosAlloc &&
-             (obj = rosalloc_space_->AllocThreadLocal(self, byte_count, &bytes_allocated)) &&
-             LIKELY(obj != nullptr)) {
+  } else if (
+      !kInstrumented && allocator == kAllocatorTypeRosAlloc &&
+      (obj = rosalloc_space_->AllocThreadLocal(self, byte_count, &bytes_allocated)) != nullptr &&
+      LIKELY(obj != nullptr)) {
     DCHECK(!is_running_on_memory_tool_);
     obj->SetClass(klass);
-    if (kUseBakerOrBrooksReadBarrier) {
-      if (kUseBrooksReadBarrier) {
-        obj->SetReadBarrierPointer(obj);
-      }
-      obj->AssertReadBarrierPointer();
+    if (kUseBakerReadBarrier) {
+      obj->AssertReadBarrierState();
     }
     usable_size = bytes_allocated;
     pre_fence_visitor(obj, usable_size);
@@ -137,11 +137,8 @@
     DCHECK_GT(bytes_allocated, 0u);
     DCHECK_GT(usable_size, 0u);
     obj->SetClass(klass);
-    if (kUseBakerOrBrooksReadBarrier) {
-      if (kUseBrooksReadBarrier) {
-        obj->SetReadBarrierPointer(obj);
-      }
-      obj->AssertReadBarrierPointer();
+    if (kUseBakerReadBarrier) {
+      obj->AssertReadBarrierState();
     }
     if (collector::SemiSpace::kUseRememberedSet && UNLIKELY(allocator == kAllocatorTypeNonMoving)) {
       // (Note this if statement will be constant folded away for the
@@ -183,6 +180,12 @@
       DCHECK(allocation_records_ != nullptr);
       allocation_records_->RecordAllocation(self, &obj, bytes_allocated);
     }
+    AllocationListener* l = alloc_listener_.LoadSequentiallyConsistent();
+    if (l != nullptr) {
+      // Same as above. We assume that a listener that was once stored will never be deleted.
+      // Otherwise we'd have to perform this under a lock.
+      l->ObjectAllocated(self, &obj, bytes_allocated);
+    }
   } else {
     DCHECK(!IsAllocTrackingEnabled());
   }
@@ -205,25 +208,25 @@
   }
   VerifyObject(obj);
   self->VerifyStack();
-  return obj;
+  return obj.Ptr();
 }
 
 // The size of a thread-local allocation stack in the number of references.
 static constexpr size_t kThreadLocalAllocationStackSize = 128;
 
-inline void Heap::PushOnAllocationStack(Thread* self, mirror::Object** obj) {
+inline void Heap::PushOnAllocationStack(Thread* self, ObjPtr<mirror::Object>* obj) {
   if (kUseThreadLocalAllocationStack) {
-    if (UNLIKELY(!self->PushOnThreadLocalAllocationStack(*obj))) {
+    if (UNLIKELY(!self->PushOnThreadLocalAllocationStack(obj->Ptr()))) {
       PushOnThreadLocalAllocationStackWithInternalGC(self, obj);
     }
-  } else if (UNLIKELY(!allocation_stack_->AtomicPushBack(*obj))) {
+  } else if (UNLIKELY(!allocation_stack_->AtomicPushBack(obj->Ptr()))) {
     PushOnAllocationStackWithInternalGC(self, obj);
   }
 }
 
 template <bool kInstrumented, typename PreFenceVisitor>
 inline mirror::Object* Heap::AllocLargeObject(Thread* self,
-                                              mirror::Class** klass,
+                                              ObjPtr<mirror::Class>* klass,
                                               size_t byte_count,
                                               const PreFenceVisitor& pre_fence_visitor) {
   // Save and restore the class in case it moves.
@@ -397,7 +400,7 @@
   return ret;
 }
 
-inline bool Heap::ShouldAllocLargeObject(mirror::Class* c, size_t byte_count) const {
+inline bool Heap::ShouldAllocLargeObject(ObjPtr<mirror::Class> c, size_t byte_count) const {
   // We need to have a zygote space or else our newly allocated large object can end up in the
   // Zygote resulting in it being prematurely freed.
   // We can only do this for primitive objects since large objects will not be within the card table
@@ -427,12 +430,28 @@
 
 inline void Heap::CheckConcurrentGC(Thread* self,
                                     size_t new_num_bytes_allocated,
-                                    mirror::Object** obj) {
+                                    ObjPtr<mirror::Object>* obj) {
   if (UNLIKELY(new_num_bytes_allocated >= concurrent_start_bytes_)) {
     RequestConcurrentGCAndSaveObject(self, false, obj);
   }
 }
 
+inline void Heap::WriteBarrierField(ObjPtr<mirror::Object> dst,
+                                    MemberOffset offset ATTRIBUTE_UNUSED,
+                                    ObjPtr<mirror::Object> new_value ATTRIBUTE_UNUSED) {
+  card_table_->MarkCard(dst.Ptr());
+}
+
+inline void Heap::WriteBarrierArray(ObjPtr<mirror::Object> dst,
+                                    int start_offset ATTRIBUTE_UNUSED,
+                                    size_t length ATTRIBUTE_UNUSED) {
+  card_table_->MarkCard(dst.Ptr());
+}
+
+inline void Heap::WriteBarrierEveryFieldOf(ObjPtr<mirror::Object> obj) {
+  card_table_->MarkCard(obj.Ptr());
+}
+
 }  // namespace gc
 }  // namespace art
 
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index 01ad8d0..f0e619d 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -21,6 +21,7 @@
 #include <unwind.h>  // For GC verification.
 #include <vector>
 
+#include "allocation_listener.h"
 #include "art_field-inl.h"
 #include "base/allocator.h"
 #include "base/arena_allocator.h"
@@ -46,6 +47,7 @@
 #include "gc/collector/semi_space.h"
 #include "gc/collector/sticky_mark_sweep.h"
 #include "gc/reference_processor.h"
+#include "gc/scoped_gc_critical_section.h"
 #include "gc/space/bump_pointer_space.h"
 #include "gc/space/dlmalloc_space-inl.h"
 #include "gc/space/image_space.h"
@@ -56,6 +58,7 @@
 #include "gc/space/zygote_space.h"
 #include "gc/task_processor.h"
 #include "entrypoints/quick/quick_alloc_entrypoints.h"
+#include "gc_pause_listener.h"
 #include "heap-inl.h"
 #include "image.h"
 #include "intern_table.h"
@@ -261,6 +264,10 @@
   if (VLOG_IS_ON(heap) || VLOG_IS_ON(startup)) {
     LOG(INFO) << "Heap() entering";
   }
+  if (kUseReadBarrier) {
+    CHECK_EQ(foreground_collector_type_, kCollectorTypeCC);
+    CHECK_EQ(background_collector_type_, kCollectorTypeCCBackground);
+  }
   CHECK_GE(large_object_threshold, kMinLargeObjectThreshold);
   ScopedTrace trace(__FUNCTION__);
   Runtime* const runtime = Runtime::Current();
@@ -609,6 +616,8 @@
       concurrent_copying_collector_ = new collector::ConcurrentCopying(this,
                                                                        "",
                                                                        measure_gc_performance);
+      DCHECK(region_space_ != nullptr);
+      concurrent_copying_collector_->SetRegionSpace(region_space_);
       garbage_collectors_.push_back(concurrent_copying_collector_);
     }
     if (MayUseCollector(kCollectorTypeMC)) {
@@ -759,83 +768,6 @@
   }
 }
 
-std::string Heap::SafeGetClassDescriptor(mirror::Class* klass) {
-  if (!IsValidContinuousSpaceObjectAddress(klass)) {
-    return StringPrintf("<non heap address klass %p>", klass);
-  }
-  mirror::Class* component_type = klass->GetComponentType<kVerifyNone>();
-  if (IsValidContinuousSpaceObjectAddress(component_type) && klass->IsArrayClass<kVerifyNone>()) {
-    std::string result("[");
-    result += SafeGetClassDescriptor(component_type);
-    return result;
-  } else if (UNLIKELY(klass->IsPrimitive<kVerifyNone>())) {
-    return Primitive::Descriptor(klass->GetPrimitiveType<kVerifyNone>());
-  } else if (UNLIKELY(klass->IsProxyClass<kVerifyNone>())) {
-    return Runtime::Current()->GetClassLinker()->GetDescriptorForProxy(klass);
-  } else {
-    mirror::DexCache* dex_cache = klass->GetDexCache<kVerifyNone>();
-    if (!IsValidContinuousSpaceObjectAddress(dex_cache)) {
-      return StringPrintf("<non heap address dex_cache %p>", dex_cache);
-    }
-    const DexFile* dex_file = dex_cache->GetDexFile();
-    uint16_t class_def_idx = klass->GetDexClassDefIndex();
-    if (class_def_idx == DexFile::kDexNoIndex16) {
-      return "<class def not found>";
-    }
-    const DexFile::ClassDef& class_def = dex_file->GetClassDef(class_def_idx);
-    const DexFile::TypeId& type_id = dex_file->GetTypeId(class_def.class_idx_);
-    return dex_file->GetTypeDescriptor(type_id);
-  }
-}
-
-std::string Heap::SafePrettyTypeOf(mirror::Object* obj) {
-  if (obj == nullptr) {
-    return "null";
-  }
-  mirror::Class* klass = obj->GetClass<kVerifyNone>();
-  if (klass == nullptr) {
-    return "(class=null)";
-  }
-  std::string result(SafeGetClassDescriptor(klass));
-  if (obj->IsClass()) {
-    result += "<" + SafeGetClassDescriptor(obj->AsClass<kVerifyNone>()) + ">";
-  }
-  return result;
-}
-
-void Heap::DumpObject(std::ostream& stream, mirror::Object* obj) {
-  if (obj == nullptr) {
-    stream << "(obj=null)";
-    return;
-  }
-  if (IsAligned<kObjectAlignment>(obj)) {
-    space::Space* space = nullptr;
-    // Don't use find space since it only finds spaces which actually contain objects instead of
-    // spaces which may contain objects (e.g. cleared bump pointer spaces).
-    for (const auto& cur_space : continuous_spaces_) {
-      if (cur_space->HasAddress(obj)) {
-        space = cur_space;
-        break;
-      }
-    }
-    // Unprotect all the spaces.
-    for (const auto& con_space : continuous_spaces_) {
-      mprotect(con_space->Begin(), con_space->Capacity(), PROT_READ | PROT_WRITE);
-    }
-    stream << "Object " << obj;
-    if (space != nullptr) {
-      stream << " in space " << *space;
-    }
-    mirror::Class* klass = obj->GetClass<kVerifyNone>();
-    stream << "\nclass=" << klass;
-    if (klass != nullptr) {
-      stream << " type= " << SafePrettyTypeOf(obj);
-    }
-    // Re-protect the address we faulted on.
-    mprotect(AlignDown(obj, kPageSize), kPageSize, PROT_NONE);
-  }
-}
-
 bool Heap::IsCompilingBoot() const {
   if (!Runtime::Current()->IsAotCompiler()) {
     return false;
@@ -1011,10 +943,14 @@
     }
     DecrementDisableMovingGC(self);
   } else {
+    // Since concurrent moving GC has thread suspension, also poison ObjPtr the normal case to
+    // catch bugs.
+    self->PoisonObjectPointers();
     // GCs can move objects, so don't allow this.
     ScopedAssertNoThreadSuspension ants("Visiting objects");
     DCHECK(region_space_ == nullptr);
     VisitObjectsInternal(callback, arg);
+    self->PoisonObjectPointers();
   }
 }
 
@@ -1287,6 +1223,16 @@
   }
 }
 
+ALWAYS_INLINE
+static inline AllocationListener* GetAndOverwriteAllocationListener(
+    Atomic<AllocationListener*>* storage, AllocationListener* new_value) {
+  AllocationListener* old;
+  do {
+    old = storage->LoadSequentiallyConsistent();
+  } while (!storage->CompareExchangeStrongSequentiallyConsistent(old, new_value));
+  return old;
+}
+
 Heap::~Heap() {
   VLOG(heap) << "Starting ~Heap()";
   STLDeleteElements(&garbage_collectors_);
@@ -1307,36 +1253,46 @@
         << " total=" << seen_backtrace_count_.LoadRelaxed() +
             unique_backtrace_count_.LoadRelaxed();
   }
+
   VLOG(heap) << "Finished ~Heap()";
 }
 
-space::ContinuousSpace* Heap::FindContinuousSpaceFromObject(const mirror::Object* obj,
-                                                            bool fail_ok) const {
+
+space::ContinuousSpace* Heap::FindContinuousSpaceFromAddress(const mirror::Object* addr) const {
   for (const auto& space : continuous_spaces_) {
-    if (space->Contains(obj)) {
+    if (space->Contains(addr)) {
       return space;
     }
   }
-  if (!fail_ok) {
-    LOG(FATAL) << "object " << reinterpret_cast<const void*>(obj) << " not inside any spaces!";
-  }
   return nullptr;
 }
 
-space::DiscontinuousSpace* Heap::FindDiscontinuousSpaceFromObject(const mirror::Object* obj,
+space::ContinuousSpace* Heap::FindContinuousSpaceFromObject(ObjPtr<mirror::Object> obj,
+                                                            bool fail_ok) const {
+  space::ContinuousSpace* space = FindContinuousSpaceFromAddress(obj.Ptr());
+  if (space != nullptr) {
+    return space;
+  }
+  if (!fail_ok) {
+    LOG(FATAL) << "object " << obj << " not inside any spaces!";
+  }
+  return nullptr;
+}
+
+space::DiscontinuousSpace* Heap::FindDiscontinuousSpaceFromObject(ObjPtr<mirror::Object> obj,
                                                                   bool fail_ok) const {
   for (const auto& space : discontinuous_spaces_) {
-    if (space->Contains(obj)) {
+    if (space->Contains(obj.Ptr())) {
       return space;
     }
   }
   if (!fail_ok) {
-    LOG(FATAL) << "object " << reinterpret_cast<const void*>(obj) << " not inside any spaces!";
+    LOG(FATAL) << "object " << obj << " not inside any spaces!";
   }
   return nullptr;
 }
 
-space::Space* Heap::FindSpaceFromObject(const mirror::Object* obj, bool fail_ok) const {
+space::Space* Heap::FindSpaceFromObject(ObjPtr<mirror::Object> obj, bool fail_ok) const {
   space::Space* result = FindContinuousSpaceFromObject(obj, true);
   if (result != nullptr) {
     return result;
@@ -1344,6 +1300,21 @@
   return FindDiscontinuousSpaceFromObject(obj, fail_ok);
 }
 
+space::Space* Heap::FindSpaceFromAddress(const void* addr) const {
+  for (const auto& space : continuous_spaces_) {
+    if (space->Contains(reinterpret_cast<const mirror::Object*>(addr))) {
+      return space;
+    }
+  }
+  for (const auto& space : discontinuous_spaces_) {
+    if (space->Contains(reinterpret_cast<const mirror::Object*>(addr))) {
+      return space;
+    }
+  }
+  return nullptr;
+}
+
+
 void Heap::ThrowOutOfMemoryError(Thread* self, size_t byte_count, AllocatorType allocator_type) {
   // If we're in a stack overflow, do not create a new exception. It would require running the
   // constructor, which will of course still be in a stack overflow.
@@ -1408,6 +1379,8 @@
     // Deflate the monitors, this can cause a pause but shouldn't matter since we don't care
     // about pauses.
     ScopedTrace trace("Deflating monitors");
+    // Avoid race conditions on the lock word for CC.
+    ScopedGCCriticalSection gcs(self, kGcCauseTrim, kCollectorTypeHeapTrim);
     ScopedSuspendAll ssa(__FUNCTION__);
     uint64_t start_time = NanoTime();
     size_t count = runtime->GetMonitorList()->DeflateMonitors();
@@ -1508,62 +1481,49 @@
       << static_cast<int>(100 * managed_utilization) << "%.";
 }
 
-bool Heap::IsValidObjectAddress(ObjPtr<mirror::Object> obj) const {
-  // Note: we deliberately don't take the lock here, and mustn't test anything that would require
-  // taking the lock.
-  if (obj == nullptr) {
+bool Heap::IsValidObjectAddress(const void* addr) const {
+  if (addr == nullptr) {
     return true;
   }
-  return IsAligned<kObjectAlignment>(obj.Ptr()) &&
-      FindSpaceFromObject(obj.Ptr(), true) != nullptr;
+  return IsAligned<kObjectAlignment>(addr) && FindSpaceFromAddress(addr) != nullptr;
 }
 
-bool Heap::IsNonDiscontinuousSpaceHeapAddress(const mirror::Object* obj) const {
-  return FindContinuousSpaceFromObject(obj, true) != nullptr;
+bool Heap::IsNonDiscontinuousSpaceHeapAddress(const void* addr) const {
+  return FindContinuousSpaceFromAddress(reinterpret_cast<const mirror::Object*>(addr)) != nullptr;
 }
 
-bool Heap::IsValidContinuousSpaceObjectAddress(const mirror::Object* obj) const {
-  if (obj == nullptr || !IsAligned<kObjectAlignment>(obj)) {
+bool Heap::IsLiveObjectLocked(ObjPtr<mirror::Object> obj,
+                              bool search_allocation_stack,
+                              bool search_live_stack,
+                              bool sorted) {
+  if (UNLIKELY(!IsAligned<kObjectAlignment>(obj.Ptr()))) {
     return false;
   }
-  for (const auto& space : continuous_spaces_) {
-    if (space->HasAddress(obj)) {
-      return true;
-    }
-  }
-  return false;
-}
-
-bool Heap::IsLiveObjectLocked(mirror::Object* obj, bool search_allocation_stack,
-                              bool search_live_stack, bool sorted) {
-  if (UNLIKELY(!IsAligned<kObjectAlignment>(obj))) {
-    return false;
-  }
-  if (bump_pointer_space_ != nullptr && bump_pointer_space_->HasAddress(obj)) {
+  if (bump_pointer_space_ != nullptr && bump_pointer_space_->HasAddress(obj.Ptr())) {
     mirror::Class* klass = obj->GetClass<kVerifyNone>();
     if (obj == klass) {
       // This case happens for java.lang.Class.
       return true;
     }
     return VerifyClassClass(klass) && IsLiveObjectLocked(klass);
-  } else if (temp_space_ != nullptr && temp_space_->HasAddress(obj)) {
+  } else if (temp_space_ != nullptr && temp_space_->HasAddress(obj.Ptr())) {
     // If we are in the allocated region of the temp space, then we are probably live (e.g. during
     // a GC). When a GC isn't running End() - Begin() is 0 which means no objects are contained.
-    return temp_space_->Contains(obj);
+    return temp_space_->Contains(obj.Ptr());
   }
-  if (region_space_ != nullptr && region_space_->HasAddress(obj)) {
+  if (region_space_ != nullptr && region_space_->HasAddress(obj.Ptr())) {
     return true;
   }
   space::ContinuousSpace* c_space = FindContinuousSpaceFromObject(obj, true);
   space::DiscontinuousSpace* d_space = nullptr;
   if (c_space != nullptr) {
-    if (c_space->GetLiveBitmap()->Test(obj)) {
+    if (c_space->GetLiveBitmap()->Test(obj.Ptr())) {
       return true;
     }
   } else {
     d_space = FindDiscontinuousSpaceFromObject(obj, true);
     if (d_space != nullptr) {
-      if (d_space->GetLiveBitmap()->Test(obj)) {
+      if (d_space->GetLiveBitmap()->Test(obj.Ptr())) {
         return true;
       }
     }
@@ -1575,20 +1535,20 @@
     }
     if (search_allocation_stack) {
       if (sorted) {
-        if (allocation_stack_->ContainsSorted(obj)) {
+        if (allocation_stack_->ContainsSorted(obj.Ptr())) {
           return true;
         }
-      } else if (allocation_stack_->Contains(obj)) {
+      } else if (allocation_stack_->Contains(obj.Ptr())) {
         return true;
       }
     }
 
     if (search_live_stack) {
       if (sorted) {
-        if (live_stack_->ContainsSorted(obj)) {
+        if (live_stack_->ContainsSorted(obj.Ptr())) {
           return true;
         }
-      } else if (live_stack_->Contains(obj)) {
+      } else if (live_stack_->Contains(obj.Ptr())) {
         return true;
       }
     }
@@ -1596,12 +1556,12 @@
   // We need to check the bitmaps again since there is a race where we mark something as live and
   // then clear the stack containing it.
   if (c_space != nullptr) {
-    if (c_space->GetLiveBitmap()->Test(obj)) {
+    if (c_space->GetLiveBitmap()->Test(obj.Ptr())) {
       return true;
     }
   } else {
     d_space = FindDiscontinuousSpaceFromObject(obj, true);
-    if (d_space != nullptr && d_space->GetLiveBitmap()->Test(obj)) {
+    if (d_space != nullptr && d_space->GetLiveBitmap()->Test(obj.Ptr())) {
       return true;
     }
   }
@@ -1631,7 +1591,7 @@
   }
 }
 
-void Heap::VerifyObjectBody(mirror::Object* obj) {
+void Heap::VerifyObjectBody(ObjPtr<mirror::Object> obj) {
   if (verify_object_mode_ == kVerifyObjectModeDisabled) {
     return;
   }
@@ -1640,7 +1600,7 @@
   if (UNLIKELY(static_cast<size_t>(num_bytes_allocated_.LoadRelaxed()) < 10 * KB)) {
     return;
   }
-  CHECK_ALIGNED(obj, kObjectAlignment) << "Object isn't aligned";
+  CHECK_ALIGNED(obj.Ptr(), kObjectAlignment) << "Object isn't aligned";
   mirror::Class* c = obj->GetFieldObject<mirror::Class, kVerifyNone>(mirror::Object::ClassOffset());
   CHECK(c != nullptr) << "Null class in object " << obj;
   CHECK_ALIGNED(c, kObjectAlignment) << "Class " << c << " not aligned in object " << obj;
@@ -1719,14 +1679,13 @@
                                              size_t* bytes_allocated,
                                              size_t* usable_size,
                                              size_t* bytes_tl_bulk_allocated,
-                                             mirror::Class** klass) {
+                                             ObjPtr<mirror::Class>* klass) {
   bool was_default_allocator = allocator == GetCurrentAllocator();
   // Make sure there is no pending exception since we may need to throw an OOME.
   self->AssertNoPendingException();
   DCHECK(klass != nullptr);
   StackHandleScope<1> hs(self);
-  HandleWrapper<mirror::Class> h(hs.NewHandleWrapper(klass));
-  klass = nullptr;  // Invalidate for safety.
+  HandleWrapperObjPtr<mirror::Class> h(hs.NewHandleWrapper(klass));
   // The allocation failed. If the GC is running, block until it completes, and then retry the
   // allocation.
   collector::GcType last_gc = WaitForGcToComplete(kGcCauseForAlloc, self);
@@ -1929,7 +1888,7 @@
 
 class InstanceCounter {
  public:
-  InstanceCounter(const std::vector<mirror::Class*>& classes,
+  InstanceCounter(const std::vector<Handle<mirror::Class>>& classes,
                   bool use_is_assignable_from,
                   uint64_t* counts)
       REQUIRES_SHARED(Locks::mutator_lock_)
@@ -1941,7 +1900,7 @@
     mirror::Class* instance_class = obj->GetClass();
     CHECK(instance_class != nullptr);
     for (size_t i = 0; i < instance_counter->classes_.size(); ++i) {
-      mirror::Class* klass = instance_counter->classes_[i];
+      ObjPtr<mirror::Class> klass = instance_counter->classes_[i].Get();
       if (instance_counter->use_is_assignable_from_) {
         if (klass != nullptr && klass->IsAssignableFrom(instance_class)) {
           ++instance_counter->counts_[i];
@@ -1953,13 +1912,14 @@
   }
 
  private:
-  const std::vector<mirror::Class*>& classes_;
+  const std::vector<Handle<mirror::Class>>& classes_;
   bool use_is_assignable_from_;
   uint64_t* const counts_;
   DISALLOW_COPY_AND_ASSIGN(InstanceCounter);
 };
 
-void Heap::CountInstances(const std::vector<mirror::Class*>& classes, bool use_is_assignable_from,
+void Heap::CountInstances(const std::vector<Handle<mirror::Class>>& classes,
+                          bool use_is_assignable_from,
                           uint64_t* counts) {
   InstanceCounter counter(classes, use_is_assignable_from, counts);
   VisitObjects(InstanceCounter::Callback, &counter);
@@ -1967,44 +1927,55 @@
 
 class InstanceCollector {
  public:
-  InstanceCollector(mirror::Class* c, int32_t max_count, std::vector<mirror::Object*>& instances)
+  InstanceCollector(VariableSizedHandleScope& scope,
+                    Handle<mirror::Class> c,
+                    int32_t max_count,
+                    std::vector<Handle<mirror::Object>>& instances)
       REQUIRES_SHARED(Locks::mutator_lock_)
-      : class_(c), max_count_(max_count), instances_(instances) {
-  }
+      : scope_(scope),
+        class_(c),
+        max_count_(max_count),
+        instances_(instances) {}
+
   static void Callback(mirror::Object* obj, void* arg)
       REQUIRES_SHARED(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
     DCHECK(arg != nullptr);
     InstanceCollector* instance_collector = reinterpret_cast<InstanceCollector*>(arg);
-    if (obj->GetClass() == instance_collector->class_) {
+    if (obj->GetClass() == instance_collector->class_.Get()) {
       if (instance_collector->max_count_ == 0 ||
           instance_collector->instances_.size() < instance_collector->max_count_) {
-        instance_collector->instances_.push_back(obj);
+        instance_collector->instances_.push_back(instance_collector->scope_.NewHandle(obj));
       }
     }
   }
 
  private:
-  const mirror::Class* const class_;
+  VariableSizedHandleScope& scope_;
+  Handle<mirror::Class> const class_;
   const uint32_t max_count_;
-  std::vector<mirror::Object*>& instances_;
+  std::vector<Handle<mirror::Object>>& instances_;
   DISALLOW_COPY_AND_ASSIGN(InstanceCollector);
 };
 
-void Heap::GetInstances(mirror::Class* c,
+void Heap::GetInstances(VariableSizedHandleScope& scope,
+                        Handle<mirror::Class> c,
                         int32_t max_count,
-                        std::vector<mirror::Object*>& instances) {
-  InstanceCollector collector(c, max_count, instances);
+                        std::vector<Handle<mirror::Object>>& instances) {
+  InstanceCollector collector(scope, c, max_count, instances);
   VisitObjects(&InstanceCollector::Callback, &collector);
 }
 
 class ReferringObjectsFinder {
  public:
-  ReferringObjectsFinder(mirror::Object* object,
+  ReferringObjectsFinder(VariableSizedHandleScope& scope,
+                         Handle<mirror::Object> object,
                          int32_t max_count,
-                         std::vector<mirror::Object*>& referring_objects)
+                         std::vector<Handle<mirror::Object>>& referring_objects)
       REQUIRES_SHARED(Locks::mutator_lock_)
-      : object_(object), max_count_(max_count), referring_objects_(referring_objects) {
-  }
+      : scope_(scope),
+        object_(object),
+        max_count_(max_count),
+        referring_objects_(referring_objects) {}
 
   static void Callback(mirror::Object* obj, void* arg)
       REQUIRES_SHARED(Locks::mutator_lock_, Locks::heap_bitmap_lock_) {
@@ -2014,16 +1985,18 @@
   // For bitmap Visit.
   // TODO: Fix lock analysis to not use NO_THREAD_SAFETY_ANALYSIS, requires support for
   // annotalysis on visitors.
-  void operator()(mirror::Object* o) const NO_THREAD_SAFETY_ANALYSIS {
+  void operator()(ObjPtr<mirror::Object> o) const NO_THREAD_SAFETY_ANALYSIS {
     o->VisitReferences(*this, VoidFunctor());
   }
 
   // For Object::VisitReferences.
-  void operator()(mirror::Object* obj, MemberOffset offset, bool is_static ATTRIBUTE_UNUSED) const
+  void operator()(ObjPtr<mirror::Object> obj,
+                  MemberOffset offset,
+                  bool is_static ATTRIBUTE_UNUSED) const
       REQUIRES_SHARED(Locks::mutator_lock_) {
     mirror::Object* ref = obj->GetFieldObject<mirror::Object>(offset);
-    if (ref == object_ && (max_count_ == 0 || referring_objects_.size() < max_count_)) {
-      referring_objects_.push_back(obj);
+    if (ref == object_.Get() && (max_count_ == 0 || referring_objects_.size() < max_count_)) {
+      referring_objects_.push_back(scope_.NewHandle(obj));
     }
   }
 
@@ -2032,15 +2005,18 @@
   void VisitRoot(mirror::CompressedReference<mirror::Object>* root ATTRIBUTE_UNUSED) const {}
 
  private:
-  const mirror::Object* const object_;
+  VariableSizedHandleScope& scope_;
+  Handle<mirror::Object> const object_;
   const uint32_t max_count_;
-  std::vector<mirror::Object*>& referring_objects_;
+  std::vector<Handle<mirror::Object>>& referring_objects_;
   DISALLOW_COPY_AND_ASSIGN(ReferringObjectsFinder);
 };
 
-void Heap::GetReferringObjects(mirror::Object* o, int32_t max_count,
-                               std::vector<mirror::Object*>& referring_objects) {
-  ReferringObjectsFinder finder(o, max_count, referring_objects);
+void Heap::GetReferringObjects(VariableSizedHandleScope& scope,
+                               Handle<mirror::Object> o,
+                               int32_t max_count,
+                               std::vector<Handle<mirror::Object>>& referring_objects) {
+  ReferringObjectsFinder finder(scope, o, max_count, referring_objects);
   VisitObjects(&ReferringObjectsFinder::Callback, &finder);
 }
 
@@ -2434,13 +2410,9 @@
     }
     // Copy the object over to its new location. Don't use alloc_size to avoid valgrind error.
     memcpy(reinterpret_cast<void*>(forward_address), obj, obj_size);
-    if (kUseBakerOrBrooksReadBarrier) {
-      obj->AssertReadBarrierPointer();
-      if (kUseBrooksReadBarrier) {
-        DCHECK_EQ(forward_address->GetReadBarrierPointer(), obj);
-        forward_address->SetReadBarrierPointer(forward_address);
-      }
-      forward_address->AssertReadBarrierPointer();
+    if (kUseBakerReadBarrier) {
+      obj->AssertReadBarrierState();
+      forward_address->AssertReadBarrierState();
     }
     return forward_address;
   }
@@ -2521,6 +2493,8 @@
     } else {
       if (collector_type_ == kCollectorTypeCC) {
         region_space_->GetMemMap()->Protect(PROT_READ | PROT_WRITE);
+        // Evacuated everything out of the region space, clear the mark bitmap.
+        region_space_->GetMarkBitmap()->Clear();
       } else {
         bump_pointer_space_->GetMemMap()->Protect(PROT_READ | PROT_WRITE);
       }
@@ -2736,7 +2710,6 @@
         collector = semi_space_collector_;
         break;
       case kCollectorTypeCC:
-        concurrent_copying_collector_->SetRegionSpace(region_space_);
         collector = concurrent_copying_collector_;
         break;
       case kCollectorTypeMC:
@@ -2922,19 +2895,21 @@
     return fail_count_->LoadSequentiallyConsistent();
   }
 
-  void operator()(mirror::Class* klass ATTRIBUTE_UNUSED, mirror::Reference* ref) const
+  void operator()(ObjPtr<mirror::Class> klass ATTRIBUTE_UNUSED, ObjPtr<mirror::Reference> ref) const
       REQUIRES_SHARED(Locks::mutator_lock_) {
     if (verify_referent_) {
-      VerifyReference(ref, ref->GetReferent(), mirror::Reference::ReferentOffset());
+      VerifyReference(ref.Ptr(), ref->GetReferent(), mirror::Reference::ReferentOffset());
     }
   }
 
-  void operator()(mirror::Object* obj, MemberOffset offset, bool is_static ATTRIBUTE_UNUSED) const
+  void operator()(ObjPtr<mirror::Object> obj,
+                  MemberOffset offset,
+                  bool is_static ATTRIBUTE_UNUSED) const
       REQUIRES_SHARED(Locks::mutator_lock_) {
-    VerifyReference(obj, obj->GetFieldObject<mirror::Object>(offset), offset);
+    VerifyReference(obj.Ptr(), obj->GetFieldObject<mirror::Object>(offset), offset);
   }
 
-  bool IsLive(mirror::Object* obj) const NO_THREAD_SAFETY_ANALYSIS {
+  bool IsLive(ObjPtr<mirror::Object> obj) const NO_THREAD_SAFETY_ANALYSIS {
     return heap_->IsLiveObjectLocked(obj, true, false, true);
   }
 
@@ -2955,7 +2930,7 @@
     if (root == nullptr) {
       LOG(ERROR) << "Root is null with info " << root_info.GetType();
     } else if (!VerifyReference(nullptr, root, MemberOffset(0))) {
-      LOG(ERROR) << "Root " << root << " is dead with type " << PrettyTypeOf(root)
+      LOG(ERROR) << "Root " << root << " is dead with type " << mirror::Object::PrettyTypeOf(root)
           << " thread_id= " << root_info.GetThreadId() << " root_type= " << root_info.GetType();
     }
   }
@@ -2982,7 +2957,7 @@
       LOG(ERROR) << "Object " << obj << " references dead object " << ref << " at offset "
                  << offset << "\n card value = " << static_cast<int>(*card_addr);
       if (heap_->IsValidObjectAddress(obj->GetClass())) {
-        LOG(ERROR) << "Obj type " << PrettyTypeOf(obj);
+        LOG(ERROR) << "Obj type " << obj->PrettyTypeOf();
       } else {
         LOG(ERROR) << "Object " << obj << " class(" << obj->GetClass() << ") not a heap address";
       }
@@ -2994,7 +2969,7 @@
         mirror::Class* ref_class = space->FindRecentFreedObject(ref);
         if (ref_class != nullptr) {
           LOG(ERROR) << "Reference " << ref << " found as a recently freed object with class "
-                     << PrettyClass(ref_class);
+                     << ref_class->PrettyClass();
         } else {
           LOG(ERROR) << "Reference " << ref << " not found as a recently freed object";
         }
@@ -3002,7 +2977,7 @@
 
       if (ref->GetClass() != nullptr && heap_->IsValidObjectAddress(ref->GetClass()) &&
           ref->GetClass()->IsClass()) {
-        LOG(ERROR) << "Ref type " << PrettyTypeOf(ref);
+        LOG(ERROR) << "Ref type " << ref->PrettyTypeOf();
       } else {
         LOG(ERROR) << "Ref " << ref << " class(" << ref->GetClass()
                    << ") is not a valid heap address";
@@ -3098,41 +3073,42 @@
   const bool verify_referent_;
 };
 
-void Heap::PushOnAllocationStackWithInternalGC(Thread* self, mirror::Object** obj) {
+void Heap::PushOnAllocationStackWithInternalGC(Thread* self, ObjPtr<mirror::Object>* obj) {
   // Slow path, the allocation stack push back must have already failed.
-  DCHECK(!allocation_stack_->AtomicPushBack(*obj));
+  DCHECK(!allocation_stack_->AtomicPushBack(obj->Ptr()));
   do {
     // TODO: Add handle VerifyObject.
     StackHandleScope<1> hs(self);
-    HandleWrapper<mirror::Object> wrapper(hs.NewHandleWrapper(obj));
+    HandleWrapperObjPtr<mirror::Object> wrapper(hs.NewHandleWrapper(obj));
     // Push our object into the reserve region of the allocaiton stack. This is only required due
     // to heap verification requiring that roots are live (either in the live bitmap or in the
     // allocation stack).
-    CHECK(allocation_stack_->AtomicPushBackIgnoreGrowthLimit(*obj));
+    CHECK(allocation_stack_->AtomicPushBackIgnoreGrowthLimit(obj->Ptr()));
     CollectGarbageInternal(collector::kGcTypeSticky, kGcCauseForAlloc, false);
-  } while (!allocation_stack_->AtomicPushBack(*obj));
+  } while (!allocation_stack_->AtomicPushBack(obj->Ptr()));
 }
 
-void Heap::PushOnThreadLocalAllocationStackWithInternalGC(Thread* self, mirror::Object** obj) {
+void Heap::PushOnThreadLocalAllocationStackWithInternalGC(Thread* self,
+                                                          ObjPtr<mirror::Object>* obj) {
   // Slow path, the allocation stack push back must have already failed.
-  DCHECK(!self->PushOnThreadLocalAllocationStack(*obj));
+  DCHECK(!self->PushOnThreadLocalAllocationStack(obj->Ptr()));
   StackReference<mirror::Object>* start_address;
   StackReference<mirror::Object>* end_address;
   while (!allocation_stack_->AtomicBumpBack(kThreadLocalAllocationStackSize, &start_address,
                                             &end_address)) {
     // TODO: Add handle VerifyObject.
     StackHandleScope<1> hs(self);
-    HandleWrapper<mirror::Object> wrapper(hs.NewHandleWrapper(obj));
+    HandleWrapperObjPtr<mirror::Object> wrapper(hs.NewHandleWrapper(obj));
     // Push our object into the reserve region of the allocaiton stack. This is only required due
     // to heap verification requiring that roots are live (either in the live bitmap or in the
     // allocation stack).
-    CHECK(allocation_stack_->AtomicPushBackIgnoreGrowthLimit(*obj));
+    CHECK(allocation_stack_->AtomicPushBackIgnoreGrowthLimit(obj->Ptr()));
     // Push into the reserve allocation stack.
     CollectGarbageInternal(collector::kGcTypeSticky, kGcCauseForAlloc, false);
   }
   self->SetThreadLocalAllocationStack(start_address, end_address);
   // Retry on the new thread-local allocation stack.
-  CHECK(self->PushOnThreadLocalAllocationStack(*obj));  // Must succeed.
+  CHECK(self->PushOnThreadLocalAllocationStack(obj->Ptr()));  // Must succeed.
 }
 
 // Must do this with mutators suspended since we are directly accessing the allocation stacks.
@@ -3210,8 +3186,9 @@
           if (heap_->GetLiveBitmap()->Test(obj)) {
             LOG(ERROR) << "Object " << obj << " found in live bitmap";
           }
-          LOG(ERROR) << "Object " << obj << " " << PrettyTypeOf(obj)
-                    << " references " << ref << " " << PrettyTypeOf(ref) << " in live stack";
+          LOG(ERROR) << "Object " << obj << " " << mirror::Object::PrettyTypeOf(obj)
+                    << " references " << ref << " " << mirror::Object::PrettyTypeOf(ref)
+                    << " in live stack";
 
           // Print which field of the object is dead.
           if (!obj->IsObjectArray()) {
@@ -3220,7 +3197,7 @@
             for (ArtField& field : (is_static ? klass->GetSFields() : klass->GetIFields())) {
               if (field.GetOffset().Int32Value() == offset.Int32Value()) {
                 LOG(ERROR) << (is_static ? "Static " : "") << "field in the live stack is "
-                           << PrettyField(&field);
+                           << field.PrettyField();
                 break;
               }
             }
@@ -3351,7 +3328,7 @@
       const char* name = space->IsZygoteSpace() ? "ZygoteModUnionClearCards" :
           "ImageModUnionClearCards";
       TimingLogger::ScopedTiming t2(name, timings);
-      table->ClearCards();
+      table->ProcessCards();
     } else if (use_rem_sets && rem_set != nullptr) {
       DCHECK(collector::SemiSpace::kUseRememberedSet && collector_type_ == kCollectorTypeGSS)
           << static_cast<int>(collector_type_);
@@ -3722,19 +3699,21 @@
   }
 }
 
-void Heap::AddFinalizerReference(Thread* self, mirror::Object** object) {
+void Heap::AddFinalizerReference(Thread* self, ObjPtr<mirror::Object>* object) {
   ScopedObjectAccess soa(self);
   ScopedLocalRef<jobject> arg(self->GetJniEnv(), soa.AddLocalReference<jobject>(*object));
   jvalue args[1];
   args[0].l = arg.get();
   InvokeWithJValues(soa, nullptr, WellKnownClasses::java_lang_ref_FinalizerReference_add, args);
   // Restore object in case it gets moved.
-  *object = soa.Decode<mirror::Object>(arg.get()).Ptr();
+  *object = soa.Decode<mirror::Object>(arg.get());
 }
 
-void Heap::RequestConcurrentGCAndSaveObject(Thread* self, bool force_full, mirror::Object** obj) {
+void Heap::RequestConcurrentGCAndSaveObject(Thread* self,
+                                            bool force_full,
+                                            ObjPtr<mirror::Object>* obj) {
   StackHandleScope<1> hs(self);
-  HandleWrapper<mirror::Object> wrapper(hs.NewHandleWrapper(obj));
+  HandleWrapperObjPtr<mirror::Object> wrapper(hs.NewHandleWrapper(obj));
   RequestConcurrentGC(self, force_full);
 }
 
@@ -4011,7 +3990,7 @@
   mod_union_tables_.Put(mod_union_table->GetSpace(), mod_union_table);
 }
 
-void Heap::CheckPreconditionsForAllocObject(mirror::Class* c, size_t byte_count) {
+void Heap::CheckPreconditionsForAllocObject(ObjPtr<mirror::Class> c, size_t byte_count) {
   CHECK(c == nullptr || (c->IsClassClass() && byte_count >= sizeof(mirror::Class)) ||
         (c->IsVariableSize() || c->GetObjectSize() == byte_count)) << c->GetClassFlags();
   CHECK_GE(byte_count, sizeof(mirror::Object));
@@ -4090,7 +4069,6 @@
 }
 
 void Heap::BroadcastForNewAllocationRecords() const {
-  CHECK(kUseReadBarrier);
   // Always broadcast without checking IsAllocTrackingEnabled() because IsAllocTrackingEnabled() may
   // be set to false while some threads are waiting for system weak access in
   // AllocRecordObjectMap::RecordAllocation() and we may fail to wake them up. b/27467554.
@@ -4137,7 +4115,7 @@
   return state.GetFrameCount();
 }
 
-void Heap::CheckGcStressMode(Thread* self, mirror::Object** obj) {
+void Heap::CheckGcStressMode(Thread* self, ObjPtr<mirror::Object>* obj) {
   auto* const runtime = Runtime::Current();
   if (gc_stress_mode_ && runtime->GetClassLinker()->IsInitialized() &&
       !runtime->IsActiveTransaction() && mirror::Class::HasJavaLangClass()) {
@@ -4176,9 +4154,9 @@
   gc_disabled_for_shutdown_ = true;
 }
 
-bool Heap::ObjectIsInBootImageSpace(mirror::Object* obj) const {
+bool Heap::ObjectIsInBootImageSpace(ObjPtr<mirror::Object> obj) const {
   for (gc::space::ImageSpace* space : boot_image_spaces_) {
-    if (space->HasAddress(obj)) {
+    if (space->HasAddress(obj.Ptr())) {
       return true;
     }
   }
@@ -4223,5 +4201,29 @@
   }
 }
 
+void Heap::SetAllocationListener(AllocationListener* l) {
+  AllocationListener* old = GetAndOverwriteAllocationListener(&alloc_listener_, l);
+
+  if (old == nullptr) {
+    Runtime::Current()->GetInstrumentation()->InstrumentQuickAllocEntryPoints();
+  }
+}
+
+void Heap::RemoveAllocationListener() {
+  AllocationListener* old = GetAndOverwriteAllocationListener(&alloc_listener_, nullptr);
+
+  if (old != nullptr) {
+    Runtime::Current()->GetInstrumentation()->UninstrumentQuickAllocEntryPoints();
+  }
+}
+
+void Heap::SetGcPauseListener(GcPauseListener* l) {
+  gc_pause_listener_.StoreRelaxed(l);
+}
+
+void Heap::RemoveGcPauseListener() {
+  gc_pause_listener_.StoreRelaxed(nullptr);
+}
+
 }  // namespace gc
 }  // namespace art
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index e32f057..0c671d2 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -34,6 +34,7 @@
 #include "gc/collector_type.h"
 #include "gc/space/large_object_space.h"
 #include "globals.h"
+#include "handle.h"
 #include "obj_ptr.h"
 #include "object_callbacks.h"
 #include "offsets.h"
@@ -49,6 +50,7 @@
 class Thread;
 class ThreadPool;
 class TimingLogger;
+class VariableSizedHandleScope;
 
 namespace mirror {
   class Class;
@@ -57,7 +59,9 @@
 
 namespace gc {
 
+class AllocationListener;
 class AllocRecordObjectMap;
+class GcPauseListener;
 class ReferenceProcessor;
 class TaskProcessor;
 
@@ -193,36 +197,48 @@
   // Allocates and initializes storage for an object instance.
   template <bool kInstrumented, typename PreFenceVisitor>
   mirror::Object* AllocObject(Thread* self,
-                              mirror::Class* klass,
+                              ObjPtr<mirror::Class> klass,
                               size_t num_bytes,
                               const PreFenceVisitor& pre_fence_visitor)
       REQUIRES_SHARED(Locks::mutator_lock_)
-      REQUIRES(!*gc_complete_lock_, !*pending_task_lock_, !*backtrace_lock_,
+      REQUIRES(!*gc_complete_lock_,
+               !*pending_task_lock_,
+               !*backtrace_lock_,
                !Roles::uninterruptible_) {
-    return AllocObjectWithAllocator<kInstrumented, true>(
-        self, klass, num_bytes, GetCurrentAllocator(), pre_fence_visitor);
+    return AllocObjectWithAllocator<kInstrumented, true>(self,
+                                                         klass,
+                                                         num_bytes,
+                                                         GetCurrentAllocator(),
+                                                         pre_fence_visitor);
   }
 
   template <bool kInstrumented, typename PreFenceVisitor>
   mirror::Object* AllocNonMovableObject(Thread* self,
-                                        mirror::Class* klass,
+                                        ObjPtr<mirror::Class> klass,
                                         size_t num_bytes,
                                         const PreFenceVisitor& pre_fence_visitor)
       REQUIRES_SHARED(Locks::mutator_lock_)
-      REQUIRES(!*gc_complete_lock_, !*pending_task_lock_, !*backtrace_lock_,
+      REQUIRES(!*gc_complete_lock_,
+               !*pending_task_lock_,
+               !*backtrace_lock_,
                !Roles::uninterruptible_) {
-    return AllocObjectWithAllocator<kInstrumented, true>(
-        self, klass, num_bytes, GetCurrentNonMovingAllocator(), pre_fence_visitor);
+    return AllocObjectWithAllocator<kInstrumented, true>(self,
+                                                         klass,
+                                                         num_bytes,
+                                                         GetCurrentNonMovingAllocator(),
+                                                         pre_fence_visitor);
   }
 
   template <bool kInstrumented, bool kCheckLargeObject, typename PreFenceVisitor>
   ALWAYS_INLINE mirror::Object* AllocObjectWithAllocator(Thread* self,
-                                                         mirror::Class* klass,
+                                                         ObjPtr<mirror::Class> klass,
                                                          size_t byte_count,
                                                          AllocatorType allocator,
                                                          const PreFenceVisitor& pre_fence_visitor)
       REQUIRES_SHARED(Locks::mutator_lock_)
-      REQUIRES(!*gc_complete_lock_, !*pending_task_lock_, !*backtrace_lock_,
+      REQUIRES(!*gc_complete_lock_,
+               !*pending_task_lock_,
+               !*backtrace_lock_,
                !Roles::uninterruptible_);
 
   AllocatorType GetCurrentAllocator() const {
@@ -240,7 +256,7 @@
   void VisitObjectsPaused(ObjectCallback callback, void* arg)
       REQUIRES(Locks::mutator_lock_, !Locks::heap_bitmap_lock_, !*gc_complete_lock_);
 
-  void CheckPreconditionsForAllocObject(mirror::Class* c, size_t byte_count)
+  void CheckPreconditionsForAllocObject(ObjPtr<mirror::Class> c, size_t byte_count)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   void RegisterNativeAllocation(JNIEnv* env, size_t bytes)
@@ -262,7 +278,7 @@
   // The given reference is believed to be to an object in the Java heap, check the soundness of it.
   // TODO: NO_THREAD_SAFETY_ANALYSIS since we call this everywhere and it is impossible to find a
   // proper lock ordering for it.
-  void VerifyObjectBody(mirror::Object* o) NO_THREAD_SAFETY_ANALYSIS;
+  void VerifyObjectBody(ObjPtr<mirror::Object> o) NO_THREAD_SAFETY_ANALYSIS;
 
   // Check sanity of all live references.
   void VerifyHeap() REQUIRES(!Locks::heap_bitmap_lock_);
@@ -275,16 +291,16 @@
   // A weaker test than IsLiveObject or VerifyObject that doesn't require the heap lock,
   // and doesn't abort on error, allowing the caller to report more
   // meaningful diagnostics.
-  bool IsValidObjectAddress(ObjPtr<mirror::Object> obj) const REQUIRES_SHARED(Locks::mutator_lock_);
+  bool IsValidObjectAddress(const void* obj) const REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Faster alternative to IsHeapAddress since finding if an object is in the large object space is
   // very slow.
-  bool IsNonDiscontinuousSpaceHeapAddress(const mirror::Object* obj) const
+  bool IsNonDiscontinuousSpaceHeapAddress(const void* addr) const
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Returns true if 'obj' is a live heap object, false otherwise (including for invalid addresses).
   // Requires the heap lock to be held.
-  bool IsLiveObjectLocked(mirror::Object* obj,
+  bool IsLiveObjectLocked(ObjPtr<mirror::Object> obj,
                           bool search_allocation_stack = true,
                           bool search_live_stack = true,
                           bool sorted = false)
@@ -320,19 +336,25 @@
 
   // Implements VMDebug.countInstancesOfClass and JDWP VM_InstanceCount.
   // The boolean decides whether to use IsAssignableFrom or == when comparing classes.
-  void CountInstances(const std::vector<mirror::Class*>& classes,
+  void CountInstances(const std::vector<Handle<mirror::Class>>& classes,
                       bool use_is_assignable_from,
                       uint64_t* counts)
       REQUIRES(!Locks::heap_bitmap_lock_, !*gc_complete_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
+
   // Implements JDWP RT_Instances.
-  void GetInstances(mirror::Class* c, int32_t max_count, std::vector<mirror::Object*>& instances)
+  void GetInstances(VariableSizedHandleScope& scope,
+                    Handle<mirror::Class> c,
+                    int32_t max_count,
+                    std::vector<Handle<mirror::Object>>& instances)
       REQUIRES(!Locks::heap_bitmap_lock_, !*gc_complete_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
+
   // Implements JDWP OR_ReferringObjects.
-  void GetReferringObjects(mirror::Object* o,
+  void GetReferringObjects(VariableSizedHandleScope& scope,
+                           Handle<mirror::Object> o,
                            int32_t max_count,
-                           std::vector<mirror::Object*>& referring_objects)
+                           std::vector<Handle<mirror::Object>>& referring_objects)
       REQUIRES(!Locks::heap_bitmap_lock_, !*gc_complete_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
@@ -438,23 +460,20 @@
 
   // Must be called if a field of an Object in the heap changes, and before any GC safe-point.
   // The call is not needed if null is stored in the field.
-  ALWAYS_INLINE void WriteBarrierField(const mirror::Object* dst,
-                                       MemberOffset offset ATTRIBUTE_UNUSED,
-                                       const mirror::Object* new_value ATTRIBUTE_UNUSED) {
-    card_table_->MarkCard(dst);
-  }
+  ALWAYS_INLINE void WriteBarrierField(ObjPtr<mirror::Object> dst,
+                                       MemberOffset offset,
+                                       ObjPtr<mirror::Object> new_value)
+      REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Write barrier for array operations that update many field positions
-  ALWAYS_INLINE void WriteBarrierArray(const mirror::Object* dst,
-                                       int start_offset ATTRIBUTE_UNUSED,
+  ALWAYS_INLINE void WriteBarrierArray(ObjPtr<mirror::Object> dst,
+                                       int start_offset,
                                        // TODO: element_count or byte_count?
-                                       size_t length ATTRIBUTE_UNUSED) {
-    card_table_->MarkCard(dst);
-  }
+                                       size_t length)
+      REQUIRES_SHARED(Locks::mutator_lock_);
 
-  ALWAYS_INLINE void WriteBarrierEveryFieldOf(const mirror::Object* obj) {
-    card_table_->MarkCard(obj);
-  }
+  ALWAYS_INLINE void WriteBarrierEveryFieldOf(ObjPtr<mirror::Object> obj)
+      REQUIRES_SHARED(Locks::mutator_lock_);
 
   accounting::CardTable* GetCardTable() const {
     return card_table_.get();
@@ -464,7 +483,7 @@
     return rb_table_.get();
   }
 
-  void AddFinalizerReference(Thread* self, mirror::Object** object);
+  void AddFinalizerReference(Thread* self, ObjPtr<mirror::Object>* object);
 
   // Returns the number of bytes currently allocated.
   size_t GetBytesAllocated() const {
@@ -527,12 +546,20 @@
   // get the space that corresponds to an object's address. Current implementation searches all
   // spaces in turn. If fail_ok is false then failing to find a space will cause an abort.
   // TODO: consider using faster data structure like binary tree.
-  space::ContinuousSpace* FindContinuousSpaceFromObject(const mirror::Object*, bool fail_ok) const
+  space::ContinuousSpace* FindContinuousSpaceFromObject(ObjPtr<mirror::Object>, bool fail_ok) const
       REQUIRES_SHARED(Locks::mutator_lock_);
-  space::DiscontinuousSpace* FindDiscontinuousSpaceFromObject(const mirror::Object*,
+
+  space::ContinuousSpace* FindContinuousSpaceFromAddress(const mirror::Object* addr) const
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
+  space::DiscontinuousSpace* FindDiscontinuousSpaceFromObject(ObjPtr<mirror::Object>,
                                                               bool fail_ok) const
       REQUIRES_SHARED(Locks::mutator_lock_);
-  space::Space* FindSpaceFromObject(const mirror::Object*, bool fail_ok) const
+
+  space::Space* FindSpaceFromObject(ObjPtr<mirror::Object> obj, bool fail_ok) const
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
+  space::Space* FindSpaceFromAddress(const void* ptr) const
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   void DumpForSigQuit(std::ostream& os) REQUIRES(!*gc_complete_lock_, !native_histogram_lock_);
@@ -598,7 +625,7 @@
     return boot_image_spaces_;
   }
 
-  bool ObjectIsInBootImageSpace(mirror::Object* obj) const
+  bool ObjectIsInBootImageSpace(ObjPtr<mirror::Object> obj) const
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   bool IsInBootImageOatFile(const void* p) const
@@ -650,12 +677,6 @@
   void DumpSpaces(std::ostream& stream) const REQUIRES_SHARED(Locks::mutator_lock_);
   std::string DumpSpaces() const REQUIRES_SHARED(Locks::mutator_lock_);
 
-  // Dump object should only be used by the signal handler.
-  void DumpObject(std::ostream& stream, mirror::Object* obj) NO_THREAD_SAFETY_ANALYSIS;
-  // Safe version of pretty type of which check to make sure objects are heap addresses.
-  std::string SafeGetClassDescriptor(mirror::Class* klass) NO_THREAD_SAFETY_ANALYSIS;
-  std::string SafePrettyTypeOf(mirror::Object* obj) NO_THREAD_SAFETY_ANALYSIS;
-
   // GC performance measuring
   void DumpGcPerformanceInfo(std::ostream& os)
       REQUIRES(!*gc_complete_lock_, !native_histogram_lock_);
@@ -776,7 +797,6 @@
       REQUIRES(!Locks::alloc_tracker_lock_);
 
   void BroadcastForNewAllocationRecords() const
-      REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!Locks::alloc_tracker_lock_);
 
   void DisableGCForShutdown() REQUIRES(!*gc_complete_lock_);
@@ -785,6 +805,22 @@
   HomogeneousSpaceCompactResult PerformHomogeneousSpaceCompact() REQUIRES(!*gc_complete_lock_);
   bool SupportHomogeneousSpaceCompactAndCollectorTransitions() const;
 
+  // Install an allocation listener.
+  void SetAllocationListener(AllocationListener* l);
+  // Remove an allocation listener. Note: the listener must not be deleted, as for performance
+  // reasons, we assume it stays valid when we read it (so that we don't require a lock).
+  void RemoveAllocationListener();
+
+  // Install a gc pause listener.
+  void SetGcPauseListener(GcPauseListener* l);
+  // Get the currently installed gc pause listener, or null.
+  GcPauseListener* GetGcPauseListener() {
+    return gc_pause_listener_.LoadAcquire();
+  }
+  // Remove a gc pause listener. Note: the listener must not be deleted, as for performance
+  // reasons, we assume it stays valid when we read it (so that we don't require a lock).
+  void RemoveGcPauseListener();
+
  private:
   class ConcurrentGCTask;
   class CollectorTransitionTask;
@@ -831,11 +867,11 @@
         collector_type == kCollectorTypeMC ||
         collector_type == kCollectorTypeHomogeneousSpaceCompact;
   }
-  bool ShouldAllocLargeObject(mirror::Class* c, size_t byte_count) const
+  bool ShouldAllocLargeObject(ObjPtr<mirror::Class> c, size_t byte_count) const
       REQUIRES_SHARED(Locks::mutator_lock_);
   ALWAYS_INLINE void CheckConcurrentGC(Thread* self,
                                        size_t new_num_bytes_allocated,
-                                       mirror::Object** obj)
+                                       ObjPtr<mirror::Object>* obj)
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!*pending_task_lock_, !*gc_complete_lock_);
 
@@ -846,7 +882,7 @@
   // We don't force this to be inlined since it is a slow path.
   template <bool kInstrumented, typename PreFenceVisitor>
   mirror::Object* AllocLargeObject(Thread* self,
-                                   mirror::Class** klass,
+                                   ObjPtr<mirror::Class>* klass,
                                    size_t byte_count,
                                    const PreFenceVisitor& pre_fence_visitor)
       REQUIRES_SHARED(Locks::mutator_lock_)
@@ -861,14 +897,14 @@
                                          size_t* bytes_allocated,
                                          size_t* usable_size,
                                          size_t* bytes_tl_bulk_allocated,
-                                         mirror::Class** klass)
+                                         ObjPtr<mirror::Class>* klass)
       REQUIRES(!Locks::thread_suspend_count_lock_, !*gc_complete_lock_, !*pending_task_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Allocate into a specific space.
   mirror::Object* AllocateInto(Thread* self,
                                space::AllocSpace* space,
-                               mirror::Class* c,
+                               ObjPtr<mirror::Class> c,
                                size_t bytes)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
@@ -893,10 +929,6 @@
   template <bool kGrow>
   ALWAYS_INLINE bool IsOutOfMemoryOnAllocation(AllocatorType allocator_type, size_t alloc_size);
 
-  // Returns true if the address passed in is within the address range of a continuous space.
-  bool IsValidContinuousSpaceObjectAddress(const mirror::Object* obj) const
-      REQUIRES_SHARED(Locks::mutator_lock_);
-
   // Run the finalizers. If timeout is non zero, then we use the VMRuntime version.
   void RunFinalization(JNIEnv* env, uint64_t timeout);
 
@@ -908,7 +940,7 @@
   void RequestCollectorTransition(CollectorType desired_collector_type, uint64_t delta_time)
       REQUIRES(!*pending_task_lock_);
 
-  void RequestConcurrentGCAndSaveObject(Thread* self, bool force_full, mirror::Object** obj)
+  void RequestConcurrentGCAndSaveObject(Thread* self, bool force_full, ObjPtr<mirror::Object>* obj)
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!*pending_task_lock_);
   bool IsGCRequestPending() const;
@@ -980,13 +1012,13 @@
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Push an object onto the allocation stack.
-  void PushOnAllocationStack(Thread* self, mirror::Object** obj)
+  void PushOnAllocationStack(Thread* self, ObjPtr<mirror::Object>* obj)
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!*gc_complete_lock_, !*pending_task_lock_);
-  void PushOnAllocationStackWithInternalGC(Thread* self, mirror::Object** obj)
+  void PushOnAllocationStackWithInternalGC(Thread* self, ObjPtr<mirror::Object>* obj)
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!*gc_complete_lock_, !*pending_task_lock_);
-  void PushOnThreadLocalAllocationStackWithInternalGC(Thread* thread, mirror::Object** obj)
+  void PushOnThreadLocalAllocationStackWithInternalGC(Thread* thread, ObjPtr<mirror::Object>* obj)
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!*gc_complete_lock_, !*pending_task_lock_);
 
@@ -1017,7 +1049,7 @@
   void UpdateGcCountRateHistograms() REQUIRES(gc_complete_lock_);
 
   // GC stress mode attempts to do one GC per unique backtrace.
-  void CheckGcStressMode(Thread* self, mirror::Object** obj)
+  void CheckGcStressMode(Thread* self, ObjPtr<mirror::Object>* obj)
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!*gc_complete_lock_, !*pending_task_lock_, !*backtrace_lock_);
 
@@ -1353,6 +1385,11 @@
   // Boot image spaces.
   std::vector<space::ImageSpace*> boot_image_spaces_;
 
+  // An installed allocation listener.
+  Atomic<AllocationListener*> alloc_listener_;
+  // An installed GC Pause listener.
+  Atomic<GcPauseListener*> gc_pause_listener_;
+
   friend class CollectorTransitionTask;
   friend class collector::GarbageCollector;
   friend class collector::MarkCompact;
diff --git a/runtime/gc/reference_processor.cc b/runtime/gc/reference_processor.cc
index 9694597..2cde7d5 100644
--- a/runtime/gc/reference_processor.cc
+++ b/runtime/gc/reference_processor.cc
@@ -55,17 +55,17 @@
 }
 
 void ReferenceProcessor::BroadcastForSlowPath(Thread* self) {
-  CHECK(kUseReadBarrier);
   MutexLock mu(self, *Locks::reference_processor_lock_);
   condition_.Broadcast(self);
 }
 
-mirror::Object* ReferenceProcessor::GetReferent(Thread* self, mirror::Reference* reference) {
+ObjPtr<mirror::Object> ReferenceProcessor::GetReferent(Thread* self,
+                                                       ObjPtr<mirror::Reference> reference) {
   if (!kUseReadBarrier || self->GetWeakRefAccessEnabled()) {
     // Under read barrier / concurrent copying collector, it's not safe to call GetReferent() when
     // weak ref access is disabled as the call includes a read barrier which may push a ref onto the
     // mark stack and interfere with termination of marking.
-    mirror::Object* const referent = reference->GetReferent();
+    ObjPtr<mirror::Object> const referent = reference->GetReferent();
     // If the referent is null then it is already cleared, we can just return null since there is no
     // scenario where it becomes non-null during the reference processing phase.
     if (UNLIKELY(!SlowPathEnabled()) || referent == nullptr) {
@@ -98,6 +98,9 @@
         }
       }
     }
+    // Check and run the empty checkpoint before blocking so the empty checkpoint will work in the
+    // presence of threads blocking for weak ref access.
+    self->CheckEmptyCheckpoint();
     condition_.WaitHoldingLocks(self);
   }
   return reference->GetReferent();
@@ -116,7 +119,8 @@
 }
 
 // Process reference class instances and schedule finalizations.
-void ReferenceProcessor::ProcessReferences(bool concurrent, TimingLogger* timings,
+void ReferenceProcessor::ProcessReferences(bool concurrent,
+                                           TimingLogger* timings,
                                            bool clear_soft_references,
                                            collector::GarbageCollector* collector) {
   TimingLogger::ScopedTiming t(concurrent ? __FUNCTION__ : "(Paused)ProcessReferences", timings);
@@ -188,7 +192,8 @@
 
 // Process the "referent" field in a java.lang.ref.Reference.  If the referent has not yet been
 // marked, put it on the appropriate list in the heap for later processing.
-void ReferenceProcessor::DelayReferenceReferent(mirror::Class* klass, mirror::Reference* ref,
+void ReferenceProcessor::DelayReferenceReferent(ObjPtr<mirror::Class> klass,
+                                                ObjPtr<mirror::Reference> ref,
                                                 collector::GarbageCollector* collector) {
   // klass can be the class of the old object if the visitor already updated the class of ref.
   DCHECK(klass != nullptr);
@@ -208,7 +213,7 @@
     } else if (klass->IsPhantomReferenceClass()) {
       phantom_reference_queue_.AtomicEnqueueIfNotEnqueued(self, ref);
     } else {
-      LOG(FATAL) << "Invalid reference type " << PrettyClass(klass) << " " << std::hex
+      LOG(FATAL) << "Invalid reference type " << klass->PrettyClass() << " " << std::hex
                  << klass->GetAccessFlags();
     }
   }
@@ -260,12 +265,16 @@
   }
 }
 
-bool ReferenceProcessor::MakeCircularListIfUnenqueued(mirror::FinalizerReference* reference) {
+bool ReferenceProcessor::MakeCircularListIfUnenqueued(
+    ObjPtr<mirror::FinalizerReference> reference) {
   Thread* self = Thread::Current();
   MutexLock mu(self, *Locks::reference_processor_lock_);
   // Wait untul we are done processing reference.
   while ((!kUseReadBarrier && SlowPathEnabled()) ||
          (kUseReadBarrier && !self->GetWeakRefAccessEnabled())) {
+    // Check and run the empty checkpoint before blocking so the empty checkpoint will work in the
+    // presence of threads blocking for weak ref access.
+    self->CheckEmptyCheckpoint();
     condition_.WaitHoldingLocks(self);
   }
   // At this point, since the sentinel of the reference is live, it is guaranteed to not be
diff --git a/runtime/gc/reference_processor.h b/runtime/gc/reference_processor.h
index 4788f8a..759b7e1 100644
--- a/runtime/gc/reference_processor.h
+++ b/runtime/gc/reference_processor.h
@@ -46,7 +46,9 @@
 class ReferenceProcessor {
  public:
   explicit ReferenceProcessor();
-  void ProcessReferences(bool concurrent, TimingLogger* timings, bool clear_soft_references,
+  void ProcessReferences(bool concurrent,
+                         TimingLogger* timings,
+                         bool clear_soft_references,
                          gc::collector::GarbageCollector* collector)
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(Locks::heap_bitmap_lock_)
@@ -57,16 +59,17 @@
   void EnableSlowPath() REQUIRES_SHARED(Locks::mutator_lock_);
   void BroadcastForSlowPath(Thread* self);
   // Decode the referent, may block if references are being processed.
-  mirror::Object* GetReferent(Thread* self, mirror::Reference* reference)
+  ObjPtr<mirror::Object> GetReferent(Thread* self, ObjPtr<mirror::Reference> reference)
       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!Locks::reference_processor_lock_);
   void EnqueueClearedReferences(Thread* self) REQUIRES(!Locks::mutator_lock_);
-  void DelayReferenceReferent(mirror::Class* klass, mirror::Reference* ref,
+  void DelayReferenceReferent(ObjPtr<mirror::Class> klass,
+                              ObjPtr<mirror::Reference> ref,
                               collector::GarbageCollector* collector)
       REQUIRES_SHARED(Locks::mutator_lock_);
   void UpdateRoots(IsMarkedVisitor* visitor)
       REQUIRES_SHARED(Locks::mutator_lock_, Locks::heap_bitmap_lock_);
   // Make a circular list with reference if it is not enqueued. Uses the finalizer queue lock.
-  bool MakeCircularListIfUnenqueued(mirror::FinalizerReference* reference)
+  bool MakeCircularListIfUnenqueued(ObjPtr<mirror::FinalizerReference> reference)
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!Locks::reference_processor_lock_,
                !Locks::reference_queue_finalizer_references_lock_);
diff --git a/runtime/gc/reference_queue.cc b/runtime/gc/reference_queue.cc
index 62625c4..a0eb197 100644
--- a/runtime/gc/reference_queue.cc
+++ b/runtime/gc/reference_queue.cc
@@ -29,7 +29,7 @@
 ReferenceQueue::ReferenceQueue(Mutex* lock) : lock_(lock), list_(nullptr) {
 }
 
-void ReferenceQueue::AtomicEnqueueIfNotEnqueued(Thread* self, mirror::Reference* ref) {
+void ReferenceQueue::AtomicEnqueueIfNotEnqueued(Thread* self, ObjPtr<mirror::Reference> ref) {
   DCHECK(ref != nullptr);
   MutexLock mu(self, *lock_);
   if (ref->IsUnprocessed()) {
@@ -37,16 +37,16 @@
   }
 }
 
-void ReferenceQueue::EnqueueReference(mirror::Reference* ref) {
+void ReferenceQueue::EnqueueReference(ObjPtr<mirror::Reference> ref) {
   DCHECK(ref != nullptr);
   CHECK(ref->IsUnprocessed());
   if (IsEmpty()) {
     // 1 element cyclic queue, ie: Reference ref = ..; ref.pendingNext = ref;
-    list_ = ref;
+    list_ = ref.Ptr();
   } else {
     // The list is owned by the GC, everything that has been inserted must already be at least
     // gray.
-    mirror::Reference* head = list_->GetPendingNext<kWithoutReadBarrier>();
+    ObjPtr<mirror::Reference> head = list_->GetPendingNext<kWithoutReadBarrier>();
     DCHECK(head != nullptr);
     ref->SetPendingNext(head);
   }
@@ -54,16 +54,16 @@
   list_->SetPendingNext(ref);
 }
 
-mirror::Reference* ReferenceQueue::DequeuePendingReference() {
+ObjPtr<mirror::Reference> ReferenceQueue::DequeuePendingReference() {
   DCHECK(!IsEmpty());
-  mirror::Reference* ref = list_->GetPendingNext<kWithoutReadBarrier>();
+  ObjPtr<mirror::Reference> ref = list_->GetPendingNext<kWithoutReadBarrier>();
   DCHECK(ref != nullptr);
   // Note: the following code is thread-safe because it is only called from ProcessReferences which
   // is single threaded.
   if (list_ == ref) {
     list_ = nullptr;
   } else {
-    mirror::Reference* next = ref->GetPendingNext<kWithoutReadBarrier>();
+    ObjPtr<mirror::Reference> next = ref->GetPendingNext<kWithoutReadBarrier>();
     list_->SetPendingNext(next);
   }
   ref->SetPendingNext(nullptr);
@@ -75,19 +75,19 @@
     // collector (SemiSpace) is running.
     CHECK(ref != nullptr);
     collector::ConcurrentCopying* concurrent_copying = heap->ConcurrentCopyingCollector();
-    mirror::Object* rb_ptr = ref->GetReadBarrierPointer();
-    if (rb_ptr == ReadBarrier::GrayPtr()) {
-      ref->AtomicSetReadBarrierPointer(ReadBarrier::GrayPtr(), ReadBarrier::WhitePtr());
-      CHECK_EQ(ref->GetReadBarrierPointer(), ReadBarrier::WhitePtr());
+    uint32_t rb_state = ref->GetReadBarrierState();
+    if (rb_state == ReadBarrier::GrayState()) {
+      ref->AtomicSetReadBarrierState(ReadBarrier::GrayState(), ReadBarrier::WhiteState());
+      CHECK_EQ(ref->GetReadBarrierState(), ReadBarrier::WhiteState());
     } else {
       // In ConcurrentCopying::ProcessMarkStackRef() we may leave a white reference in the queue and
       // find it here, which is OK.
-      CHECK_EQ(rb_ptr, ReadBarrier::WhitePtr()) << "ref=" << ref << " rb_ptr=" << rb_ptr;
-      mirror::Object* referent = ref->GetReferent<kWithoutReadBarrier>();
+      CHECK_EQ(rb_state, ReadBarrier::WhiteState()) << "ref=" << ref << " rb_state=" << rb_state;
+      ObjPtr<mirror::Object> referent = ref->GetReferent<kWithoutReadBarrier>();
       // The referent could be null if it's cleared by a mutator (Reference.clear()).
       if (referent != nullptr) {
-        CHECK(concurrent_copying->IsInToSpace(referent))
-            << "ref=" << ref << " rb_ptr=" << ref->GetReadBarrierPointer()
+        CHECK(concurrent_copying->IsInToSpace(referent.Ptr()))
+            << "ref=" << ref << " rb_state=" << ref->GetReadBarrierState()
             << " referent=" << referent;
       }
     }
@@ -96,13 +96,13 @@
 }
 
 void ReferenceQueue::Dump(std::ostream& os) const {
-  mirror::Reference* cur = list_;
+  ObjPtr<mirror::Reference> cur = list_;
   os << "Reference starting at list_=" << list_ << "\n";
   if (cur == nullptr) {
     return;
   }
   do {
-    mirror::Reference* pending_next = cur->GetPendingNext();
+    ObjPtr<mirror::Reference> pending_next = cur->GetPendingNext();
     os << "Reference= " << cur << " PendingNext=" << pending_next;
     if (cur->IsFinalizerReferenceInstance()) {
       os << " Zombie=" << cur->AsFinalizerReference()->GetZombie();
@@ -114,7 +114,7 @@
 
 size_t ReferenceQueue::GetLength() const {
   size_t count = 0;
-  mirror::Reference* cur = list_;
+  ObjPtr<mirror::Reference> cur = list_;
   if (cur != nullptr) {
     do {
       ++count;
@@ -127,7 +127,7 @@
 void ReferenceQueue::ClearWhiteReferences(ReferenceQueue* cleared_references,
                                           collector::GarbageCollector* collector) {
   while (!IsEmpty()) {
-    mirror::Reference* ref = DequeuePendingReference();
+    ObjPtr<mirror::Reference> ref = DequeuePendingReference();
     mirror::HeapReference<mirror::Object>* referent_addr = ref->GetReferentReferenceAddr();
     if (referent_addr->AsMirrorPtr() != nullptr &&
         !collector->IsMarkedHeapReference(referent_addr)) {
@@ -145,11 +145,11 @@
 void ReferenceQueue::EnqueueFinalizerReferences(ReferenceQueue* cleared_references,
                                                 collector::GarbageCollector* collector) {
   while (!IsEmpty()) {
-    mirror::FinalizerReference* ref = DequeuePendingReference()->AsFinalizerReference();
+    ObjPtr<mirror::FinalizerReference> ref = DequeuePendingReference()->AsFinalizerReference();
     mirror::HeapReference<mirror::Object>* referent_addr = ref->GetReferentReferenceAddr();
     if (referent_addr->AsMirrorPtr() != nullptr &&
         !collector->IsMarkedHeapReference(referent_addr)) {
-      mirror::Object* forward_address = collector->MarkObject(referent_addr->AsMirrorPtr());
+      ObjPtr<mirror::Object> forward_address = collector->MarkObject(referent_addr->AsMirrorPtr());
       // Move the updated referent to the zombie field.
       if (Runtime::Current()->IsActiveTransaction()) {
         ref->SetZombie<true>(forward_address);
@@ -167,8 +167,8 @@
   if (UNLIKELY(IsEmpty())) {
     return;
   }
-  mirror::Reference* const head = list_;
-  mirror::Reference* ref = head;
+  ObjPtr<mirror::Reference> const head = list_;
+  ObjPtr<mirror::Reference> ref = head;
   do {
     mirror::HeapReference<mirror::Object>* referent_addr = ref->GetReferentReferenceAddr();
     if (referent_addr->AsMirrorPtr() != nullptr) {
diff --git a/runtime/gc/reference_queue.h b/runtime/gc/reference_queue.h
index 1de1aa1..b5ec1e5 100644
--- a/runtime/gc/reference_queue.h
+++ b/runtime/gc/reference_queue.h
@@ -26,6 +26,7 @@
 #include "base/timing_logger.h"
 #include "globals.h"
 #include "jni.h"
+#include "obj_ptr.h"
 #include "object_callbacks.h"
 #include "offsets.h"
 #include "thread_pool.h"
@@ -54,15 +55,15 @@
   // Enqueue a reference if it is unprocessed. Thread safe to call from multiple
   // threads since it uses a lock to avoid a race between checking for the references presence and
   // adding it.
-  void AtomicEnqueueIfNotEnqueued(Thread* self, mirror::Reference* ref)
+  void AtomicEnqueueIfNotEnqueued(Thread* self, ObjPtr<mirror::Reference> ref)
       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!*lock_);
 
   // Enqueue a reference. The reference must be unprocessed.
   // Not thread safe, used when mutators are paused to minimize lock overhead.
-  void EnqueueReference(mirror::Reference* ref) REQUIRES_SHARED(Locks::mutator_lock_);
+  void EnqueueReference(ObjPtr<mirror::Reference> ref) REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Dequeue a reference from the queue and return that dequeued reference.
-  mirror::Reference* DequeuePendingReference() REQUIRES_SHARED(Locks::mutator_lock_);
+  ObjPtr<mirror::Reference> DequeuePendingReference() REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Enqueues finalizer references with white referents.  White referents are blackened, moved to
   // the zombie field, and the referent field is cleared.
@@ -104,7 +105,7 @@
   // calling AtomicEnqueueIfNotEnqueued.
   Mutex* const lock_;
   // The actual reference list. Only a root for the mark compact GC since it will be null for other
-  // GC types.
+  // GC types. Not an ObjPtr since it is accessed from multiple threads.
   mirror::Reference* list_;
 
   DISALLOW_IMPLICIT_CONSTRUCTORS(ReferenceQueue);
diff --git a/runtime/gc/reference_queue_test.cc b/runtime/gc/reference_queue_test.cc
index 5b8a3c2..3ca3353 100644
--- a/runtime/gc/reference_queue_test.cc
+++ b/runtime/gc/reference_queue_test.cc
@@ -52,10 +52,10 @@
 
   std::set<mirror::Reference*> refs = {ref1.Get(), ref2.Get()};
   std::set<mirror::Reference*> dequeued;
-  dequeued.insert(queue.DequeuePendingReference());
+  dequeued.insert(queue.DequeuePendingReference().Ptr());
   ASSERT_TRUE(!queue.IsEmpty());
   ASSERT_EQ(queue.GetLength(), 1U);
-  dequeued.insert(queue.DequeuePendingReference());
+  dequeued.insert(queue.DequeuePendingReference().Ptr());
   ASSERT_EQ(queue.GetLength(), 0U);
   ASSERT_TRUE(queue.IsEmpty());
   ASSERT_EQ(refs, dequeued);
diff --git a/runtime/gc/space/image_space.cc b/runtime/gc/space/image_space.cc
index a40e408..6019540 100644
--- a/runtime/gc/space/image_space.cc
+++ b/runtime/gc/space/image_space.cc
@@ -398,9 +398,9 @@
     CHECK_ALIGNED(current, kObjectAlignment);
     auto* obj = reinterpret_cast<mirror::Object*>(current);
     CHECK(obj->GetClass() != nullptr) << "Image object at address " << obj << " has null class";
-    CHECK(live_bitmap_->Test(obj)) << PrettyTypeOf(obj);
-    if (kUseBakerOrBrooksReadBarrier) {
-      obj->AssertReadBarrierPointer();
+    CHECK(live_bitmap_->Test(obj)) << obj->PrettyTypeOf();
+    if (kUseBakerReadBarrier) {
+      obj->AssertReadBarrierState();
     }
     current += RoundUp(obj->SizeOf(), kObjectAlignment);
   }
@@ -912,7 +912,7 @@
     ALWAYS_INLINE void VisitRoot(mirror::CompressedReference<mirror::Object>* root ATTRIBUTE_UNUSED)
         const {}
 
-    ALWAYS_INLINE void operator()(mirror::Object* obj,
+    ALWAYS_INLINE void operator()(ObjPtr<mirror::Object> obj,
                                   MemberOffset offset,
                                   bool is_static ATTRIBUTE_UNUSED) const
         NO_THREAD_SAFETY_ANALYSIS {
@@ -949,7 +949,8 @@
     }
 
     // java.lang.ref.Reference visitor.
-    void operator()(mirror::Class* klass ATTRIBUTE_UNUSED, mirror::Reference* ref) const
+    void operator()(ObjPtr<mirror::Class> klass ATTRIBUTE_UNUSED,
+                    ObjPtr<mirror::Reference> ref) const
         REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_) {
       mirror::Object* obj = ref->GetReferent<kWithoutReadBarrier>();
       ref->SetFieldObjectWithoutWriteBarrier<false, true, kVerifyNone>(
@@ -1001,7 +1002,7 @@
         mirror::IfTable* iftable = as_klass->GetIfTable<kVerifyNone, kWithoutReadBarrier>();
         // Ensure iftable arrays are fixed up since we need GetMethodArray to return the valid
         // contents.
-        if (iftable != nullptr && IsInAppImage(iftable)) {
+        if (IsInAppImage(iftable)) {
           operator()(iftable);
           for (int32_t i = 0, count = iftable->Count(); i < count; ++i) {
             if (iftable->GetMethodArrayCount<kVerifyNone, kWithoutReadBarrier>(i) > 0) {
@@ -1254,6 +1255,16 @@
             }
           }
         }
+
+        mirror::MethodTypeDexCacheType* method_types = dex_cache->GetResolvedMethodTypes();
+        if (method_types != nullptr) {
+          mirror::MethodTypeDexCacheType* new_method_types =
+              fixup_adapter.ForwardObject(method_types);
+          if (method_types != new_method_types) {
+            dex_cache->SetResolvedMethodTypes(new_method_types);
+          }
+          dex_cache->FixupResolvedMethodTypes<kWithoutReadBarrier>(new_method_types, fixup_adapter);
+        }
       }
     }
     {
@@ -1595,7 +1606,7 @@
 
   std::ostringstream oss;
   bool first = true;
-  for (auto msg : error_msgs) {
+  for (const auto& msg : error_msgs) {
     if (!first) {
       oss << "\n    ";
     }
diff --git a/runtime/gc/space/large_object_space.cc b/runtime/gc/space/large_object_space.cc
index 0030326..e71a397 100644
--- a/runtime/gc/space/large_object_space.cc
+++ b/runtime/gc/space/large_object_space.cc
@@ -155,11 +155,12 @@
   large_objects_.Put(obj, LargeObject {mem_map, false /* not zygote */});
   const size_t allocation_size = mem_map->BaseSize();
   DCHECK(bytes_allocated != nullptr);
-  begin_ = std::min(begin_, reinterpret_cast<uint8_t*>(obj));
-  uint8_t* obj_end = reinterpret_cast<uint8_t*>(obj) + allocation_size;
-  if (end_ == nullptr || obj_end > end_) {
-    end_ = obj_end;
+
+  if (begin_ == nullptr || begin_ > reinterpret_cast<uint8_t*>(obj)) {
+    begin_ = reinterpret_cast<uint8_t*>(obj);
   }
+  end_ = std::max(end_, reinterpret_cast<uint8_t*>(obj) + allocation_size);
+
   *bytes_allocated = allocation_size;
   if (usable_size != nullptr) {
     *usable_size = allocation_size;
@@ -605,9 +606,12 @@
     std::swap(live_bitmap, mark_bitmap);
   }
   AllocSpace::SweepCallbackContext scc(swap_bitmaps, this);
+  std::pair<uint8_t*, uint8_t*> range = GetBeginEndAtomic();
   accounting::LargeObjectBitmap::SweepWalk(*live_bitmap, *mark_bitmap,
-                                           reinterpret_cast<uintptr_t>(Begin()),
-                                           reinterpret_cast<uintptr_t>(End()), SweepCallback, &scc);
+                                           reinterpret_cast<uintptr_t>(range.first),
+                                           reinterpret_cast<uintptr_t>(range.second),
+                                           SweepCallback,
+                                           &scc);
   return scc.freed;
 }
 
@@ -616,6 +620,16 @@
   UNIMPLEMENTED(FATAL);
 }
 
+std::pair<uint8_t*, uint8_t*> LargeObjectMapSpace::GetBeginEndAtomic() const {
+  MutexLock mu(Thread::Current(), lock_);
+  return std::make_pair(Begin(), End());
+}
+
+std::pair<uint8_t*, uint8_t*> FreeListSpace::GetBeginEndAtomic() const {
+  MutexLock mu(Thread::Current(), lock_);
+  return std::make_pair(Begin(), End());
+}
+
 }  // namespace space
 }  // namespace gc
 }  // namespace art
diff --git a/runtime/gc/space/large_object_space.h b/runtime/gc/space/large_object_space.h
index 0320e79..38e28b1 100644
--- a/runtime/gc/space/large_object_space.h
+++ b/runtime/gc/space/large_object_space.h
@@ -104,6 +104,10 @@
   // objects.
   virtual void SetAllLargeObjectsAsZygoteObjects(Thread* self) = 0;
 
+  // GetRangeAtomic returns Begin() and End() atomically, that is, it never returns Begin() and
+  // End() from different allocations.
+  virtual std::pair<uint8_t*, uint8_t*> GetBeginEndAtomic() const = 0;
+
  protected:
   explicit LargeObjectSpace(const std::string& name, uint8_t* begin, uint8_t* end);
   static void SweepCallback(size_t num_ptrs, mirror::Object** ptrs, void* arg);
@@ -139,6 +143,8 @@
   // TODO: disabling thread safety analysis as this may be called when we already hold lock_.
   bool Contains(const mirror::Object* obj) const NO_THREAD_SAFETY_ANALYSIS;
 
+  std::pair<uint8_t*, uint8_t*> GetBeginEndAtomic() const OVERRIDE REQUIRES(!lock_);
+
  protected:
   struct LargeObject {
     MemMap* mem_map;
@@ -172,6 +178,8 @@
   void Walk(DlMallocSpace::WalkCallback callback, void* arg) OVERRIDE REQUIRES(!lock_);
   void Dump(std::ostream& os) const REQUIRES(!lock_);
 
+  std::pair<uint8_t*, uint8_t*> GetBeginEndAtomic() const OVERRIDE REQUIRES(!lock_);
+
  protected:
   FreeListSpace(const std::string& name, MemMap* mem_map, uint8_t* begin, uint8_t* end);
   size_t GetSlotIndexForAddress(uintptr_t address) const {
diff --git a/runtime/gc/space/region_space-inl.h b/runtime/gc/space/region_space-inl.h
index 66fd62c..3e79223 100644
--- a/runtime/gc/space/region_space-inl.h
+++ b/runtime/gc/space/region_space-inl.h
@@ -116,18 +116,17 @@
                                                   size_t* bytes_tl_bulk_allocated) {
   DCHECK(IsAllocated() && IsInToSpace());
   DCHECK_ALIGNED(num_bytes, kAlignment);
-  Atomic<uint8_t*>* atomic_top = reinterpret_cast<Atomic<uint8_t*>*>(&top_);
   uint8_t* old_top;
   uint8_t* new_top;
   do {
-    old_top = atomic_top->LoadRelaxed();
+    old_top = top_.LoadRelaxed();
     new_top = old_top + num_bytes;
     if (UNLIKELY(new_top > end_)) {
       return nullptr;
     }
-  } while (!atomic_top->CompareExchangeWeakSequentiallyConsistent(old_top, new_top));
-  reinterpret_cast<Atomic<uint64_t>*>(&objects_allocated_)->FetchAndAddSequentiallyConsistent(1);
-  DCHECK_LE(atomic_top->LoadRelaxed(), end_);
+  } while (!top_.CompareExchangeWeakRelaxed(old_top, new_top));
+  objects_allocated_.FetchAndAddRelaxed(1);
+  DCHECK_LE(Top(), end_);
   DCHECK_LT(old_top, end_);
   DCHECK_LE(new_top, end_);
   *bytes_allocated = num_bytes;
@@ -241,15 +240,28 @@
     } else if (r->IsLargeTail()) {
       // Do nothing.
     } else {
+      // For newly allocated and evacuated regions, live bytes will be -1.
       uint8_t* pos = r->Begin();
       uint8_t* top = r->Top();
-      while (pos < top) {
-        mirror::Object* obj = reinterpret_cast<mirror::Object*>(pos);
-        if (obj->GetClass<kDefaultVerifyFlags, kWithoutReadBarrier>() != nullptr) {
+      const bool need_bitmap =
+          r->LiveBytes() != static_cast<size_t>(-1) &&
+          r->LiveBytes() != static_cast<size_t>(top - pos);
+      if (need_bitmap) {
+        GetLiveBitmap()->VisitMarkedRange(
+            reinterpret_cast<uintptr_t>(pos),
+            reinterpret_cast<uintptr_t>(top),
+            [callback, arg](mirror::Object* obj) {
           callback(obj, arg);
-          pos = reinterpret_cast<uint8_t*>(GetNextObject(obj));
-        } else {
-          break;
+        });
+      } else {
+        while (pos < top) {
+          mirror::Object* obj = reinterpret_cast<mirror::Object*>(pos);
+          if (obj->GetClass<kDefaultVerifyFlags, kWithoutReadBarrier>() != nullptr) {
+            callback(obj, arg);
+            pos = reinterpret_cast<uint8_t*>(GetNextObject(obj));
+          } else {
+            break;
+          }
         }
       }
     }
diff --git a/runtime/gc/space/region_space.cc b/runtime/gc/space/region_space.cc
index 23cae7c..8077319 100644
--- a/runtime/gc/space/region_space.cc
+++ b/runtime/gc/space/region_space.cc
@@ -57,8 +57,10 @@
   regions_.reset(new Region[num_regions_]);
   uint8_t* region_addr = mem_map->Begin();
   for (size_t i = 0; i < num_regions_; ++i, region_addr += kRegionSize) {
-    regions_[i] = Region(i, region_addr, region_addr + kRegionSize);
+    regions_[i].Init(i, region_addr, region_addr + kRegionSize);
   }
+  mark_bitmap_.reset(
+      accounting::ContinuousSpaceBitmap::Create("region space live bitmap", Begin(), Capacity()));
   if (kIsDebugBuild) {
     CHECK_EQ(regions_[0].Begin(), Begin());
     for (size_t i = 0; i < num_regions_; ++i) {
@@ -70,7 +72,6 @@
     }
     CHECK_EQ(regions_[num_regions_ - 1].End(), Limit());
   }
-  full_region_ = Region();
   DCHECK(!full_region_.IsFree());
   DCHECK(full_region_.IsAllocated());
   current_region_ = &full_region_;
@@ -215,7 +216,28 @@
       r->Clear();
       --num_non_free_regions_;
     } else if (r->IsInUnevacFromSpace()) {
+      size_t full_count = 0;
+      while (r->IsInUnevacFromSpace()) {
+        Region* const cur = &regions_[i + full_count];
+        if (i + full_count >= num_regions_ ||
+            cur->LiveBytes() != static_cast<size_t>(cur->Top() - cur->Begin())) {
+          break;
+        }
+        if (full_count != 0) {
+          cur->SetUnevacFromSpaceAsToSpace();
+        }
+        ++full_count;
+      }
+      // Note that r is the full_count == 0 iteration since it is not handled by the loop.
       r->SetUnevacFromSpaceAsToSpace();
+      if (full_count >= 1) {
+        GetLiveBitmap()->ClearRange(
+            reinterpret_cast<mirror::Object*>(r->Begin()),
+            reinterpret_cast<mirror::Object*>(r->Begin() + full_count * kRegionSize));
+        // Skip over extra regions we cleared.
+        // Subtract one for the for loop.
+        i += full_count - 1;
+      }
     }
   }
   evac_region_ = nullptr;
@@ -323,7 +345,7 @@
 void RegionSpace::RecordAlloc(mirror::Object* ref) {
   CHECK(ref != nullptr);
   Region* r = RefToRegion(ref);
-  reinterpret_cast<Atomic<uint64_t>*>(&r->objects_allocated_)->FetchAndAddSequentiallyConsistent(1);
+  r->objects_allocated_.FetchAndAddSequentiallyConsistent(1);
 }
 
 bool RegionSpace::AllocNewTlab(Thread* self) {
@@ -401,7 +423,8 @@
 }
 
 void RegionSpace::Region::Dump(std::ostream& os) const {
-  os << "Region[" << idx_ << "]=" << reinterpret_cast<void*>(begin_) << "-" << reinterpret_cast<void*>(top_)
+  os << "Region[" << idx_ << "]=" << reinterpret_cast<void*>(begin_) << "-"
+     << reinterpret_cast<void*>(Top())
      << "-" << reinterpret_cast<void*>(end_)
      << " state=" << static_cast<uint>(state_) << " type=" << static_cast<uint>(type_)
      << " objects_allocated=" << objects_allocated_
diff --git a/runtime/gc/space/region_space.h b/runtime/gc/space/region_space.h
index 4e57a85..f3b9595 100644
--- a/runtime/gc/space/region_space.h
+++ b/runtime/gc/space/region_space.h
@@ -77,12 +77,10 @@
     return 0;
   }
   accounting::ContinuousSpaceBitmap* GetLiveBitmap() const OVERRIDE {
-    // No live bitmap.
-    return nullptr;
+    return mark_bitmap_.get();
   }
   accounting::ContinuousSpaceBitmap* GetMarkBitmap() const OVERRIDE {
-    // No mark bitmap.
-    return nullptr;
+    return mark_bitmap_.get();
   }
 
   void Clear() OVERRIDE REQUIRES(!region_lock_);
@@ -248,11 +246,19 @@
           objects_allocated_(0), alloc_time_(0), live_bytes_(static_cast<size_t>(-1)),
           is_newly_allocated_(false), is_a_tlab_(false), thread_(nullptr) {}
 
-    Region(size_t idx, uint8_t* begin, uint8_t* end)
-        : idx_(idx), begin_(begin), top_(begin), end_(end),
-          state_(RegionState::kRegionStateFree), type_(RegionType::kRegionTypeNone),
-          objects_allocated_(0), alloc_time_(0), live_bytes_(static_cast<size_t>(-1)),
-          is_newly_allocated_(false), is_a_tlab_(false), thread_(nullptr) {
+    void Init(size_t idx, uint8_t* begin, uint8_t* end) {
+      idx_ = idx;
+      begin_ = begin;
+      top_.StoreRelaxed(begin);
+      end_ = end;
+      state_ = RegionState::kRegionStateFree;
+      type_ = RegionType::kRegionTypeNone;
+      objects_allocated_.StoreRelaxed(0);
+      alloc_time_ = 0;
+      live_bytes_ = static_cast<size_t>(-1);
+      is_newly_allocated_ = false;
+      is_a_tlab_ = false;
+      thread_ = nullptr;
       DCHECK_LT(begin, end);
       DCHECK_EQ(static_cast<size_t>(end - begin), kRegionSize);
     }
@@ -266,16 +272,13 @@
     }
 
     void Clear() {
-      top_ = begin_;
+      top_.StoreRelaxed(begin_);
       state_ = RegionState::kRegionStateFree;
       type_ = RegionType::kRegionTypeNone;
-      objects_allocated_ = 0;
+      objects_allocated_.StoreRelaxed(0);
       alloc_time_ = 0;
       live_bytes_ = static_cast<size_t>(-1);
-      if (!kMadviseZeroes) {
-        memset(begin_, 0, end_ - begin_);
-      }
-      madvise(begin_, end_ - begin_, MADV_DONTNEED);
+      ZeroAndReleasePages(begin_, end_ - begin_);
       is_newly_allocated_ = false;
       is_a_tlab_ = false;
       thread_ = nullptr;
@@ -289,8 +292,8 @@
       bool is_free = state_ == RegionState::kRegionStateFree;
       if (is_free) {
         DCHECK(IsInNoSpace());
-        DCHECK_EQ(begin_, top_);
-        DCHECK_EQ(objects_allocated_, 0U);
+        DCHECK_EQ(begin_, Top());
+        DCHECK_EQ(objects_allocated_.LoadRelaxed(), 0U);
       }
       return is_free;
     }
@@ -330,7 +333,7 @@
     bool IsLarge() const {
       bool is_large = state_ == RegionState::kRegionStateLarge;
       if (is_large) {
-        DCHECK_LT(begin_ + 1 * MB, top_);
+        DCHECK_LT(begin_ + 1 * MB, Top());
       }
       return is_large;
     }
@@ -339,7 +342,7 @@
     bool IsLargeTail() const {
       bool is_large_tail = state_ == RegionState::kRegionStateLargeTail;
       if (is_large_tail) {
-        DCHECK_EQ(begin_, top_);
+        DCHECK_EQ(begin_, Top());
       }
       return is_large_tail;
     }
@@ -397,15 +400,15 @@
 
     size_t BytesAllocated() const {
       if (IsLarge()) {
-        DCHECK_LT(begin_ + kRegionSize, top_);
-        return static_cast<size_t>(top_ - begin_);
+        DCHECK_LT(begin_ + kRegionSize, Top());
+        return static_cast<size_t>(Top() - begin_);
       } else if (IsLargeTail()) {
-        DCHECK_EQ(begin_, top_);
+        DCHECK_EQ(begin_, Top());
         return 0;
       } else {
         DCHECK(IsAllocated()) << static_cast<uint>(state_);
-        DCHECK_LE(begin_, top_);
-        size_t bytes = static_cast<size_t>(top_ - begin_);
+        DCHECK_LE(begin_, Top());
+        size_t bytes = static_cast<size_t>(Top() - begin_);
         DCHECK_LE(bytes, kRegionSize);
         return bytes;
       }
@@ -413,12 +416,12 @@
 
     size_t ObjectsAllocated() const {
       if (IsLarge()) {
-        DCHECK_LT(begin_ + 1 * MB, top_);
-        DCHECK_EQ(objects_allocated_, 0U);
+        DCHECK_LT(begin_ + 1 * MB, Top());
+        DCHECK_EQ(objects_allocated_.LoadRelaxed(), 0U);
         return 1;
       } else if (IsLargeTail()) {
-        DCHECK_EQ(begin_, top_);
-        DCHECK_EQ(objects_allocated_, 0U);
+        DCHECK_EQ(begin_, Top());
+        DCHECK_EQ(objects_allocated_.LoadRelaxed(), 0U);
         return 0;
       } else {
         DCHECK(IsAllocated()) << static_cast<uint>(state_);
@@ -430,12 +433,12 @@
       return begin_;
     }
 
-    uint8_t* Top() const {
-      return top_;
+    ALWAYS_INLINE uint8_t* Top() const {
+      return top_.LoadRelaxed();
     }
 
     void SetTop(uint8_t* new_top) {
-      top_ = new_top;
+      top_.StoreRelaxed(new_top);
     }
 
     uint8_t* End() const {
@@ -450,27 +453,26 @@
 
     void RecordThreadLocalAllocations(size_t num_objects, size_t num_bytes) {
       DCHECK(IsAllocated());
-      DCHECK_EQ(objects_allocated_, 0U);
-      DCHECK_EQ(top_, end_);
-      objects_allocated_ = num_objects;
-      top_ = begin_ + num_bytes;
-      DCHECK_EQ(top_, end_);
+      DCHECK_EQ(objects_allocated_.LoadRelaxed(), 0U);
+      DCHECK_EQ(Top(), end_);
+      objects_allocated_.StoreRelaxed(num_objects);
+      top_.StoreRelaxed(begin_ + num_bytes);
+      DCHECK_EQ(Top(), end_);
     }
 
    private:
-    size_t idx_;                   // The region's index in the region space.
-    uint8_t* begin_;               // The begin address of the region.
-    // Can't use Atomic<uint8_t*> as Atomic's copy operator is implicitly deleted.
-    uint8_t* top_;                 // The current position of the allocation.
-    uint8_t* end_;                 // The end address of the region.
-    RegionState state_;            // The region state (see RegionState).
-    RegionType type_;              // The region type (see RegionType).
-    uint64_t objects_allocated_;   // The number of objects allocated.
-    uint32_t alloc_time_;          // The allocation time of the region.
-    size_t live_bytes_;            // The live bytes. Used to compute the live percent.
-    bool is_newly_allocated_;      // True if it's allocated after the last collection.
-    bool is_a_tlab_;               // True if it's a tlab.
-    Thread* thread_;               // The owning thread if it's a tlab.
+    size_t idx_;                        // The region's index in the region space.
+    uint8_t* begin_;                    // The begin address of the region.
+    Atomic<uint8_t*> top_;              // The current position of the allocation.
+    uint8_t* end_;                      // The end address of the region.
+    RegionState state_;                 // The region state (see RegionState).
+    RegionType type_;                   // The region type (see RegionType).
+    Atomic<size_t> objects_allocated_;  // The number of objects allocated.
+    uint32_t alloc_time_;               // The allocation time of the region.
+    size_t live_bytes_;                 // The live bytes. Used to compute the live percent.
+    bool is_newly_allocated_;           // True if it's allocated after the last collection.
+    bool is_a_tlab_;                    // True if it's a tlab.
+    Thread* thread_;                    // The owning thread if it's a tlab.
 
     friend class RegionSpace;
   };
@@ -516,6 +518,9 @@
   Region* evac_region_;            // The region that's being evacuated to currently.
   Region full_region_;             // The dummy/sentinel region that looks full.
 
+  // Mark bitmap used by the GC.
+  std::unique_ptr<accounting::ContinuousSpaceBitmap> mark_bitmap_;
+
   DISALLOW_COPY_AND_ASSIGN(RegionSpace);
 };
 
diff --git a/runtime/gc/space/space_test.h b/runtime/gc/space/space_test.h
index 17d7c87..cbb3d73 100644
--- a/runtime/gc/space/space_test.h
+++ b/runtime/gc/space/space_test.h
@@ -62,7 +62,7 @@
       byte_array_class_ = self->GetJniEnv()->NewLocalRef(byte_array_class);
       EXPECT_TRUE(byte_array_class_ != nullptr);
     }
-    return reinterpret_cast<mirror::Class*>(self->DecodeJObject(byte_array_class_));
+    return self->DecodeJObject(byte_array_class_)->AsClass();
   }
 
   mirror::Object* Alloc(space::MallocSpace* alloc_space,
@@ -108,13 +108,10 @@
     EXPECT_GE(size, SizeOfZeroLengthByteArray());
     EXPECT_TRUE(byte_array_class != nullptr);
     o->SetClass(byte_array_class);
-    if (kUseBakerOrBrooksReadBarrier) {
+    if (kUseBakerReadBarrier) {
       // Like the proper heap object allocation, install and verify
-      // the correct read barrier pointer.
-      if (kUseBrooksReadBarrier) {
-        o->SetReadBarrierPointer(o);
-      }
-      o->AssertReadBarrierPointer();
+      // the correct read barrier state.
+      o->AssertReadBarrierState();
     }
     mirror::Array* arr = o->AsArray<kVerifyNone>();
     size_t header_size = SizeOfZeroLengthByteArray();
diff --git a/runtime/gc/system_weak.h b/runtime/gc/system_weak.h
index 3910a28..e5cddfc 100644
--- a/runtime/gc/system_weak.h
+++ b/runtime/gc/system_weak.h
@@ -30,7 +30,8 @@
 
   virtual void Allow() REQUIRES_SHARED(Locks::mutator_lock_) = 0;
   virtual void Disallow() REQUIRES_SHARED(Locks::mutator_lock_) = 0;
-  virtual void Broadcast() REQUIRES_SHARED(Locks::mutator_lock_) = 0;
+  // See Runtime::BroadcastForNewSystemWeaks for the broadcast_for_checkpoint definition.
+  virtual void Broadcast(bool broadcast_for_checkpoint) = 0;
 
   virtual void Sweep(IsMarkedVisitor* visitor) REQUIRES_SHARED(Locks::mutator_lock_) = 0;
 };
@@ -61,19 +62,27 @@
     allow_new_system_weak_ = false;
   }
 
-  void Broadcast() OVERRIDE
-      REQUIRES_SHARED(Locks::mutator_lock_)
+  void Broadcast(bool broadcast_for_checkpoint ATTRIBUTE_UNUSED) OVERRIDE
       REQUIRES(!allow_disallow_lock_) {
-    CHECK(kUseReadBarrier);
     MutexLock mu(Thread::Current(), allow_disallow_lock_);
     new_weak_condition_.Broadcast(Thread::Current());
   }
 
+  // WARNING: For lock annotations only.
+  Mutex* GetAllowDisallowLock() const RETURN_CAPABILITY(allow_disallow_lock_) {
+    return nullptr;
+  }
+
  protected:
-  void Wait(Thread* self) REQUIRES_SHARED(allow_disallow_lock_) {
+  void Wait(Thread* self)
+      REQUIRES_SHARED(Locks::mutator_lock_)
+      REQUIRES(allow_disallow_lock_) {
     // Wait for GC's sweeping to complete and allow new records
     while (UNLIKELY((!kUseReadBarrier && !allow_new_system_weak_) ||
                     (kUseReadBarrier && !self->GetWeakRefAccessEnabled()))) {
+      // Check and run the empty checkpoint before blocking so the empty checkpoint will work in the
+      // presence of threads blocking for weak ref access.
+      self->CheckEmptyCheckpoint();
       new_weak_condition_.WaitHoldingLocks(self);
     }
   }
diff --git a/runtime/gc/system_weak_test.cc b/runtime/gc/system_weak_test.cc
index af8a444..9b601c0 100644
--- a/runtime/gc/system_weak_test.cc
+++ b/runtime/gc/system_weak_test.cc
@@ -58,12 +58,14 @@
     disallow_count_++;
   }
 
-  void Broadcast() OVERRIDE
-      REQUIRES_SHARED(Locks::mutator_lock_)
+  void Broadcast(bool broadcast_for_checkpoint) OVERRIDE
       REQUIRES(!allow_disallow_lock_) {
-    SystemWeakHolder::Broadcast();
+    SystemWeakHolder::Broadcast(broadcast_for_checkpoint);
 
-    allow_count_++;
+    if (!broadcast_for_checkpoint) {
+      // Don't count the broadcasts for running checkpoints.
+      allow_count_++;
+    }
   }
 
   void Sweep(IsMarkedVisitor* visitor) OVERRIDE
diff --git a/runtime/generate-operator-out.py b/runtime/generate-operator-out.py
new file mode 120000
index 0000000..cc291d2
--- /dev/null
+++ b/runtime/generate-operator-out.py
@@ -0,0 +1 @@
+../tools/generate-operator-out.py
\ No newline at end of file
diff --git a/runtime/generated/asm_support_gen.h b/runtime/generated/asm_support_gen.h
index 03f5bf6..f13ff8c 100644
--- a/runtime/generated/asm_support_gen.h
+++ b/runtime/generated/asm_support_gen.h
@@ -52,6 +52,8 @@
 DEFINE_CHECK_EQ(static_cast<uint32_t>(MIRROR_CLASS_STATUS_INITIALIZED), (static_cast<uint32_t>((art::mirror::Class::kStatusInitialized))))
 #define ACCESS_FLAGS_CLASS_IS_FINALIZABLE 0x80000000
 DEFINE_CHECK_EQ(static_cast<uint32_t>(ACCESS_FLAGS_CLASS_IS_FINALIZABLE), (static_cast<uint32_t>((art::kAccClassIsFinalizable))))
+#define ACCESS_FLAGS_CLASS_IS_INTERFACE 0x200
+DEFINE_CHECK_EQ(static_cast<uint32_t>(ACCESS_FLAGS_CLASS_IS_INTERFACE), (static_cast<uint32_t>((art::kAccInterface))))
 #define ACCESS_FLAGS_CLASS_IS_FINALIZABLE_BIT 0x1f
 DEFINE_CHECK_EQ(static_cast<uint32_t>(ACCESS_FLAGS_CLASS_IS_FINALIZABLE_BIT), (static_cast<uint32_t>((art::MostSignificantBit(art::kAccClassIsFinalizable)))))
 #define ART_METHOD_DEX_CACHE_METHODS_OFFSET_32 20
@@ -96,6 +98,12 @@
 DEFINE_CHECK_EQ(static_cast<uint32_t>(LOCK_WORD_READ_BARRIER_STATE_MASK_TOGGLED), (static_cast<uint32_t>(art::LockWord::kReadBarrierStateMaskShiftedToggled)))
 #define LOCK_WORD_THIN_LOCK_COUNT_ONE 65536
 DEFINE_CHECK_EQ(static_cast<int32_t>(LOCK_WORD_THIN_LOCK_COUNT_ONE), (static_cast<int32_t>(art::LockWord::kThinLockCountOne)))
+#define LOCK_WORD_STATE_FORWARDING_ADDRESS 0x3
+DEFINE_CHECK_EQ(static_cast<uint32_t>(LOCK_WORD_STATE_FORWARDING_ADDRESS), (static_cast<uint32_t>(art::LockWord::kStateForwardingAddress)))
+#define LOCK_WORD_STATE_FORWARDING_ADDRESS_OVERFLOW 0x40000000
+DEFINE_CHECK_EQ(static_cast<uint32_t>(LOCK_WORD_STATE_FORWARDING_ADDRESS_OVERFLOW), (static_cast<uint32_t>(art::LockWord::kStateForwardingAddressOverflow)))
+#define LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT 0x3
+DEFINE_CHECK_EQ(static_cast<uint32_t>(LOCK_WORD_STATE_FORWARDING_ADDRESS_SHIFT), (static_cast<uint32_t>(art::LockWord::kForwardingAddressShift)))
 #define LOCK_WORD_GC_STATE_MASK_SHIFTED 0x30000000
 DEFINE_CHECK_EQ(static_cast<uint32_t>(LOCK_WORD_GC_STATE_MASK_SHIFTED), (static_cast<uint32_t>(art::LockWord::kGCStateMaskShifted)))
 #define LOCK_WORD_GC_STATE_MASK_SHIFTED_TOGGLED 0xcfffffff
@@ -134,6 +142,10 @@
 DEFINE_CHECK_EQ(static_cast<int32_t>(THREAD_SUSPEND_REQUEST), (static_cast<int32_t>((art::kSuspendRequest))))
 #define THREAD_CHECKPOINT_REQUEST 2
 DEFINE_CHECK_EQ(static_cast<int32_t>(THREAD_CHECKPOINT_REQUEST), (static_cast<int32_t>((art::kCheckpointRequest))))
+#define THREAD_EMPTY_CHECKPOINT_REQUEST 4
+DEFINE_CHECK_EQ(static_cast<int32_t>(THREAD_EMPTY_CHECKPOINT_REQUEST), (static_cast<int32_t>((art::kEmptyCheckpointRequest))))
+#define THREAD_SUSPEND_OR_CHECKPOINT_REQUEST 7
+DEFINE_CHECK_EQ(static_cast<int32_t>(THREAD_SUSPEND_OR_CHECKPOINT_REQUEST), (static_cast<int32_t>((art::kSuspendRequest | art::kCheckpointRequest | art::kEmptyCheckpointRequest))))
 #define JIT_CHECK_OSR (-1)
 DEFINE_CHECK_EQ(static_cast<int16_t>(JIT_CHECK_OSR), (static_cast<int16_t>((art::jit::kJitCheckForOSR))))
 #define JIT_HOTNESS_DISABLE (-2)
diff --git a/runtime/globals.h b/runtime/globals.h
index 28534e4..6164225 100644
--- a/runtime/globals.h
+++ b/runtime/globals.h
@@ -172,6 +172,9 @@
 static constexpr bool kIsVdexEnabled = false;
 #endif
 
+// Size of a heap reference.
+static constexpr size_t kHeapReferenceSize = sizeof(uint32_t);
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_GLOBALS_H_
diff --git a/runtime/handle.h b/runtime/handle.h
index d33d4a6..3db3be2 100644
--- a/runtime/handle.h
+++ b/runtime/handle.h
@@ -42,13 +42,9 @@
   Handle() : reference_(nullptr) {
   }
 
-  ALWAYS_INLINE Handle(const Handle<T>& handle) : reference_(handle.reference_) {
-  }
+  ALWAYS_INLINE Handle(const Handle<T>& handle) = default;
 
-  ALWAYS_INLINE Handle<T>& operator=(const Handle<T>& handle) {
-    reference_ = handle.reference_;
-    return *this;
-  }
+  ALWAYS_INLINE Handle<T>& operator=(const Handle<T>& handle) = default;
 
   ALWAYS_INLINE explicit Handle(StackReference<T>* reference) : reference_(reference) {
   }
@@ -109,15 +105,10 @@
   }
 
   ALWAYS_INLINE MutableHandle(const MutableHandle<T>& handle)
-      REQUIRES_SHARED(Locks::mutator_lock_)
-      : Handle<T>(handle.reference_) {
-  }
+      REQUIRES_SHARED(Locks::mutator_lock_) = default;
 
   ALWAYS_INLINE MutableHandle<T>& operator=(const MutableHandle<T>& handle)
-      REQUIRES_SHARED(Locks::mutator_lock_) {
-    Handle<T>::operator=(handle);
-    return *this;
-  }
+      REQUIRES_SHARED(Locks::mutator_lock_) = default;
 
   ALWAYS_INLINE explicit MutableHandle(StackReference<T>* reference)
       REQUIRES_SHARED(Locks::mutator_lock_)
diff --git a/runtime/handle_scope-inl.h b/runtime/handle_scope-inl.h
index cceb007..b212d09 100644
--- a/runtime/handle_scope-inl.h
+++ b/runtime/handle_scope-inl.h
@@ -28,24 +28,30 @@
 namespace art {
 
 template<size_t kNumReferences>
-inline StackHandleScope<kNumReferences>::StackHandleScope(Thread* self, mirror::Object* fill_value)
-    : HandleScope(self->GetTopHandleScope(), kNumReferences), self_(self), pos_(0) {
-  DCHECK_EQ(self, Thread::Current());
+inline FixedSizeHandleScope<kNumReferences>::FixedSizeHandleScope(BaseHandleScope* link,
+                                                                  mirror::Object* fill_value)
+    : HandleScope(link, kNumReferences) {
   if (kDebugLocking) {
     Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
   }
-  static_assert(kNumReferences >= 1, "StackHandleScope must contain at least 1 reference");
-  // TODO: Figure out how to use a compile assert.
-  CHECK_EQ(&storage_[0], GetReferences());
+  static_assert(kNumReferences >= 1, "FixedSizeHandleScope must contain at least 1 reference");
+  DCHECK_EQ(&storage_[0], GetReferences());  // TODO: Figure out how to use a compile assert.
   for (size_t i = 0; i < kNumReferences; ++i) {
     SetReference(i, fill_value);
   }
+}
+
+template<size_t kNumReferences>
+inline StackHandleScope<kNumReferences>::StackHandleScope(Thread* self, mirror::Object* fill_value)
+    : FixedSizeHandleScope<kNumReferences>(self->GetTopHandleScope(), fill_value),
+      self_(self) {
+  DCHECK_EQ(self, Thread::Current());
   self_->PushHandleScope(this);
 }
 
 template<size_t kNumReferences>
 inline StackHandleScope<kNumReferences>::~StackHandleScope() {
-  HandleScope* top_handle_scope = self_->PopHandleScope();
+  BaseHandleScope* top_handle_scope = self_->PopHandleScope();
   DCHECK_EQ(top_handle_scope, this);
   if (kDebugLocking) {
     Locks::mutator_lock_->AssertSharedHeld(self_);
@@ -66,7 +72,7 @@
 }
 
 inline mirror::Object* HandleScope::GetReference(size_t i) const {
-  DCHECK_LT(i, number_of_references_);
+  DCHECK_LT(i, NumberOfReferences());
   if (kDebugLocking) {
     Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
   }
@@ -74,12 +80,12 @@
 }
 
 inline Handle<mirror::Object> HandleScope::GetHandle(size_t i) {
-  DCHECK_LT(i, number_of_references_);
+  DCHECK_LT(i, NumberOfReferences());
   return Handle<mirror::Object>(&GetReferences()[i]);
 }
 
 inline MutableHandle<mirror::Object> HandleScope::GetMutableHandle(size_t i) {
-  DCHECK_LT(i, number_of_references_);
+  DCHECK_LT(i, NumberOfReferences());
   return MutableHandle<mirror::Object>(&GetReferences()[i]);
 }
 
@@ -87,7 +93,7 @@
   if (kDebugLocking) {
     Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
   }
-  DCHECK_LT(i, number_of_references_);
+  DCHECK_LT(i, NumberOfReferences());
   GetReferences()[i].Assign(object);
 }
 
@@ -95,13 +101,13 @@
   // A HandleScope should always contain something. One created by the
   // jni_compiler should have a jobject/jclass as a native method is
   // passed in a this pointer or a class
-  DCHECK_GT(number_of_references_, 0U);
+  DCHECK_GT(NumberOfReferences(), 0U);
   return &GetReferences()[0] <= handle_scope_entry &&
       handle_scope_entry <= &GetReferences()[number_of_references_ - 1];
 }
 
 template<size_t kNumReferences> template<class T>
-inline MutableHandle<T> StackHandleScope<kNumReferences>::NewHandle(T* object) {
+inline MutableHandle<T> FixedSizeHandleScope<kNumReferences>::NewHandle(T* object) {
   SetReference(pos_, object);
   MutableHandle<T> h(GetHandle<T>(pos_));
   pos_++;
@@ -109,24 +115,24 @@
 }
 
 template<size_t kNumReferences> template<class MirrorType, bool kPoison>
-inline MutableHandle<MirrorType> StackHandleScope<kNumReferences>::NewHandle(
+inline MutableHandle<MirrorType> FixedSizeHandleScope<kNumReferences>::NewHandle(
     ObjPtr<MirrorType, kPoison> object) {
   return NewHandle(object.Ptr());
 }
 
 template<size_t kNumReferences> template<class T>
-inline HandleWrapper<T> StackHandleScope<kNumReferences>::NewHandleWrapper(T** object) {
+inline HandleWrapper<T> FixedSizeHandleScope<kNumReferences>::NewHandleWrapper(T** object) {
   return HandleWrapper<T>(object, NewHandle(*object));
 }
 
 template<size_t kNumReferences> template<class T>
-inline HandleWrapperObjPtr<T> StackHandleScope<kNumReferences>::NewHandleWrapper(
+inline HandleWrapperObjPtr<T> FixedSizeHandleScope<kNumReferences>::NewHandleWrapper(
     ObjPtr<T>* object) {
   return HandleWrapperObjPtr<T>(object, NewHandle(*object));
 }
 
 template<size_t kNumReferences>
-inline void StackHandleScope<kNumReferences>::SetReference(size_t i, mirror::Object* object) {
+inline void FixedSizeHandleScope<kNumReferences>::SetReference(size_t i, mirror::Object* object) {
   if (kDebugLocking) {
     Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
   }
@@ -135,12 +141,111 @@
   GetReferences()[i].Assign(object);
 }
 
+// Number of references contained within this handle scope.
+inline uint32_t BaseHandleScope::NumberOfReferences() const {
+  return LIKELY(!IsVariableSized())
+      ? AsHandleScope()->NumberOfReferences()
+      : AsVariableSized()->NumberOfReferences();
+}
+
+inline bool BaseHandleScope::Contains(StackReference<mirror::Object>* handle_scope_entry) const {
+  return LIKELY(!IsVariableSized())
+      ? AsHandleScope()->Contains(handle_scope_entry)
+      : AsVariableSized()->Contains(handle_scope_entry);
+}
+
+template <typename Visitor>
+inline void BaseHandleScope::VisitRoots(Visitor& visitor) {
+  if (LIKELY(!IsVariableSized())) {
+    AsHandleScope()->VisitRoots(visitor);
+  } else {
+    AsVariableSized()->VisitRoots(visitor);
+  }
+}
+
+inline VariableSizedHandleScope* BaseHandleScope::AsVariableSized() {
+  DCHECK(IsVariableSized());
+  return down_cast<VariableSizedHandleScope*>(this);
+}
+
+inline HandleScope* BaseHandleScope::AsHandleScope() {
+  DCHECK(!IsVariableSized());
+  return down_cast<HandleScope*>(this);
+}
+
+inline const VariableSizedHandleScope* BaseHandleScope::AsVariableSized() const {
+  DCHECK(IsVariableSized());
+  return down_cast<const VariableSizedHandleScope*>(this);
+}
+
+inline const HandleScope* BaseHandleScope::AsHandleScope() const {
+  DCHECK(!IsVariableSized());
+  return down_cast<const HandleScope*>(this);
+}
+
+template<class T>
+MutableHandle<T> VariableSizedHandleScope::NewHandle(T* object) {
+  if (current_scope_->RemainingSlots() == 0) {
+    current_scope_ = new LocalScopeType(current_scope_);
+  }
+  return current_scope_->NewHandle(object);
+}
+
 template<class MirrorType, bool kPoison>
-inline MutableHandle<MirrorType> StackHandleScopeCollection::NewHandle(
+inline MutableHandle<MirrorType> VariableSizedHandleScope::NewHandle(
     ObjPtr<MirrorType, kPoison> ptr) {
   return NewHandle(ptr.Ptr());
 }
 
+inline VariableSizedHandleScope::VariableSizedHandleScope(Thread* const self)
+    : BaseHandleScope(self->GetTopHandleScope()),
+      self_(self) {
+  current_scope_ = new LocalScopeType(/*link*/ nullptr);
+  self_->PushHandleScope(this);
+}
+
+inline VariableSizedHandleScope::~VariableSizedHandleScope() {
+  BaseHandleScope* top_handle_scope = self_->PopHandleScope();
+  DCHECK_EQ(top_handle_scope, this);
+  while (current_scope_ != nullptr) {
+    LocalScopeType* next = reinterpret_cast<LocalScopeType*>(current_scope_->GetLink());
+    delete current_scope_;
+    current_scope_ = next;
+  }
+}
+
+inline uint32_t VariableSizedHandleScope::NumberOfReferences() const {
+  uint32_t sum = 0;
+  const LocalScopeType* cur = current_scope_;
+  while (cur != nullptr) {
+    sum += cur->NumberOfReferences();
+    cur = reinterpret_cast<const LocalScopeType*>(cur->GetLink());
+  }
+  return sum;
+}
+
+inline bool VariableSizedHandleScope::Contains(StackReference<mirror::Object>* handle_scope_entry)
+    const {
+  const LocalScopeType* cur = current_scope_;
+  while (cur != nullptr) {
+    if (cur->Contains(handle_scope_entry)) {
+      return true;
+    }
+    cur = reinterpret_cast<const LocalScopeType*>(cur->GetLink());
+  }
+  return false;
+}
+
+template <typename Visitor>
+inline void VariableSizedHandleScope::VisitRoots(Visitor& visitor) {
+  LocalScopeType* cur = current_scope_;
+  while (cur != nullptr) {
+    cur->VisitRoots(visitor);
+    cur = reinterpret_cast<LocalScopeType*>(cur->GetLink());
+  }
+}
+
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_HANDLE_SCOPE_INL_H_
diff --git a/runtime/handle_scope.h b/runtime/handle_scope.h
index fc729a5..adb7d8a 100644
--- a/runtime/handle_scope.h
+++ b/runtime/handle_scope.h
@@ -29,26 +29,69 @@
 
 namespace art {
 
+class HandleScope;
 template<class MirrorType, bool kPoison> class ObjPtr;
+class Thread;
+class VariableSizedHandleScope;
 
 namespace mirror {
 class Object;
 }
 
-class Thread;
+// Basic handle scope, tracked by a list. May be variable sized.
+class PACKED(4) BaseHandleScope {
+ public:
+  bool IsVariableSized() const {
+    return number_of_references_ == kNumReferencesVariableSized;
+  }
+
+  // Number of references contained within this handle scope.
+  ALWAYS_INLINE uint32_t NumberOfReferences() const;
+
+  ALWAYS_INLINE bool Contains(StackReference<mirror::Object>* handle_scope_entry) const;
+
+  template <typename Visitor>
+  ALWAYS_INLINE void VisitRoots(Visitor& visitor) REQUIRES_SHARED(Locks::mutator_lock_);
+
+  // Link to previous BaseHandleScope or null.
+  BaseHandleScope* GetLink() const {
+    return link_;
+  }
+
+  ALWAYS_INLINE VariableSizedHandleScope* AsVariableSized();
+  ALWAYS_INLINE HandleScope* AsHandleScope();
+  ALWAYS_INLINE const VariableSizedHandleScope* AsVariableSized() const;
+  ALWAYS_INLINE const HandleScope* AsHandleScope() const;
+
+ protected:
+  BaseHandleScope(BaseHandleScope* link, uint32_t num_references)
+      : link_(link),
+        number_of_references_(num_references) {}
+
+  // Variable sized constructor.
+  explicit BaseHandleScope(BaseHandleScope* link)
+      : link_(link),
+        number_of_references_(kNumReferencesVariableSized) {}
+
+  static constexpr int32_t kNumReferencesVariableSized = -1;
+
+  // Link-list of handle scopes. The root is held by a Thread.
+  BaseHandleScope* const link_;
+
+  // Number of handlerized references. -1 for variable sized handle scopes.
+  const int32_t number_of_references_;
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(BaseHandleScope);
+};
 
 // HandleScopes are scoped objects containing a number of Handles. They are used to allocate
 // handles, for these handles (and the objects contained within them) to be visible/roots for the
 // GC. It is most common to stack allocate HandleScopes using StackHandleScope.
-class PACKED(4) HandleScope {
+class PACKED(4) HandleScope : public BaseHandleScope {
  public:
   ~HandleScope() {}
 
-  // Number of references contained within this handle scope.
-  uint32_t NumberOfReferences() const {
-    return number_of_references_;
-  }
-
   // We have versions with and without explicit pointer size of the following. The first two are
   // used at runtime, so OFFSETOF_MEMBER computes the right offsets automatically. The last one
   // takes the pointer size explicitly so that at compile time we can cross-compile correctly.
@@ -59,11 +102,6 @@
   // Returns the size of a HandleScope containing num_references handles.
   static size_t SizeOf(PointerSize pointer_size, uint32_t num_references);
 
-  // Link to previous HandleScope or null.
-  HandleScope* GetLink() const {
-    return link_;
-  }
-
   ALWAYS_INLINE mirror::Object* GetReference(size_t i) const
       REQUIRES_SHARED(Locks::mutator_lock_);
 
@@ -93,11 +131,26 @@
   }
 
   // Placement new creation.
-  static HandleScope* Create(void* storage, HandleScope* link, uint32_t num_references)
+  static HandleScope* Create(void* storage, BaseHandleScope* link, uint32_t num_references)
       WARN_UNUSED {
     return new (storage) HandleScope(link, num_references);
   }
 
+  // Number of references contained within this handle scope.
+  ALWAYS_INLINE uint32_t NumberOfReferences() const {
+    DCHECK_GE(number_of_references_, 0);
+    return static_cast<uint32_t>(number_of_references_);
+  }
+
+  template <typename Visitor>
+  void VisitRoots(Visitor& visitor) REQUIRES_SHARED(Locks::mutator_lock_) {
+    for (size_t i = 0, count = NumberOfReferences(); i < count; ++i) {
+      // GetReference returns a pointer to the stack reference within the handle scope. If this
+      // needs to be updated, it will be done by the root visitor.
+      visitor.VisitRootIfNonNull(GetHandle(i).GetReference());
+    }
+  }
+
  protected:
   // Return backing storage used for references.
   ALWAYS_INLINE StackReference<mirror::Object>* GetReferences() const {
@@ -105,20 +158,11 @@
     return reinterpret_cast<StackReference<mirror::Object>*>(address);
   }
 
-  explicit HandleScope(size_t number_of_references) :
-      link_(nullptr), number_of_references_(number_of_references) {
-  }
+  explicit HandleScope(size_t number_of_references) : HandleScope(nullptr, number_of_references) {}
 
   // Semi-hidden constructor. Construction expected by generated code and StackHandleScope.
-  HandleScope(HandleScope* link, uint32_t num_references) :
-      link_(link), number_of_references_(num_references) {
-  }
-
-  // Link-list of handle scopes. The root is held by a Thread.
-  HandleScope* const link_;
-
-  // Number of handlerized references.
-  const uint32_t number_of_references_;
+  HandleScope(BaseHandleScope* link, uint32_t num_references)
+      : BaseHandleScope(link, num_references) {}
 
   // Storage for references.
   // StackReference<mirror::Object> references_[number_of_references_]
@@ -165,14 +209,10 @@
   ObjPtr<T>* const obj_;
 };
 
-
-// Scoped handle storage of a fixed size that is usually stack allocated.
+// Fixed size handle scope that is not necessarily linked in the thread.
 template<size_t kNumReferences>
-class PACKED(4) StackHandleScope FINAL : public HandleScope {
+class PACKED(4) FixedSizeHandleScope : public HandleScope {
  public:
-  explicit ALWAYS_INLINE StackHandleScope(Thread* self, mirror::Object* fill_value = nullptr);
-  ALWAYS_INLINE ~StackHandleScope();
-
   template<class T>
   ALWAYS_INLINE MutableHandle<T> NewHandle(T* object) REQUIRES_SHARED(Locks::mutator_lock_);
 
@@ -191,11 +231,15 @@
   ALWAYS_INLINE void SetReference(size_t i, mirror::Object* object)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  Thread* Self() const {
-    return self_;
+  size_t RemainingSlots() const {
+    return kNumReferences - pos_;
   }
 
  private:
+  explicit ALWAYS_INLINE FixedSizeHandleScope(BaseHandleScope* link,
+                                              mirror::Object* fill_value = nullptr);
+  ALWAYS_INLINE ~FixedSizeHandleScope() {}
+
   template<class T>
   ALWAYS_INLINE MutableHandle<T> GetHandle(size_t i) REQUIRES_SHARED(Locks::mutator_lock_) {
     DCHECK_LT(i, kNumReferences);
@@ -205,66 +249,65 @@
   // Reference storage needs to be first as expected by the HandleScope layout.
   StackReference<mirror::Object> storage_[kNumReferences];
 
+  // Position new handles will be created.
+  size_t pos_ = 0;
+
+  template<size_t kNumRefs> friend class StackHandleScope;
+  friend class VariableSizedHandleScope;
+};
+
+// Scoped handle storage of a fixed size that is stack allocated.
+template<size_t kNumReferences>
+class PACKED(4) StackHandleScope FINAL : public FixedSizeHandleScope<kNumReferences> {
+ public:
+  explicit ALWAYS_INLINE StackHandleScope(Thread* self, mirror::Object* fill_value = nullptr);
+  ALWAYS_INLINE ~StackHandleScope();
+
+  Thread* Self() const {
+    return self_;
+  }
+
+ private:
   // The thread that the stack handle scope is a linked list upon. The stack handle scope will
   // push and pop itself from this thread.
   Thread* const self_;
-
-  // Position new handles will be created.
-  size_t pos_;
-
-  template<size_t kNumRefs> friend class StackHandleScope;
 };
 
-// Utility class to manage a collection (stack) of StackHandleScope. All the managed
-// scope handle have the same fixed sized.
-// Calls to NewHandle will create a new handle inside the top StackHandleScope.
-// When the handle scope becomes full a new one is created and push on top of the
-// previous.
-//
-// NB:
-// - it is not safe to use the *same* StackHandleScopeCollection intermix with
-// other StackHandleScopes.
-// - this is a an easy way around implementing a full ZoneHandleScope to manage an
-// arbitrary number of handles.
-class StackHandleScopeCollection {
+// Utility class to manage a variable sized handle scope by having a list of fixed size handle
+// scopes.
+// Calls to NewHandle will create a new handle inside the current FixedSizeHandleScope.
+// When the current handle scope becomes full a new one is created and put at the front of the
+// list.
+class VariableSizedHandleScope : public BaseHandleScope {
  public:
-  explicit StackHandleScopeCollection(Thread* const self) :
-      self_(self),
-      current_scope_num_refs_(0) {
-  }
-
-  ~StackHandleScopeCollection() {
-    while (!scopes_.empty()) {
-      delete scopes_.top();
-      scopes_.pop();
-    }
-  }
+  explicit VariableSizedHandleScope(Thread* const self);
+  ~VariableSizedHandleScope();
 
   template<class T>
-  MutableHandle<T> NewHandle(T* object) REQUIRES_SHARED(Locks::mutator_lock_) {
-    if (scopes_.empty() || current_scope_num_refs_ >= kNumReferencesPerScope) {
-      StackHandleScope<kNumReferencesPerScope>* scope =
-          new StackHandleScope<kNumReferencesPerScope>(self_);
-      scopes_.push(scope);
-      current_scope_num_refs_ = 0;
-    }
-    current_scope_num_refs_++;
-    return scopes_.top()->NewHandle(object);
-  }
+  MutableHandle<T> NewHandle(T* object) REQUIRES_SHARED(Locks::mutator_lock_);
 
   template<class MirrorType, bool kPoison>
   MutableHandle<MirrorType> NewHandle(ObjPtr<MirrorType, kPoison> ptr)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
+  // Number of references contained within this handle scope.
+  ALWAYS_INLINE uint32_t NumberOfReferences() const;
+
+  ALWAYS_INLINE bool Contains(StackReference<mirror::Object>* handle_scope_entry) const;
+
+  template <typename Visitor>
+  void VisitRoots(Visitor& visitor) REQUIRES_SHARED(Locks::mutator_lock_);
+
  private:
   static constexpr size_t kNumReferencesPerScope = 4;
 
   Thread* const self_;
 
-  std::stack<StackHandleScope<kNumReferencesPerScope>*> scopes_;
-  size_t current_scope_num_refs_;
+  // Linked list of fixed size handle scopes.
+  using LocalScopeType = FixedSizeHandleScope<kNumReferencesPerScope>;
+  LocalScopeType* current_scope_;
 
-  DISALLOW_COPY_AND_ASSIGN(StackHandleScopeCollection);
+  DISALLOW_COPY_AND_ASSIGN(VariableSizedHandleScope);
 };
 
 }  // namespace art
diff --git a/runtime/handle_scope_test.cc b/runtime/handle_scope_test.cc
index c269a37..aab1d9c 100644
--- a/runtime/handle_scope_test.cc
+++ b/runtime/handle_scope_test.cc
@@ -14,59 +14,106 @@
  * limitations under the License.
  */
 
+#include <type_traits>
+
 #include "base/enums.h"
+#include "common_runtime_test.h"
 #include "gtest/gtest.h"
+#include "handle.h"
 #include "handle_scope-inl.h"
+#include "mirror/object.h"
 #include "scoped_thread_state_change-inl.h"
 #include "thread.h"
 
 namespace art {
 
-// Handle scope with a fixed size which is allocated on the stack.
-template<size_t kNumReferences>
-class NoThreadStackHandleScope : public HandleScope {
- public:
-  explicit NoThreadStackHandleScope(HandleScope* link) : HandleScope(link, kNumReferences) {
-  }
-  ~NoThreadStackHandleScope() {
-  }
+// Handles are value objects and should be trivially copyable.
+static_assert(std::is_trivially_copyable<Handle<mirror::Object>>::value,
+              "Handle should be trivially copyable");
+static_assert(std::is_trivially_copyable<MutableHandle<mirror::Object>>::value,
+              "MutableHandle should be trivially copyable");
+static_assert(std::is_trivially_copyable<ScopedNullHandle<mirror::Object>>::value,
+              "ScopedNullHandle should be trivially copyable");
 
- private:
-  // references_storage_ needs to be first so that it matches the address of references_
-  StackReference<mirror::Object> references_storage_[kNumReferences];
-};
+class HandleScopeTest : public CommonRuntimeTest {};
 
 // Test the offsets computed for members of HandleScope. Because of cross-compiling
 // it is impossible the use OFFSETOF_MEMBER, so we do some reasonable computations ourselves. This
 // test checks whether we do the right thing.
-TEST(HandleScopeTest, Offsets) NO_THREAD_SAFETY_ANALYSIS {
+TEST_F(HandleScopeTest, Offsets) {
+  ScopedObjectAccess soa(Thread::Current());
+  ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
   // As the members of HandleScope are private, we cannot use OFFSETOF_MEMBER
   // here. So do the inverse: set some data, and access it through pointers created from the offsets.
-  NoThreadStackHandleScope<0x9ABC> test_table(reinterpret_cast<HandleScope*>(0x5678));
-  test_table.SetReference(0, reinterpret_cast<mirror::Object*>(0x1234));
+  StackHandleScope<0x1> hs0(soa.Self());
+  static const size_t kNumReferences = 0x9ABC;
+  StackHandleScope<kNumReferences> test_table(soa.Self());
+  ObjPtr<mirror::Class> c = class_linker->FindSystemClass(soa.Self(), "Ljava/lang/Object;");
+  test_table.SetReference(0, c.Ptr());
 
   uint8_t* table_base_ptr = reinterpret_cast<uint8_t*>(&test_table);
 
   {
-    uintptr_t* link_ptr = reinterpret_cast<uintptr_t*>(table_base_ptr +
+    BaseHandleScope** link_ptr = reinterpret_cast<BaseHandleScope**>(table_base_ptr +
         HandleScope::LinkOffset(kRuntimePointerSize));
-    EXPECT_EQ(*link_ptr, static_cast<size_t>(0x5678));
+    EXPECT_EQ(*link_ptr, &hs0);
   }
 
   {
     uint32_t* num_ptr = reinterpret_cast<uint32_t*>(table_base_ptr +
         HandleScope::NumberOfReferencesOffset(kRuntimePointerSize));
-    EXPECT_EQ(*num_ptr, static_cast<size_t>(0x9ABC));
+    EXPECT_EQ(*num_ptr, static_cast<size_t>(kNumReferences));
   }
 
   {
-    // Assume sizeof(StackReference<mirror::Object>) == sizeof(uint32_t)
-    // TODO: How can we make this assumption-less but still access directly and fully?
-    EXPECT_EQ(sizeof(StackReference<mirror::Object>), sizeof(uint32_t));
-
-    uint32_t* ref_ptr = reinterpret_cast<uint32_t*>(table_base_ptr +
+    auto* ref_ptr = reinterpret_cast<StackReference<mirror::Object>*>(table_base_ptr +
         HandleScope::ReferencesOffset(kRuntimePointerSize));
-    EXPECT_EQ(*ref_ptr, static_cast<uint32_t>(0x1234));
+    EXPECT_OBJ_PTR_EQ(ref_ptr->AsMirrorPtr(), c);
+  }
+}
+
+class CollectVisitor {
+ public:
+  void VisitRootIfNonNull(StackReference<mirror::Object>* ref) {
+    if (!ref->IsNull()) {
+      visited.insert(ref);
+    }
+    ++total_visited;
+  }
+
+  std::set<StackReference<mirror::Object>*> visited;
+  size_t total_visited = 0;  // including null.
+};
+
+// Test functionality of variable sized handle scopes.
+TEST_F(HandleScopeTest, VariableSized) {
+  ScopedObjectAccess soa(Thread::Current());
+  VariableSizedHandleScope hs(soa.Self());
+  ClassLinker* const class_linker = Runtime::Current()->GetClassLinker();
+  Handle<mirror::Class> c =
+      hs.NewHandle(class_linker->FindSystemClass(soa.Self(), "Ljava/lang/Object;"));
+  // Test nested scopes.
+  StackHandleScope<1> inner(soa.Self());
+  inner.NewHandle(c->AllocObject(soa.Self()));
+  // Add a bunch of handles and make sure callbacks work.
+  static const size_t kNumHandles = 100;
+  std::vector<Handle<mirror::Object>> handles;
+  for (size_t i = 0; i < kNumHandles; ++i) {
+    BaseHandleScope* base = &hs;
+    ObjPtr<mirror::Object> o = c->AllocObject(soa.Self());
+    handles.push_back(hs.NewHandle(o));
+    EXPECT_OBJ_PTR_EQ(o, handles.back().Get());
+    EXPECT_TRUE(hs.Contains(handles.back().GetReference()));
+    EXPECT_TRUE(base->Contains(handles.back().GetReference()));
+    EXPECT_EQ(hs.NumberOfReferences(), base->NumberOfReferences());
+  }
+  CollectVisitor visitor;
+  BaseHandleScope* base = &hs;
+  base->VisitRoots(visitor);
+  EXPECT_LE(visitor.visited.size(), base->NumberOfReferences());
+  EXPECT_EQ(visitor.total_visited, base->NumberOfReferences());
+  for (StackReference<mirror::Object>* ref : visitor.visited) {
+    EXPECT_TRUE(base->Contains(ref));
   }
 }
 
diff --git a/runtime/hprof/hprof.cc b/runtime/hprof/hprof.cc
index ecb2157..8cbe491 100644
--- a/runtime/hprof/hprof.cc
+++ b/runtime/hprof/hprof.cc
@@ -574,9 +574,9 @@
   }
 
   void WriteStringTable() {
-    for (const std::pair<std::string, HprofStringId>& p : strings_) {
+    for (const auto& p : strings_) {
       const std::string& string = p.first;
-      const size_t id = p.second;
+      const HprofStringId id = p.second;
 
       output_->StartNewRecord(HPROF_TAG_STRING, kHprofTime);
 
@@ -654,7 +654,7 @@
   }
 
   HprofStringId LookupClassNameId(mirror::Class* c) REQUIRES_SHARED(Locks::mutator_lock_) {
-    return LookupStringId(PrettyDescriptor(c));
+    return LookupStringId(c->PrettyDescriptor());
   }
 
   void WriteFixedHeader() {
@@ -1167,8 +1167,8 @@
 }
 
 void Hprof::DumpHeapClass(mirror::Class* klass) {
-  if (!klass->IsLoaded() && !klass->IsErroneous()) {
-    // Class is allocated but not yet loaded: we cannot access its fields or super class.
+  if (!klass->IsResolved() && !klass->IsErroneous()) {
+    // Class is allocated but not yet resolved: we cannot access its fields or super class.
     return;
   }
   const size_t num_static_fields = klass->NumStaticFields();
diff --git a/runtime/image.cc b/runtime/image.cc
index 299d5fd..bd5ba93 100644
--- a/runtime/image.cc
+++ b/runtime/image.cc
@@ -25,7 +25,7 @@
 namespace art {
 
 const uint8_t ImageHeader::kImageMagic[] = { 'a', 'r', 't', '\n' };
-const uint8_t ImageHeader::kImageVersion[] = { '0', '3', '1', '\0' };
+const uint8_t ImageHeader::kImageVersion[] = { '0', '3', '2', '\0' };
 
 ImageHeader::ImageHeader(uint32_t image_begin,
                          uint32_t image_size,
diff --git a/runtime/imtable-inl.h b/runtime/imtable-inl.h
index 0cb9b5e..cb85fa6 100644
--- a/runtime/imtable-inl.h
+++ b/runtime/imtable-inl.h
@@ -20,15 +20,82 @@
 #include "imtable.h"
 
 #include "art_method-inl.h"
+#include "dex_file.h"
+#include "utf.h"
 
 namespace art {
 
-inline uint32_t ImTable::GetBaseImtHash(ArtMethod* method) REQUIRES_SHARED(Locks::mutator_lock_) {
-  return method->GetDexMethodIndex();
+static constexpr bool kImTableHashUseName = true;
+static constexpr bool kImTableHashUseCoefficients = true;
+
+// Magic configuration that minimizes some common runtime calls.
+static constexpr uint32_t kImTableHashCoefficientClass = 427;
+static constexpr uint32_t kImTableHashCoefficientName = 16;
+static constexpr uint32_t kImTableHashCoefficientSignature = 14;
+
+inline void ImTable::GetImtHashComponents(ArtMethod* method,
+                                          uint32_t* class_hash,
+                                          uint32_t* name_hash,
+                                          uint32_t* signature_hash) {
+  if (kImTableHashUseName) {
+    if (method->IsProxyMethod()) {
+      *class_hash = 0;
+      *name_hash = 0;
+      *signature_hash = 0;
+      return;
+    }
+
+    const DexFile* dex_file = method->GetDexFile();
+    const DexFile::MethodId& method_id = dex_file->GetMethodId(method->GetDexMethodIndex());
+
+    // Class descriptor for the class component.
+    *class_hash = ComputeModifiedUtf8Hash(dex_file->GetMethodDeclaringClassDescriptor(method_id));
+
+    // Method name for the method component.
+    *name_hash = ComputeModifiedUtf8Hash(dex_file->GetMethodName(method_id));
+
+    const DexFile::ProtoId& proto_id = dex_file->GetMethodPrototype(method_id);
+
+    // Read the proto for the signature component.
+    uint32_t tmp = ComputeModifiedUtf8Hash(
+        dex_file->GetTypeDescriptor(dex_file->GetTypeId(proto_id.return_type_idx_)));
+
+    // Mix in the argument types.
+    // Note: we could consider just using the shorty. This would be faster, at the price of
+    //       potential collisions.
+    const DexFile::TypeList* param_types = dex_file->GetProtoParameters(proto_id);
+    if (param_types != nullptr) {
+      for (size_t i = 0; i != param_types->Size(); ++i) {
+        const DexFile::TypeItem& type = param_types->GetTypeItem(i);
+        tmp = 31 * tmp + ComputeModifiedUtf8Hash(
+            dex_file->GetTypeDescriptor(dex_file->GetTypeId(type.type_idx_)));
+      }
+    }
+
+    *signature_hash = tmp;
+    return;
+  } else {
+    *class_hash = method->GetDexMethodIndex();
+    *name_hash = 0;
+    *signature_hash = 0;
+    return;
+  }
 }
 
 inline uint32_t ImTable::GetImtIndex(ArtMethod* method) {
-  return GetBaseImtHash(method) % ImTable::kSize;
+  uint32_t class_hash, name_hash, signature_hash;
+  GetImtHashComponents(method, &class_hash, &name_hash, &signature_hash);
+
+  uint32_t mixed_hash;
+  if (!kImTableHashUseCoefficients) {
+    mixed_hash = class_hash + name_hash + signature_hash;
+  } else {
+    mixed_hash = kImTableHashCoefficientClass * class_hash +
+                 kImTableHashCoefficientName * name_hash +
+                 kImTableHashCoefficientSignature * signature_hash;
+  }
+
+  return mixed_hash % ImTable::kSize;
 }
 
 }  // namespace art
diff --git a/runtime/imtable.h b/runtime/imtable.h
index 6df890d..b7066bd 100644
--- a/runtime/imtable.h
+++ b/runtime/imtable.h
@@ -23,6 +23,7 @@
 
 #include "base/enums.h"
 #include "base/macros.h"
+#include "base/mutex.h"
 
 namespace art {
 
@@ -74,18 +75,17 @@
     return kSize * static_cast<size_t>(pointer_size);
   }
 
-  // Converts a method to the base hash used in GetImtIndex.
-  ALWAYS_INLINE static inline uint32_t GetBaseImtHash(ArtMethod* method)
-      REQUIRES_SHARED(Locks::mutator_lock_);
-  ALWAYS_INLINE static inline uint32_t GetBaseImtHash(const DexFile* dex_file, uint32_t method_idx)
+  // Converts a method to the base hash components used in GetImtIndex.
+  ALWAYS_INLINE static inline void GetImtHashComponents(ArtMethod* method,
+                                                        uint32_t* class_hash,
+                                                        uint32_t* name_hash,
+                                                        uint32_t* signature_hash)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // The (complete) hashing scheme to map an ArtMethod to a slot in the Interface Method Table
   // (IMT).
   ALWAYS_INLINE static inline uint32_t GetImtIndex(ArtMethod* method)
       REQUIRES_SHARED(Locks::mutator_lock_);
-  ALWAYS_INLINE static inline uint32_t GetImtIndex(const DexFile* dex_file, uint32_t method_idx)
-      REQUIRES_SHARED(Locks::mutator_lock_);
 };
 
 }  // namespace art
diff --git a/runtime/imtable_test.cc b/runtime/imtable_test.cc
new file mode 100644
index 0000000..8cbe291
--- /dev/null
+++ b/runtime/imtable_test.cc
@@ -0,0 +1,104 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "imtable-inl.h"
+
+#include <memory>
+#include <string>
+
+#include "jni.h"
+
+#include "base/mutex.h"
+#include "class_linker.h"
+#include "common_runtime_test.h"
+#include "mirror/accessible_object.h"
+#include "mirror/class.h"
+#include "mirror/class_loader.h"
+#include "handle_scope-inl.h"
+#include "scoped_thread_state_change-inl.h"
+#include "thread-inl.h"
+
+namespace art {
+
+class ImTableTest : public CommonRuntimeTest {
+ public:
+  std::pair<mirror::Class*, mirror::Class*> LoadClasses(const std::string& class_name)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    jobject jclass_loader_a = LoadDex("IMTA");
+    CHECK(jclass_loader_a != nullptr);
+    jobject jclass_loader_b = LoadDex("IMTB");
+    CHECK(jclass_loader_b != nullptr);
+
+    ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+    Thread* self = Thread::Current();
+
+    StackHandleScope<3> hs(self);
+    MutableHandle<mirror::ClassLoader> h_class_loader = hs.NewHandle<mirror::ClassLoader>(nullptr);
+
+    // A.
+    h_class_loader.Assign(
+        ObjPtr<mirror::ClassLoader>::DownCast(self->DecodeJObject(jclass_loader_a)));
+    Handle<mirror::Class> h_class_a(
+          hs.NewHandle(class_linker->FindClass(self, class_name.c_str(), h_class_loader)));
+    if (h_class_a.Get() == nullptr) {
+      LOG(ERROR) << self->GetException()->Dump();
+      CHECK(false) << "h_class_a == nullptr";
+    }
+
+    // B.
+    h_class_loader.Assign(
+        ObjPtr<mirror::ClassLoader>::DownCast(self->DecodeJObject(jclass_loader_b)));
+    Handle<mirror::Class> h_class_b(
+          hs.NewHandle(class_linker->FindClass(self, class_name.c_str(), h_class_loader)));
+    if (h_class_b.Get() == nullptr) {
+      LOG(ERROR) << self->GetException()->Dump();
+      CHECK(false) << "h_class_b == nullptr";
+    }
+
+    return std::make_pair(h_class_a.Get(), h_class_b.Get());
+  }
+
+  std::pair<ArtMethod*, ArtMethod*> LoadMethods(const std::string& class_name,
+                                                const std::string& method_name)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    std::pair<mirror::Class*, mirror::Class*> classes = LoadClasses(class_name);
+
+    const PointerSize pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
+
+    ArtMethod* method_a =
+        classes.first->FindDeclaredVirtualMethodByName(method_name, pointer_size);
+    ArtMethod* method_b =
+        classes.second->FindDeclaredVirtualMethodByName(method_name, pointer_size);
+
+    return std::make_pair(method_a, method_b);
+  }
+};
+
+TEST_F(ImTableTest, NewMethodBefore) {
+  ScopedObjectAccess soa(Thread::Current());
+
+  std::pair<ArtMethod*, ArtMethod*> methods = LoadMethods("LInterfaces$A;", "foo");
+  CHECK_EQ(ImTable::GetImtIndex(methods.first), ImTable::GetImtIndex(methods.second));
+}
+
+TEST_F(ImTableTest, NewClassBefore) {
+  ScopedObjectAccess soa(Thread::Current());
+
+  std::pair<ArtMethod*, ArtMethod*> methods = LoadMethods("LInterfaces$Z;", "foo");
+  CHECK_EQ(ImTable::GetImtIndex(methods.first), ImTable::GetImtIndex(methods.second));
+}
+
+}  // namespace art
diff --git a/runtime/indirect_reference_table-inl.h b/runtime/indirect_reference_table-inl.h
index e05f8f3..9c634fa 100644
--- a/runtime/indirect_reference_table-inl.h
+++ b/runtime/indirect_reference_table-inl.h
@@ -43,15 +43,15 @@
                                    iref));
     return false;
   }
-  const int topIndex = segment_state_.parts.topIndex;
-  int idx = ExtractIndex(iref);
-  if (UNLIKELY(idx >= topIndex)) {
+  const uint32_t top_index = segment_state_.top_index;
+  uint32_t idx = ExtractIndex(iref);
+  if (UNLIKELY(idx >= top_index)) {
     std::string msg = StringPrintf(
         "JNI ERROR (app bug): accessed stale %s %p  (index %d in a table of size %d)",
         GetIndirectRefKindString(kind_),
         iref,
         idx,
-        topIndex);
+        top_index);
     AbortIfNoCheckJNI(msg);
     return false;
   }
@@ -68,7 +68,9 @@
 }
 
 // Make sure that the entry at "idx" is correctly paired with "iref".
-inline bool IndirectReferenceTable::CheckEntry(const char* what, IndirectRef iref, int idx) const {
+inline bool IndirectReferenceTable::CheckEntry(const char* what,
+                                               IndirectRef iref,
+                                               uint32_t idx) const {
   IndirectRef checkRef = ToIndirectRef(idx);
   if (UNLIKELY(checkRef != iref)) {
     std::string msg = StringPrintf(
@@ -90,7 +92,7 @@
   }
   uint32_t idx = ExtractIndex(iref);
   ObjPtr<mirror::Object> obj = table_[idx].GetReference()->Read<kReadBarrierOption>();
-  VerifyObject(obj.Ptr());
+  VerifyObject(obj);
   return obj;
 }
 
diff --git a/runtime/indirect_reference_table.cc b/runtime/indirect_reference_table.cc
index d59bb39..c737119 100644
--- a/runtime/indirect_reference_table.cc
+++ b/runtime/indirect_reference_table.cc
@@ -32,6 +32,7 @@
 namespace art {
 
 static constexpr bool kDumpStackOnNonLocalReference = false;
+static constexpr bool kDebugIRT = false;
 
 const char* GetIndirectRefKindString(const IndirectRefKind& kind) {
   switch (kind) {
@@ -58,85 +59,234 @@
   }
 }
 
-IndirectReferenceTable::IndirectReferenceTable(size_t initialCount,
-                                               size_t maxCount, IndirectRefKind desiredKind,
-                                               bool abort_on_error)
-    : kind_(desiredKind),
-      max_entries_(maxCount) {
-  CHECK_GT(initialCount, 0U);
-  CHECK_LE(initialCount, maxCount);
-  CHECK_NE(desiredKind, kHandleScopeOrInvalid);
+IndirectReferenceTable::IndirectReferenceTable(size_t max_count,
+                                               IndirectRefKind desired_kind,
+                                               ResizableCapacity resizable,
+                                               std::string* error_msg)
+    : segment_state_(kIRTFirstSegment),
+      kind_(desired_kind),
+      max_entries_(max_count),
+      current_num_holes_(0),
+      resizable_(resizable) {
+  CHECK(error_msg != nullptr);
+  CHECK_NE(desired_kind, kHandleScopeOrInvalid);
 
-  std::string error_str;
-  const size_t table_bytes = maxCount * sizeof(IrtEntry);
+  const size_t table_bytes = max_count * sizeof(IrtEntry);
   table_mem_map_.reset(MemMap::MapAnonymous("indirect ref table", nullptr, table_bytes,
-                                            PROT_READ | PROT_WRITE, false, false, &error_str));
-  if (abort_on_error) {
-    CHECK(table_mem_map_.get() != nullptr) << error_str;
-    CHECK_EQ(table_mem_map_->Size(), table_bytes);
-    CHECK(table_mem_map_->Begin() != nullptr);
-  } else if (table_mem_map_.get() == nullptr ||
-             table_mem_map_->Size() != table_bytes ||
-             table_mem_map_->Begin() == nullptr) {
-    table_mem_map_.reset();
-    LOG(ERROR) << error_str;
-    return;
+                                            PROT_READ | PROT_WRITE, false, false, error_msg));
+  if (table_mem_map_.get() == nullptr && error_msg->empty()) {
+    *error_msg = "Unable to map memory for indirect ref table";
   }
-  table_ = reinterpret_cast<IrtEntry*>(table_mem_map_->Begin());
-  segment_state_.all = IRT_FIRST_SEGMENT;
+
+  if (table_mem_map_.get() != nullptr) {
+    table_ = reinterpret_cast<IrtEntry*>(table_mem_map_->Begin());
+  } else {
+    table_ = nullptr;
+  }
+  segment_state_ = kIRTFirstSegment;
+  last_known_previous_state_ = kIRTFirstSegment;
 }
 
 IndirectReferenceTable::~IndirectReferenceTable() {
 }
 
+void IndirectReferenceTable::ConstexprChecks() {
+  // Use this for some assertions. They can't be put into the header as C++ wants the class
+  // to be complete.
+
+  // Check kind.
+  static_assert((EncodeIndirectRefKind(kLocal) & (~kKindMask)) == 0, "Kind encoding error");
+  static_assert((EncodeIndirectRefKind(kGlobal) & (~kKindMask)) == 0, "Kind encoding error");
+  static_assert((EncodeIndirectRefKind(kWeakGlobal) & (~kKindMask)) == 0, "Kind encoding error");
+  static_assert(DecodeIndirectRefKind(EncodeIndirectRefKind(kLocal)) == kLocal,
+                "Kind encoding error");
+  static_assert(DecodeIndirectRefKind(EncodeIndirectRefKind(kGlobal)) == kGlobal,
+                "Kind encoding error");
+  static_assert(DecodeIndirectRefKind(EncodeIndirectRefKind(kWeakGlobal)) == kWeakGlobal,
+                "Kind encoding error");
+
+  // Check serial.
+  static_assert(DecodeSerial(EncodeSerial(0u)) == 0u, "Serial encoding error");
+  static_assert(DecodeSerial(EncodeSerial(1u)) == 1u, "Serial encoding error");
+  static_assert(DecodeSerial(EncodeSerial(2u)) == 2u, "Serial encoding error");
+  static_assert(DecodeSerial(EncodeSerial(3u)) == 3u, "Serial encoding error");
+
+  // Table index.
+  static_assert(DecodeIndex(EncodeIndex(0u)) == 0u, "Index encoding error");
+  static_assert(DecodeIndex(EncodeIndex(1u)) == 1u, "Index encoding error");
+  static_assert(DecodeIndex(EncodeIndex(2u)) == 2u, "Index encoding error");
+  static_assert(DecodeIndex(EncodeIndex(3u)) == 3u, "Index encoding error");
+}
+
 bool IndirectReferenceTable::IsValid() const {
   return table_mem_map_.get() != nullptr;
 }
 
-IndirectRef IndirectReferenceTable::Add(uint32_t cookie, ObjPtr<mirror::Object> obj) {
-  IRTSegmentState prevState;
-  prevState.all = cookie;
-  size_t topIndex = segment_state_.parts.topIndex;
+// Holes:
+//
+// To keep the IRT compact, we want to fill "holes" created by non-stack-discipline Add & Remove
+// operation sequences. For simplicity and lower memory overhead, we do not use a free list or
+// similar. Instead, we scan for holes, with the expectation that we will find holes fast as they
+// are usually near the end of the table (see the header, TODO: verify this assumption). To avoid
+// scans when there are no holes, the number of known holes should be tracked.
+//
+// A previous implementation stored the top index and the number of holes as the segment state.
+// This constraints the maximum number of references to 16-bit. We want to relax this, as it
+// is easy to require more references (e.g., to list all classes in large applications). Thus,
+// the implicitly stack-stored state, the IRTSegmentState, is only the top index.
+//
+// Thus, hole count is a local property of the current segment, and needs to be recovered when
+// (or after) a frame is pushed or popped. To keep JNI transitions simple (and inlineable), we
+// cannot do work when the segment changes. Thus, Add and Remove need to ensure the current
+// hole count is correct.
+//
+// To be able to detect segment changes, we require an additional local field that can describe
+// the known segment. This is last_known_previous_state_. The requirement will become clear with
+// the following (some non-trivial) cases that have to be supported:
+//
+// 1) Segment with holes (current_num_holes_ > 0), push new segment, add/remove reference
+// 2) Segment with holes (current_num_holes_ > 0), pop segment, add/remove reference
+// 3) Segment with holes (current_num_holes_ > 0), push new segment, pop segment, add/remove
+//    reference
+// 4) Empty segment, push new segment, create a hole, pop a segment, add/remove a reference
+// 5) Base segment, push new segment, create a hole, pop a segment, push new segment, add/remove
+//    reference
+//
+// Storing the last known *previous* state (bottom index) allows conservatively detecting all the
+// segment changes above. The condition is simply that the last known state is greater than or
+// equal to the current previous state, and smaller than the current state (top index). The
+// condition is conservative as it adds O(1) overhead to operations on an empty segment.
+
+static size_t CountNullEntries(const IrtEntry* table, size_t from, size_t to) {
+  size_t count = 0;
+  for (size_t index = from; index != to; ++index) {
+    if (table[index].GetReference()->IsNull()) {
+      count++;
+    }
+  }
+  return count;
+}
+
+void IndirectReferenceTable::RecoverHoles(IRTSegmentState prev_state) {
+  if (last_known_previous_state_.top_index >= segment_state_.top_index ||
+      last_known_previous_state_.top_index < prev_state.top_index) {
+    const size_t top_index = segment_state_.top_index;
+    size_t count = CountNullEntries(table_, prev_state.top_index, top_index);
+
+    if (kDebugIRT) {
+      LOG(INFO) << "+++ Recovered holes: "
+                << " Current prev=" << prev_state.top_index
+                << " Current top_index=" << top_index
+                << " Old num_holes=" << current_num_holes_
+                << " New num_holes=" << count;
+    }
+
+    current_num_holes_ = count;
+    last_known_previous_state_ = prev_state;
+  } else if (kDebugIRT) {
+    LOG(INFO) << "No need to recover holes";
+  }
+}
+
+ALWAYS_INLINE
+static inline void CheckHoleCount(IrtEntry* table,
+                                  size_t exp_num_holes,
+                                  IRTSegmentState prev_state,
+                                  IRTSegmentState cur_state) {
+  if (kIsDebugBuild) {
+    size_t count = CountNullEntries(table, prev_state.top_index, cur_state.top_index);
+    CHECK_EQ(exp_num_holes, count) << "prevState=" << prev_state.top_index
+                                   << " topIndex=" << cur_state.top_index;
+  }
+}
+
+bool IndirectReferenceTable::Resize(size_t new_size, std::string* error_msg) {
+  CHECK_GT(new_size, max_entries_);
+
+  const size_t table_bytes = new_size * sizeof(IrtEntry);
+  std::unique_ptr<MemMap> new_map(MemMap::MapAnonymous("indirect ref table",
+                                                       nullptr,
+                                                       table_bytes,
+                                                       PROT_READ | PROT_WRITE,
+                                                       false,
+                                                       false,
+                                                       error_msg));
+  if (new_map == nullptr) {
+    return false;
+  }
+
+  memcpy(new_map->Begin(), table_mem_map_->Begin(), table_mem_map_->Size());
+  table_mem_map_ = std::move(new_map);
+  table_ = reinterpret_cast<IrtEntry*>(table_mem_map_->Begin());
+  max_entries_ = new_size;
+
+  return true;
+}
+
+IndirectRef IndirectReferenceTable::Add(IRTSegmentState previous_state,
+                                        ObjPtr<mirror::Object> obj) {
+  if (kDebugIRT) {
+    LOG(INFO) << "+++ Add: previous_state=" << previous_state.top_index
+              << " top_index=" << segment_state_.top_index
+              << " last_known_prev_top_index=" << last_known_previous_state_.top_index
+              << " holes=" << current_num_holes_;
+  }
+
+  size_t top_index = segment_state_.top_index;
 
   CHECK(obj != nullptr);
-  VerifyObject(obj.Ptr());
+  VerifyObject(obj);
   DCHECK(table_ != nullptr);
-  DCHECK_GE(segment_state_.parts.numHoles, prevState.parts.numHoles);
 
-  if (topIndex == max_entries_) {
-    LOG(FATAL) << "JNI ERROR (app bug): " << kind_ << " table overflow "
-               << "(max=" << max_entries_ << ")\n"
-               << MutatorLockedDumpable<IndirectReferenceTable>(*this);
+  if (top_index == max_entries_) {
+    if (resizable_ == ResizableCapacity::kNo) {
+      LOG(FATAL) << "JNI ERROR (app bug): " << kind_ << " table overflow "
+                 << "(max=" << max_entries_ << ")\n"
+                 << MutatorLockedDumpable<IndirectReferenceTable>(*this);
+      UNREACHABLE();
+    }
+
+    // Try to double space.
+    std::string error_msg;
+    if (!Resize(max_entries_ * 2, &error_msg)) {
+      LOG(FATAL) << "JNI ERROR (app bug): " << kind_ << " table overflow "
+                 << "(max=" << max_entries_ << ")" << std::endl
+                 << MutatorLockedDumpable<IndirectReferenceTable>(*this)
+                 << " Resizing failed: " << error_msg;
+      UNREACHABLE();
+    }
   }
 
+  RecoverHoles(previous_state);
+  CheckHoleCount(table_, current_num_holes_, previous_state, segment_state_);
+
   // We know there's enough room in the table.  Now we just need to find
   // the right spot.  If there's a hole, find it and fill it; otherwise,
   // add to the end of the list.
   IndirectRef result;
-  int numHoles = segment_state_.parts.numHoles - prevState.parts.numHoles;
   size_t index;
-  if (numHoles > 0) {
-    DCHECK_GT(topIndex, 1U);
+  if (current_num_holes_ > 0) {
+    DCHECK_GT(top_index, 1U);
     // Find the first hole; likely to be near the end of the list.
-    IrtEntry* pScan = &table_[topIndex - 1];
-    DCHECK(!pScan->GetReference()->IsNull());
-    --pScan;
-    while (!pScan->GetReference()->IsNull()) {
-      DCHECK_GE(pScan, table_ + prevState.parts.topIndex);
-      --pScan;
+    IrtEntry* p_scan = &table_[top_index - 1];
+    DCHECK(!p_scan->GetReference()->IsNull());
+    --p_scan;
+    while (!p_scan->GetReference()->IsNull()) {
+      DCHECK_GE(p_scan, table_ + previous_state.top_index);
+      --p_scan;
     }
-    index = pScan - table_;
-    segment_state_.parts.numHoles--;
+    index = p_scan - table_;
+    current_num_holes_--;
   } else {
     // Add to the end.
-    index = topIndex++;
-    segment_state_.parts.topIndex = topIndex;
+    index = top_index++;
+    segment_state_.top_index = top_index;
   }
   table_[index].Add(obj);
   result = ToIndirectRef(index);
-  if ((false)) {
-    LOG(INFO) << "+++ added at " << ExtractIndex(result) << " top=" << segment_state_.parts.topIndex
-              << " holes=" << segment_state_.parts.numHoles;
+  if (kDebugIRT) {
+    LOG(INFO) << "+++ added at " << ExtractIndex(result) << " top=" << segment_state_.top_index
+              << " holes=" << current_num_holes_;
   }
 
   DCHECK(result != nullptr);
@@ -161,14 +311,18 @@
 // This method is not called when a local frame is popped; this is only used
 // for explicit single removals.
 // Returns "false" if nothing was removed.
-bool IndirectReferenceTable::Remove(uint32_t cookie, IndirectRef iref) {
-  IRTSegmentState prevState;
-  prevState.all = cookie;
-  int topIndex = segment_state_.parts.topIndex;
-  int bottomIndex = prevState.parts.topIndex;
+bool IndirectReferenceTable::Remove(IRTSegmentState previous_state, IndirectRef iref) {
+  if (kDebugIRT) {
+    LOG(INFO) << "+++ Remove: previous_state=" << previous_state.top_index
+              << " top_index=" << segment_state_.top_index
+              << " last_known_prev_top_index=" << last_known_previous_state_.top_index
+              << " holes=" << current_num_holes_;
+  }
+
+  const uint32_t top_index = segment_state_.top_index;
+  const uint32_t bottom_index = previous_state.top_index;
 
   DCHECK(table_ != nullptr);
-  DCHECK_GE(segment_state_.parts.numHoles, prevState.parts.numHoles);
 
   if (GetIndirectRefKind(iref) == kHandleScopeOrInvalid) {
     auto* self = Thread::Current();
@@ -185,21 +339,24 @@
       return true;
     }
   }
-  const int idx = ExtractIndex(iref);
-  if (idx < bottomIndex) {
+  const uint32_t idx = ExtractIndex(iref);
+  if (idx < bottom_index) {
     // Wrong segment.
     LOG(WARNING) << "Attempt to remove index outside index area (" << idx
-                 << " vs " << bottomIndex << "-" << topIndex << ")";
+                 << " vs " << bottom_index << "-" << top_index << ")";
     return false;
   }
-  if (idx >= topIndex) {
+  if (idx >= top_index) {
     // Bad --- stale reference?
     LOG(WARNING) << "Attempt to remove invalid index " << idx
-                 << " (bottom=" << bottomIndex << " top=" << topIndex << ")";
+                 << " (bottom=" << bottom_index << " top=" << top_index << ")";
     return false;
   }
 
-  if (idx == topIndex - 1) {
+  RecoverHoles(previous_state);
+  CheckHoleCount(table_, current_num_holes_, previous_state, segment_state_);
+
+  if (idx == top_index - 1) {
     // Top-most entry.  Scan up and consume holes.
 
     if (!CheckEntry("remove", iref, idx)) {
@@ -207,28 +364,30 @@
     }
 
     *table_[idx].GetReference() = GcRoot<mirror::Object>(nullptr);
-    int numHoles = segment_state_.parts.numHoles - prevState.parts.numHoles;
-    if (numHoles != 0) {
-      while (--topIndex > bottomIndex && numHoles != 0) {
-        if ((false)) {
-          LOG(INFO) << "+++ checking for hole at " << topIndex - 1
-                    << " (cookie=" << cookie << ") val="
-                    << table_[topIndex - 1].GetReference()->Read<kWithoutReadBarrier>();
+    if (current_num_holes_ != 0) {
+      uint32_t collapse_top_index = top_index;
+      while (--collapse_top_index > bottom_index && current_num_holes_ != 0) {
+        if (kDebugIRT) {
+          ScopedObjectAccess soa(Thread::Current());
+          LOG(INFO) << "+++ checking for hole at " << collapse_top_index - 1
+                    << " (previous_state=" << bottom_index << ") val="
+                    << table_[collapse_top_index - 1].GetReference()->Read<kWithoutReadBarrier>();
         }
-        if (!table_[topIndex - 1].GetReference()->IsNull()) {
+        if (!table_[collapse_top_index - 1].GetReference()->IsNull()) {
           break;
         }
-        if ((false)) {
-          LOG(INFO) << "+++ ate hole at " << (topIndex - 1);
+        if (kDebugIRT) {
+          LOG(INFO) << "+++ ate hole at " << (collapse_top_index - 1);
         }
-        numHoles--;
+        current_num_holes_--;
       }
-      segment_state_.parts.numHoles = numHoles + prevState.parts.numHoles;
-      segment_state_.parts.topIndex = topIndex;
+      segment_state_.top_index = collapse_top_index;
+
+      CheckHoleCount(table_, current_num_holes_, previous_state, segment_state_);
     } else {
-      segment_state_.parts.topIndex = topIndex-1;
-      if ((false)) {
-        LOG(INFO) << "+++ ate last entry " << topIndex - 1;
+      segment_state_.top_index = top_index - 1;
+      if (kDebugIRT) {
+        LOG(INFO) << "+++ ate last entry " << top_index - 1;
       }
     }
   } else {
@@ -243,9 +402,10 @@
     }
 
     *table_[idx].GetReference() = GcRoot<mirror::Object>(nullptr);
-    segment_state_.parts.numHoles++;
-    if ((false)) {
-      LOG(INFO) << "+++ left hole at " << idx << ", holes=" << segment_state_.parts.numHoles;
+    current_num_holes_++;
+    CheckHoleCount(table_, current_num_holes_, previous_state, segment_state_);
+    if (kDebugIRT) {
+      LOG(INFO) << "+++ left hole at " << idx << ", holes=" << current_num_holes_;
     }
   }
 
@@ -283,4 +443,14 @@
   ReferenceTable::Dump(os, entries);
 }
 
+void IndirectReferenceTable::SetSegmentState(IRTSegmentState new_state) {
+  if (kDebugIRT) {
+    LOG(INFO) << "Setting segment state: "
+              << segment_state_.top_index
+              << " -> "
+              << new_state.top_index;
+  }
+  segment_state_ = new_state;
+}
+
 }  // namespace art
diff --git a/runtime/indirect_reference_table.h b/runtime/indirect_reference_table.h
index 64de7a8..7e452a2 100644
--- a/runtime/indirect_reference_table.h
+++ b/runtime/indirect_reference_table.h
@@ -20,8 +20,10 @@
 #include <stdint.h>
 
 #include <iosfwd>
+#include <limits>
 #include <string>
 
+#include "base/bit_utils.h"
 #include "base/logging.h"
 #include "base/mutex.h"
 #include "gc_root.h"
@@ -40,165 +42,118 @@
 
 class MemMap;
 
-/*
- * Maintain a table of indirect references.  Used for local/global JNI
- * references.
- *
- * The table contains object references that are part of the GC root set.
- * When an object is added we return an IndirectRef that is not a valid
- * pointer but can be used to find the original value in O(1) time.
- * Conversions to and from indirect references are performed on upcalls
- * and downcalls, so they need to be very fast.
- *
- * To be efficient for JNI local variable storage, we need to provide
- * operations that allow us to operate on segments of the table, where
- * segments are pushed and popped as if on a stack.  For example, deletion
- * of an entry should only succeed if it appears in the current segment,
- * and we want to be able to strip off the current segment quickly when
- * a method returns.  Additions to the table must be made in the current
- * segment even if space is available in an earlier area.
- *
- * A new segment is created when we call into native code from interpreted
- * code, or when we handle the JNI PushLocalFrame function.
- *
- * The GC must be able to scan the entire table quickly.
- *
- * In summary, these must be very fast:
- *  - adding or removing a segment
- *  - adding references to a new segment
- *  - converting an indirect reference back to an Object
- * These can be a little slower, but must still be pretty quick:
- *  - adding references to a "mature" segment
- *  - removing individual references
- *  - scanning the entire table straight through
- *
- * If there's more than one segment, we don't guarantee that the table
- * will fill completely before we fail due to lack of space.  We do ensure
- * that the current segment will pack tightly, which should satisfy JNI
- * requirements (e.g. EnsureLocalCapacity).
- *
- * To make everything fit nicely in 32-bit integers, the maximum size of
- * the table is capped at 64K.
- *
- * Only SynchronizedGet is synchronized.
- */
+// Maintain a table of indirect references.  Used for local/global JNI references.
+//
+// The table contains object references, where the strong (local/global) references are part of the
+// GC root set (but not the weak global references). When an object is added we return an
+// IndirectRef that is not a valid pointer but can be used to find the original value in O(1) time.
+// Conversions to and from indirect references are performed on upcalls and downcalls, so they need
+// to be very fast.
+//
+// To be efficient for JNI local variable storage, we need to provide operations that allow us to
+// operate on segments of the table, where segments are pushed and popped as if on a stack. For
+// example, deletion of an entry should only succeed if it appears in the current segment, and we
+// want to be able to strip off the current segment quickly when a method returns. Additions to the
+// table must be made in the current segment even if space is available in an earlier area.
+//
+// A new segment is created when we call into native code from interpreted code, or when we handle
+// the JNI PushLocalFrame function.
+//
+// The GC must be able to scan the entire table quickly.
+//
+// In summary, these must be very fast:
+//  - adding or removing a segment
+//  - adding references to a new segment
+//  - converting an indirect reference back to an Object
+// These can be a little slower, but must still be pretty quick:
+//  - adding references to a "mature" segment
+//  - removing individual references
+//  - scanning the entire table straight through
+//
+// If there's more than one segment, we don't guarantee that the table will fill completely before
+// we fail due to lack of space. We do ensure that the current segment will pack tightly, which
+// should satisfy JNI requirements (e.g. EnsureLocalCapacity).
+//
+// Only SynchronizedGet is synchronized.
 
-/*
- * Indirect reference definition.  This must be interchangeable with JNI's
- * jobject, and it's convenient to let null be null, so we use void*.
- *
- * We need a 16-bit table index and a 2-bit reference type (global, local,
- * weak global).  Real object pointers will have zeroes in the low 2 or 3
- * bits (4- or 8-byte alignment), so it's useful to put the ref type
- * in the low bits and reserve zero as an invalid value.
- *
- * The remaining 14 bits can be used to detect stale indirect references.
- * For example, if objects don't move, we can use a hash of the original
- * Object* to make sure the entry hasn't been re-used.  (If the Object*
- * we find there doesn't match because of heap movement, we could do a
- * secondary check on the preserved hash value; this implies that creating
- * a global/local ref queries the hash value and forces it to be saved.)
- *
- * A more rigorous approach would be to put a serial number in the extra
- * bits, and keep a copy of the serial number in a parallel table.  This is
- * easier when objects can move, but requires 2x the memory and additional
- * memory accesses on add/get.  It will catch additional problems, e.g.:
- * create iref1 for obj, delete iref1, create iref2 for same obj, lookup
- * iref1.  A pattern based on object bits will miss this.
- */
+// Indirect reference definition.  This must be interchangeable with JNI's jobject, and it's
+// convenient to let null be null, so we use void*.
+//
+// We need a (potentially) large table index and a 2-bit reference type (global, local, weak
+// global). We also reserve some bits to be used to detect stale indirect references: we put a
+// serial number in the extra bits, and keep a copy of the serial number in the table. This requires
+// more memory and additional memory accesses on add/get, but is moving-GC safe. It will catch
+// additional problems, e.g.: create iref1 for obj, delete iref1, create iref2 for same obj,
+// lookup iref1. A pattern based on object bits will miss this.
 typedef void* IndirectRef;
 
-/*
- * Indirect reference kind, used as the two low bits of IndirectRef.
- *
- * For convenience these match up with enum jobjectRefType from jni.h.
- */
+// Indirect reference kind, used as the two low bits of IndirectRef.
+//
+// For convenience these match up with enum jobjectRefType from jni.h.
 enum IndirectRefKind {
-  kHandleScopeOrInvalid = 0,  // <<stack indirect reference table or invalid reference>>
-  kLocal         = 1,  // <<local reference>>
-  kGlobal        = 2,  // <<global reference>>
-  kWeakGlobal    = 3   // <<weak global reference>>
+  kHandleScopeOrInvalid = 0,           // <<stack indirect reference table or invalid reference>>
+  kLocal                = 1,           // <<local reference>>
+  kGlobal               = 2,           // <<global reference>>
+  kWeakGlobal           = 3,           // <<weak global reference>>
+  kLastKind             = kWeakGlobal
 };
 std::ostream& operator<<(std::ostream& os, const IndirectRefKind& rhs);
 const char* GetIndirectRefKindString(const IndirectRefKind& kind);
 
-/*
- * Determine what kind of indirect reference this is.
- */
-static inline IndirectRefKind GetIndirectRefKind(IndirectRef iref) {
-  return static_cast<IndirectRefKind>(reinterpret_cast<uintptr_t>(iref) & 0x03);
-}
+// Table definition.
+//
+// For the global reference table, the expected common operations are adding a new entry and
+// removing a recently-added entry (usually the most-recently-added entry).  For JNI local
+// references, the common operations are adding a new entry and removing an entire table segment.
+//
+// If we delete entries from the middle of the list, we will be left with "holes".  We track the
+// number of holes so that, when adding new elements, we can quickly decide to do a trivial append
+// or go slot-hunting.
+//
+// When the top-most entry is removed, any holes immediately below it are also removed. Thus,
+// deletion of an entry may reduce "top_index" by more than one.
+//
+// To get the desired behavior for JNI locals, we need to know the bottom and top of the current
+// "segment". The top is managed internally, and the bottom is passed in as a function argument.
+// When we call a native method or push a local frame, the current top index gets pushed on, and
+// serves as the new bottom. When we pop a frame off, the value from the stack becomes the new top
+// index, and the value stored in the previous frame becomes the new bottom.
+//
+// Holes are being locally cached for the segment. Otherwise we'd have to pass bottom index and
+// number of holes, which restricts us to 16 bits for the top index. The value is cached within the
+// table. To avoid code in generated JNI transitions, which implicitly form segments, the code for
+// adding and removing references needs to detect the change of a segment. Helper fields are used
+// for this detection.
+//
+// Common alternative implementation: make IndirectRef a pointer to the actual reference slot.
+// Instead of getting a table and doing a lookup, the lookup can be done instantly. Operations like
+// determining the type and deleting the reference are more expensive because the table must be
+// hunted for (i.e. you have to do a pointer comparison to see which table it's in), you can't move
+// the table when expanding it (so realloc() is out), and tricks like serial number checking to
+// detect stale references aren't possible (though we may be able to get similar benefits with other
+// approaches).
+//
+// TODO: consider a "lastDeleteIndex" for quick hole-filling when an add immediately follows a
+// delete; must invalidate after segment pop might be worth only using it for JNI globals.
+//
+// TODO: may want completely different add/remove algorithms for global and local refs to improve
+// performance.  A large circular buffer might reduce the amortized cost of adding global
+// references.
 
-/* use as initial value for "cookie", and when table has only one segment */
-static const uint32_t IRT_FIRST_SEGMENT = 0;
-
-/*
- * Table definition.
- *
- * For the global reference table, the expected common operations are
- * adding a new entry and removing a recently-added entry (usually the
- * most-recently-added entry).  For JNI local references, the common
- * operations are adding a new entry and removing an entire table segment.
- *
- * If "alloc_entries_" is not equal to "max_entries_", the table may expand
- * when entries are added, which means the memory may move.  If you want
- * to keep pointers into "table" rather than offsets, you must use a
- * fixed-size table.
- *
- * If we delete entries from the middle of the list, we will be left with
- * "holes".  We track the number of holes so that, when adding new elements,
- * we can quickly decide to do a trivial append or go slot-hunting.
- *
- * When the top-most entry is removed, any holes immediately below it are
- * also removed.  Thus, deletion of an entry may reduce "topIndex" by more
- * than one.
- *
- * To get the desired behavior for JNI locals, we need to know the bottom
- * and top of the current "segment".  The top is managed internally, and
- * the bottom is passed in as a function argument.  When we call a native method or
- * push a local frame, the current top index gets pushed on, and serves
- * as the new bottom.  When we pop a frame off, the value from the stack
- * becomes the new top index, and the value stored in the previous frame
- * becomes the new bottom.
- *
- * To avoid having to re-scan the table after a pop, we want to push the
- * number of holes in the table onto the stack.  Because of our 64K-entry
- * cap, we can combine the two into a single unsigned 32-bit value.
- * Instead of a "bottom" argument we take a "cookie", which includes the
- * bottom index and the count of holes below the bottom.
- *
- * Common alternative implementation: make IndirectRef a pointer to the
- * actual reference slot.  Instead of getting a table and doing a lookup,
- * the lookup can be done instantly.  Operations like determining the
- * type and deleting the reference are more expensive because the table
- * must be hunted for (i.e. you have to do a pointer comparison to see
- * which table it's in), you can't move the table when expanding it (so
- * realloc() is out), and tricks like serial number checking to detect
- * stale references aren't possible (though we may be able to get similar
- * benefits with other approaches).
- *
- * TODO: consider a "lastDeleteIndex" for quick hole-filling when an
- * add immediately follows a delete; must invalidate after segment pop
- * (which could increase the cost/complexity of method call/return).
- * Might be worth only using it for JNI globals.
- *
- * TODO: may want completely different add/remove algorithms for global
- * and local refs to improve performance.  A large circular buffer might
- * reduce the amortized cost of adding global references.
- *
- */
-union IRTSegmentState {
-  uint32_t          all;
-  struct {
-    uint32_t      topIndex:16;            /* index of first unused entry */
-    uint32_t      numHoles:16;            /* #of holes in entire table */
-  } parts;
+// The state of the current segment. We only store the index. Splitting it for index and hole
+// count restricts the range too much.
+struct IRTSegmentState {
+  uint32_t top_index;
 };
 
+// Use as initial value for "cookie", and when table has only one segment.
+static constexpr IRTSegmentState kIRTFirstSegment = { 0 };
+
 // Try to choose kIRTPrevCount so that sizeof(IrtEntry) is a power of 2.
 // Contains multiple entries but only one active one, this helps us detect use after free errors
 // since the serial stored in the indirect ref wont match.
-static const size_t kIRTPrevCount = kIsDebugBuild ? 7 : 3;
+static constexpr size_t kIRTPrevCount = kIsDebugBuild ? 7 : 3;
+
 class IrtEntry {
  public:
   void Add(ObjPtr<mirror::Object> obj) REQUIRES_SHARED(Locks::mutator_lock_);
@@ -208,6 +163,11 @@
     return &references_[serial_];
   }
 
+  const GcRoot<mirror::Object>* GetReference() const {
+    DCHECK_LT(serial_, kIRTPrevCount);
+    return &references_[serial_];
+  }
+
   uint32_t GetSerial() const {
     return serial_;
   }
@@ -220,6 +180,7 @@
 };
 static_assert(sizeof(IrtEntry) == (1 + kIRTPrevCount) * sizeof(uint32_t),
               "Unexpected sizeof(IrtEntry)");
+static_assert(IsPowerOfTwo(sizeof(IrtEntry)), "Unexpected sizeof(IrtEntry)");
 
 class IrtIterator {
  public:
@@ -257,29 +218,40 @@
 
 class IndirectReferenceTable {
  public:
-  // WARNING: When using with abort_on_error = false, the object may be in a partially
-  //          initialized state. Use IsValid() to check.
-  IndirectReferenceTable(size_t initialCount, size_t maxCount, IndirectRefKind kind,
-                         bool abort_on_error = true);
+  enum class ResizableCapacity {
+    kNo,
+    kYes
+  };
+
+  // WARNING: Construction of the IndirectReferenceTable may fail.
+  // error_msg must not be null. If error_msg is set by the constructor, then
+  // construction has failed and the IndirectReferenceTable will be in an
+  // invalid state. Use IsValid to check whether the object is in an invalid
+  // state.
+  IndirectReferenceTable(size_t max_count,
+                         IndirectRefKind kind,
+                         ResizableCapacity resizable,
+                         std::string* error_msg);
 
   ~IndirectReferenceTable();
 
+  /*
+   * Checks whether construction of the IndirectReferenceTable succeeded.
+   *
+   * This object must only be used if IsValid() returns true. It is safe to
+   * call IsValid from multiple threads without locking or other explicit
+   * synchronization.
+   */
   bool IsValid() const;
 
-  /*
-   * Add a new entry.  "obj" must be a valid non-nullptr object reference.
-   *
-   * Returns nullptr if the table is full (max entries reached, or alloc
-   * failed during expansion).
-   */
-  IndirectRef Add(uint32_t cookie, ObjPtr<mirror::Object> obj)
+  // Add a new entry. "obj" must be a valid non-null object reference. This function will
+  // abort if the table is full (max entries reached, or expansion failed).
+  IndirectRef Add(IRTSegmentState previous_state, ObjPtr<mirror::Object> obj)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  /*
-   * Given an IndirectRef in the table, return the Object it refers to.
-   *
-   * Returns kInvalidIndirectRefObject if iref is invalid.
-   */
+  // Given an IndirectRef in the table, return the Object it refers to.
+  //
+  // This function may abort under error conditions.
   template<ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   ObjPtr<mirror::Object> Get(IndirectRef iref) const REQUIRES_SHARED(Locks::mutator_lock_)
       ALWAYS_INLINE;
@@ -291,34 +263,26 @@
     return Get<kReadBarrierOption>(iref);
   }
 
-  /*
-   * Update an existing entry.
-   *
-   * Updates an existing indirect reference to point to a new object.
-   */
+  // Updates an existing indirect reference to point to a new object.
   void Update(IndirectRef iref, ObjPtr<mirror::Object> obj) REQUIRES_SHARED(Locks::mutator_lock_);
 
-  /*
-   * Remove an existing entry.
-   *
-   * If the entry is not between the current top index and the bottom index
-   * specified by the cookie, we don't remove anything.  This is the behavior
-   * required by JNI's DeleteLocalRef function.
-   *
-   * Returns "false" if nothing was removed.
-   */
-  bool Remove(uint32_t cookie, IndirectRef iref);
+  // Remove an existing entry.
+  //
+  // If the entry is not between the current top index and the bottom index
+  // specified by the cookie, we don't remove anything.  This is the behavior
+  // required by JNI's DeleteLocalRef function.
+  //
+  // Returns "false" if nothing was removed.
+  bool Remove(IRTSegmentState previous_state, IndirectRef iref);
 
   void AssertEmpty() REQUIRES_SHARED(Locks::mutator_lock_);
 
   void Dump(std::ostream& os) const REQUIRES_SHARED(Locks::mutator_lock_);
 
-  /*
-   * Return the #of entries in the entire table.  This includes holes, and
-   * so may be larger than the actual number of "live" entries.
-   */
+  // Return the #of entries in the entire table.  This includes holes, and
+  // so may be larger than the actual number of "live" entries.
   size_t Capacity() const {
-    return segment_state_.parts.topIndex;
+    return segment_state_.top_index;
   }
 
   // Note IrtIterator does not have a read barrier as it's used to visit roots.
@@ -333,13 +297,11 @@
   void VisitRoots(RootVisitor* visitor, const RootInfo& root_info)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  uint32_t GetSegmentState() const {
-    return segment_state_.all;
+  IRTSegmentState GetSegmentState() const {
+    return segment_state_;
   }
 
-  void SetSegmentState(uint32_t new_state) {
-    segment_state_.all = new_state;
-  }
+  void SetSegmentState(IRTSegmentState new_state);
 
   static Offset SegmentStateOffset(size_t pointer_size ATTRIBUTE_UNUSED) {
     // Note: Currently segment_state_ is at offset 0. We're testing the expected value in
@@ -351,32 +313,74 @@
   // Release pages past the end of the table that may have previously held references.
   void Trim() REQUIRES_SHARED(Locks::mutator_lock_);
 
- private:
-  // Extract the table index from an indirect reference.
-  static uint32_t ExtractIndex(IndirectRef iref) {
-    uintptr_t uref = reinterpret_cast<uintptr_t>(iref);
-    return (uref >> 2) & 0xffff;
+  // Determine what kind of indirect reference this is. Opposite of EncodeIndirectRefKind.
+  ALWAYS_INLINE static inline IndirectRefKind GetIndirectRefKind(IndirectRef iref) {
+    return DecodeIndirectRefKind(reinterpret_cast<uintptr_t>(iref));
   }
 
-  /*
-   * The object pointer itself is subject to relocation in some GC
-   * implementations, so we shouldn't really be using it here.
-   */
-  IndirectRef ToIndirectRef(uint32_t tableIndex) const {
-    DCHECK_LT(tableIndex, 65536U);
-    uint32_t serialChunk = table_[tableIndex].GetSerial();
-    uintptr_t uref = (serialChunk << 20) | (tableIndex << 2) | kind_;
-    return reinterpret_cast<IndirectRef>(uref);
+ private:
+  static constexpr size_t kSerialBits = MinimumBitsToStore(kIRTPrevCount);
+  static constexpr uint32_t kShiftedSerialMask = (1u << kSerialBits) - 1;
+
+  static constexpr size_t kKindBits = MinimumBitsToStore(
+      static_cast<uint32_t>(IndirectRefKind::kLastKind));
+  static constexpr uint32_t kKindMask = (1u << kKindBits) - 1;
+
+  static constexpr uintptr_t EncodeIndex(uint32_t table_index) {
+    static_assert(sizeof(IndirectRef) == sizeof(uintptr_t), "Unexpected IndirectRef size");
+    DCHECK_LE(MinimumBitsToStore(table_index), BitSizeOf<uintptr_t>() - kSerialBits - kKindBits);
+    return (static_cast<uintptr_t>(table_index) << kKindBits << kSerialBits);
   }
+  static constexpr uint32_t DecodeIndex(uintptr_t uref) {
+    return static_cast<uint32_t>((uref >> kKindBits) >> kSerialBits);
+  }
+
+  static constexpr uintptr_t EncodeIndirectRefKind(IndirectRefKind kind) {
+    return static_cast<uintptr_t>(kind);
+  }
+  static constexpr IndirectRefKind DecodeIndirectRefKind(uintptr_t uref) {
+    return static_cast<IndirectRefKind>(uref & kKindMask);
+  }
+
+  static constexpr uintptr_t EncodeSerial(uint32_t serial) {
+    DCHECK_LE(MinimumBitsToStore(serial), kSerialBits);
+    return serial << kKindBits;
+  }
+  static constexpr uint32_t DecodeSerial(uintptr_t uref) {
+    return static_cast<uint32_t>(uref >> kKindBits) & kShiftedSerialMask;
+  }
+
+  constexpr uintptr_t EncodeIndirectRef(uint32_t table_index, uint32_t serial) const {
+    DCHECK_LT(table_index, max_entries_);
+    return EncodeIndex(table_index) | EncodeSerial(serial) | EncodeIndirectRefKind(kind_);
+  }
+
+  static void ConstexprChecks();
+
+  // Extract the table index from an indirect reference.
+  ALWAYS_INLINE static uint32_t ExtractIndex(IndirectRef iref) {
+    return DecodeIndex(reinterpret_cast<uintptr_t>(iref));
+  }
+
+  IndirectRef ToIndirectRef(uint32_t table_index) const {
+    DCHECK_LT(table_index, max_entries_);
+    uint32_t serial = table_[table_index].GetSerial();
+    return reinterpret_cast<IndirectRef>(EncodeIndirectRef(table_index, serial));
+  }
+
+  // Resize the backing table. Currently must be larger than the current size.
+  bool Resize(size_t new_size, std::string* error_msg);
+
+  void RecoverHoles(IRTSegmentState from);
 
   // Abort if check_jni is not enabled. Otherwise, just log as an error.
   static void AbortIfNoCheckJNI(const std::string& msg);
 
   /* extra debugging checks */
   bool GetChecked(IndirectRef) const REQUIRES_SHARED(Locks::mutator_lock_);
-  bool CheckEntry(const char*, IndirectRef, int) const;
+  bool CheckEntry(const char*, IndirectRef, uint32_t) const;
 
-  /* semi-public - read/write by jni down calls */
+  /// semi-public - read/write by jni down calls.
   IRTSegmentState segment_state_;
 
   // Mem map where we store the indirect refs.
@@ -384,10 +388,21 @@
   // bottom of the stack. Do not directly access the object references
   // in this as they are roots. Use Get() that has a read barrier.
   IrtEntry* table_;
-  /* bit mask, ORed into all irefs */
+  // bit mask, ORed into all irefs.
   const IndirectRefKind kind_;
-  /* max #of entries allowed */
-  const size_t max_entries_;
+
+  // max #of entries allowed (modulo resizing).
+  size_t max_entries_;
+
+  // Some values to retain old behavior with holes. Description of the algorithm is in the .cc
+  // file.
+  // TODO: Consider other data structures for compact tables, e.g., free lists.
+  size_t current_num_holes_;
+  IRTSegmentState last_known_previous_state_;
+
+  // Whether the table's capacity may be resized. As there are no locks used, it is the caller's
+  // responsibility to ensure thread-safety.
+  ResizableCapacity resizable_;
 };
 
 }  // namespace art
diff --git a/runtime/indirect_reference_table_test.cc b/runtime/indirect_reference_table_test.cc
index 0380f3e..722b411 100644
--- a/runtime/indirect_reference_table_test.cc
+++ b/runtime/indirect_reference_table_test.cc
@@ -48,22 +48,27 @@
   ScopedLogSeverity sls(LogSeverity::FATAL);
 
   ScopedObjectAccess soa(Thread::Current());
-  static const size_t kTableInitial = 10;
   static const size_t kTableMax = 20;
-  IndirectReferenceTable irt(kTableInitial, kTableMax, kGlobal);
+  std::string error_msg;
+  IndirectReferenceTable irt(kTableMax,
+                             kGlobal,
+                             IndirectReferenceTable::ResizableCapacity::kNo,
+                             &error_msg);
+  ASSERT_TRUE(irt.IsValid()) << error_msg;
 
   mirror::Class* c = class_linker_->FindSystemClass(soa.Self(), "Ljava/lang/Object;");
+  StackHandleScope<4> hs(soa.Self());
   ASSERT_TRUE(c != nullptr);
-  mirror::Object* obj0 = c->AllocObject(soa.Self());
-  ASSERT_TRUE(obj0 != nullptr);
-  mirror::Object* obj1 = c->AllocObject(soa.Self());
-  ASSERT_TRUE(obj1 != nullptr);
-  mirror::Object* obj2 = c->AllocObject(soa.Self());
-  ASSERT_TRUE(obj2 != nullptr);
-  mirror::Object* obj3 = c->AllocObject(soa.Self());
-  ASSERT_TRUE(obj3 != nullptr);
+  Handle<mirror::Object> obj0 = hs.NewHandle(c->AllocObject(soa.Self()));
+  ASSERT_TRUE(obj0.Get() != nullptr);
+  Handle<mirror::Object> obj1 = hs.NewHandle(c->AllocObject(soa.Self()));
+  ASSERT_TRUE(obj1.Get() != nullptr);
+  Handle<mirror::Object> obj2 = hs.NewHandle(c->AllocObject(soa.Self()));
+  ASSERT_TRUE(obj2.Get() != nullptr);
+  Handle<mirror::Object> obj3 = hs.NewHandle(c->AllocObject(soa.Self()));
+  ASSERT_TRUE(obj3.Get() != nullptr);
 
-  const uint32_t cookie = IRT_FIRST_SEGMENT;
+  const IRTSegmentState cookie = kIRTFirstSegment;
 
   CheckDump(&irt, 0, 0);
 
@@ -71,19 +76,19 @@
   EXPECT_FALSE(irt.Remove(cookie, iref0)) << "unexpectedly successful removal";
 
   // Add three, check, remove in the order in which they were added.
-  iref0 = irt.Add(cookie, obj0);
+  iref0 = irt.Add(cookie, obj0.Get());
   EXPECT_TRUE(iref0 != nullptr);
   CheckDump(&irt, 1, 1);
-  IndirectRef iref1 = irt.Add(cookie, obj1);
+  IndirectRef iref1 = irt.Add(cookie, obj1.Get());
   EXPECT_TRUE(iref1 != nullptr);
   CheckDump(&irt, 2, 2);
-  IndirectRef iref2 = irt.Add(cookie, obj2);
+  IndirectRef iref2 = irt.Add(cookie, obj2.Get());
   EXPECT_TRUE(iref2 != nullptr);
   CheckDump(&irt, 3, 3);
 
-  EXPECT_OBJ_PTR_EQ(obj0, irt.Get(iref0));
-  EXPECT_OBJ_PTR_EQ(obj1, irt.Get(iref1));
-  EXPECT_OBJ_PTR_EQ(obj2, irt.Get(iref2));
+  EXPECT_OBJ_PTR_EQ(obj0.Get(), irt.Get(iref0));
+  EXPECT_OBJ_PTR_EQ(obj1.Get(), irt.Get(iref1));
+  EXPECT_OBJ_PTR_EQ(obj2.Get(), irt.Get(iref2));
 
   EXPECT_TRUE(irt.Remove(cookie, iref0));
   CheckDump(&irt, 2, 2);
@@ -99,11 +104,11 @@
   EXPECT_TRUE(irt.Get(iref0) == nullptr);
 
   // Add three, remove in the opposite order.
-  iref0 = irt.Add(cookie, obj0);
+  iref0 = irt.Add(cookie, obj0.Get());
   EXPECT_TRUE(iref0 != nullptr);
-  iref1 = irt.Add(cookie, obj1);
+  iref1 = irt.Add(cookie, obj1.Get());
   EXPECT_TRUE(iref1 != nullptr);
-  iref2 = irt.Add(cookie, obj2);
+  iref2 = irt.Add(cookie, obj2.Get());
   EXPECT_TRUE(iref2 != nullptr);
   CheckDump(&irt, 3, 3);
 
@@ -119,11 +124,11 @@
 
   // Add three, remove middle / middle / bottom / top.  (Second attempt
   // to remove middle should fail.)
-  iref0 = irt.Add(cookie, obj0);
+  iref0 = irt.Add(cookie, obj0.Get());
   EXPECT_TRUE(iref0 != nullptr);
-  iref1 = irt.Add(cookie, obj1);
+  iref1 = irt.Add(cookie, obj1.Get());
   EXPECT_TRUE(iref1 != nullptr);
-  iref2 = irt.Add(cookie, obj2);
+  iref2 = irt.Add(cookie, obj2.Get());
   EXPECT_TRUE(iref2 != nullptr);
   CheckDump(&irt, 3, 3);
 
@@ -148,20 +153,20 @@
   // Add four entries.  Remove #1, add new entry, verify that table size
   // is still 4 (i.e. holes are getting filled).  Remove #1 and #3, verify
   // that we delete one and don't hole-compact the other.
-  iref0 = irt.Add(cookie, obj0);
+  iref0 = irt.Add(cookie, obj0.Get());
   EXPECT_TRUE(iref0 != nullptr);
-  iref1 = irt.Add(cookie, obj1);
+  iref1 = irt.Add(cookie, obj1.Get());
   EXPECT_TRUE(iref1 != nullptr);
-  iref2 = irt.Add(cookie, obj2);
+  iref2 = irt.Add(cookie, obj2.Get());
   EXPECT_TRUE(iref2 != nullptr);
-  IndirectRef iref3 = irt.Add(cookie, obj3);
+  IndirectRef iref3 = irt.Add(cookie, obj3.Get());
   EXPECT_TRUE(iref3 != nullptr);
   CheckDump(&irt, 4, 4);
 
   ASSERT_TRUE(irt.Remove(cookie, iref1));
   CheckDump(&irt, 3, 3);
 
-  iref1 = irt.Add(cookie, obj1);
+  iref1 = irt.Add(cookie, obj1.Get());
   EXPECT_TRUE(iref1 != nullptr);
 
   ASSERT_EQ(4U, irt.Capacity()) << "hole not filled";
@@ -184,12 +189,12 @@
   // Add an entry, remove it, add a new entry, and try to use the original
   // iref.  They have the same slot number but are for different objects.
   // With the extended checks in place, this should fail.
-  iref0 = irt.Add(cookie, obj0);
+  iref0 = irt.Add(cookie, obj0.Get());
   EXPECT_TRUE(iref0 != nullptr);
   CheckDump(&irt, 1, 1);
   ASSERT_TRUE(irt.Remove(cookie, iref0));
   CheckDump(&irt, 0, 0);
-  iref1 = irt.Add(cookie, obj1);
+  iref1 = irt.Add(cookie, obj1.Get());
   EXPECT_TRUE(iref1 != nullptr);
   CheckDump(&irt, 1, 1);
   ASSERT_FALSE(irt.Remove(cookie, iref0)) << "mismatched del succeeded";
@@ -200,12 +205,12 @@
 
   // Same as above, but with the same object.  A more rigorous checker
   // (e.g. with slot serialization) will catch this.
-  iref0 = irt.Add(cookie, obj0);
+  iref0 = irt.Add(cookie, obj0.Get());
   EXPECT_TRUE(iref0 != nullptr);
   CheckDump(&irt, 1, 1);
   ASSERT_TRUE(irt.Remove(cookie, iref0));
   CheckDump(&irt, 0, 0);
-  iref1 = irt.Add(cookie, obj0);
+  iref1 = irt.Add(cookie, obj0.Get());
   EXPECT_TRUE(iref1 != nullptr);
   CheckDump(&irt, 1, 1);
   if (iref0 != iref1) {
@@ -220,7 +225,7 @@
   ASSERT_TRUE(irt.Get(nullptr) == nullptr);
 
   // Stale lookup.
-  iref0 = irt.Add(cookie, obj0);
+  iref0 = irt.Add(cookie, obj0.Get());
   EXPECT_TRUE(iref0 != nullptr);
   CheckDump(&irt, 1, 1);
   ASSERT_TRUE(irt.Remove(cookie, iref0));
@@ -229,14 +234,15 @@
 
   // Test table resizing.
   // These ones fit...
+  static const size_t kTableInitial = kTableMax / 2;
   IndirectRef manyRefs[kTableInitial];
   for (size_t i = 0; i < kTableInitial; i++) {
-    manyRefs[i] = irt.Add(cookie, obj0);
+    manyRefs[i] = irt.Add(cookie, obj0.Get());
     ASSERT_TRUE(manyRefs[i] != nullptr) << "Failed adding " << i;
     CheckDump(&irt, i + 1, 1);
   }
   // ...this one causes overflow.
-  iref0 = irt.Add(cookie, obj0);
+  iref0 = irt.Add(cookie, obj0.Get());
   ASSERT_TRUE(iref0 != nullptr);
   ASSERT_EQ(kTableInitial + 1, irt.Capacity());
   CheckDump(&irt, kTableInitial + 1, 1);
@@ -254,4 +260,250 @@
   CheckDump(&irt, 0, 0);
 }
 
+TEST_F(IndirectReferenceTableTest, Holes) {
+  // Test the explicitly named cases from the IRT implementation:
+  //
+  // 1) Segment with holes (current_num_holes_ > 0), push new segment, add/remove reference
+  // 2) Segment with holes (current_num_holes_ > 0), pop segment, add/remove reference
+  // 3) Segment with holes (current_num_holes_ > 0), push new segment, pop segment, add/remove
+  //    reference
+  // 4) Empty segment, push new segment, create a hole, pop a segment, add/remove a reference
+  // 5) Base segment, push new segment, create a hole, pop a segment, push new segment, add/remove
+  //    reference
+
+  ScopedObjectAccess soa(Thread::Current());
+  static const size_t kTableMax = 10;
+
+  mirror::Class* c = class_linker_->FindSystemClass(soa.Self(), "Ljava/lang/Object;");
+  StackHandleScope<5> hs(soa.Self());
+  ASSERT_TRUE(c != nullptr);
+  Handle<mirror::Object> obj0 = hs.NewHandle(c->AllocObject(soa.Self()));
+  ASSERT_TRUE(obj0.Get() != nullptr);
+  Handle<mirror::Object> obj1 = hs.NewHandle(c->AllocObject(soa.Self()));
+  ASSERT_TRUE(obj1.Get() != nullptr);
+  Handle<mirror::Object> obj2 = hs.NewHandle(c->AllocObject(soa.Self()));
+  ASSERT_TRUE(obj2.Get() != nullptr);
+  Handle<mirror::Object> obj3 = hs.NewHandle(c->AllocObject(soa.Self()));
+  ASSERT_TRUE(obj3.Get() != nullptr);
+  Handle<mirror::Object> obj4 = hs.NewHandle(c->AllocObject(soa.Self()));
+  ASSERT_TRUE(obj4.Get() != nullptr);
+
+  std::string error_msg;
+
+  // 1) Segment with holes (current_num_holes_ > 0), push new segment, add/remove reference.
+  {
+    IndirectReferenceTable irt(kTableMax,
+                               kGlobal,
+                               IndirectReferenceTable::ResizableCapacity::kNo,
+                               &error_msg);
+    ASSERT_TRUE(irt.IsValid()) << error_msg;
+
+    const IRTSegmentState cookie0 = kIRTFirstSegment;
+
+    CheckDump(&irt, 0, 0);
+
+    IndirectRef iref0 = irt.Add(cookie0, obj0.Get());
+    IndirectRef iref1 = irt.Add(cookie0, obj1.Get());
+    IndirectRef iref2 = irt.Add(cookie0, obj2.Get());
+
+    EXPECT_TRUE(irt.Remove(cookie0, iref1));
+
+    // New segment.
+    const IRTSegmentState cookie1 = irt.GetSegmentState();
+
+    IndirectRef iref3 = irt.Add(cookie1, obj3.Get());
+
+    // Must not have filled the previous hole.
+    EXPECT_EQ(irt.Capacity(), 4u);
+    EXPECT_TRUE(irt.Get(iref1) == nullptr);
+    CheckDump(&irt, 3, 3);
+
+    UNUSED(iref0, iref1, iref2, iref3);
+  }
+
+  // 2) Segment with holes (current_num_holes_ > 0), pop segment, add/remove reference
+  {
+    IndirectReferenceTable irt(kTableMax,
+                               kGlobal,
+                               IndirectReferenceTable::ResizableCapacity::kNo,
+                               &error_msg);
+    ASSERT_TRUE(irt.IsValid()) << error_msg;
+
+    const IRTSegmentState cookie0 = kIRTFirstSegment;
+
+    CheckDump(&irt, 0, 0);
+
+    IndirectRef iref0 = irt.Add(cookie0, obj0.Get());
+
+    // New segment.
+    const IRTSegmentState cookie1 = irt.GetSegmentState();
+
+    IndirectRef iref1 = irt.Add(cookie1, obj1.Get());
+    IndirectRef iref2 = irt.Add(cookie1, obj2.Get());
+    IndirectRef iref3 = irt.Add(cookie1, obj3.Get());
+
+    EXPECT_TRUE(irt.Remove(cookie1, iref2));
+
+    // Pop segment.
+    irt.SetSegmentState(cookie1);
+
+    IndirectRef iref4 = irt.Add(cookie1, obj4.Get());
+
+    EXPECT_EQ(irt.Capacity(), 2u);
+    EXPECT_TRUE(irt.Get(iref2) == nullptr);
+    CheckDump(&irt, 2, 2);
+
+    UNUSED(iref0, iref1, iref2, iref3, iref4);
+  }
+
+  // 3) Segment with holes (current_num_holes_ > 0), push new segment, pop segment, add/remove
+  //    reference.
+  {
+    IndirectReferenceTable irt(kTableMax,
+                               kGlobal,
+                               IndirectReferenceTable::ResizableCapacity::kNo,
+                               &error_msg);
+    ASSERT_TRUE(irt.IsValid()) << error_msg;
+
+    const IRTSegmentState cookie0 = kIRTFirstSegment;
+
+    CheckDump(&irt, 0, 0);
+
+    IndirectRef iref0 = irt.Add(cookie0, obj0.Get());
+
+    // New segment.
+    const IRTSegmentState cookie1 = irt.GetSegmentState();
+
+    IndirectRef iref1 = irt.Add(cookie1, obj1.Get());
+    IndirectRef iref2 = irt.Add(cookie1, obj2.Get());
+
+    EXPECT_TRUE(irt.Remove(cookie1, iref1));
+
+    // New segment.
+    const IRTSegmentState cookie2 = irt.GetSegmentState();
+
+    IndirectRef iref3 = irt.Add(cookie2, obj3.Get());
+
+    // Pop segment.
+    irt.SetSegmentState(cookie2);
+
+    IndirectRef iref4 = irt.Add(cookie1, obj4.Get());
+
+    EXPECT_EQ(irt.Capacity(), 3u);
+    EXPECT_TRUE(irt.Get(iref1) == nullptr);
+    CheckDump(&irt, 3, 3);
+
+    UNUSED(iref0, iref1, iref2, iref3, iref4);
+  }
+
+  // 4) Empty segment, push new segment, create a hole, pop a segment, add/remove a reference.
+  {
+    IndirectReferenceTable irt(kTableMax,
+                               kGlobal,
+                               IndirectReferenceTable::ResizableCapacity::kNo,
+                               &error_msg);
+    ASSERT_TRUE(irt.IsValid()) << error_msg;
+
+    const IRTSegmentState cookie0 = kIRTFirstSegment;
+
+    CheckDump(&irt, 0, 0);
+
+    IndirectRef iref0 = irt.Add(cookie0, obj0.Get());
+
+    // New segment.
+    const IRTSegmentState cookie1 = irt.GetSegmentState();
+
+    IndirectRef iref1 = irt.Add(cookie1, obj1.Get());
+    EXPECT_TRUE(irt.Remove(cookie1, iref1));
+
+    // Emptied segment, push new one.
+    const IRTSegmentState cookie2 = irt.GetSegmentState();
+
+    IndirectRef iref2 = irt.Add(cookie1, obj1.Get());
+    IndirectRef iref3 = irt.Add(cookie1, obj2.Get());
+    IndirectRef iref4 = irt.Add(cookie1, obj3.Get());
+
+    EXPECT_TRUE(irt.Remove(cookie1, iref3));
+
+    // Pop segment.
+    UNUSED(cookie2);
+    irt.SetSegmentState(cookie1);
+
+    IndirectRef iref5 = irt.Add(cookie1, obj4.Get());
+
+    EXPECT_EQ(irt.Capacity(), 2u);
+    EXPECT_TRUE(irt.Get(iref3) == nullptr);
+    CheckDump(&irt, 2, 2);
+
+    UNUSED(iref0, iref1, iref2, iref3, iref4, iref5);
+  }
+
+  // 5) Base segment, push new segment, create a hole, pop a segment, push new segment, add/remove
+  //    reference
+  {
+    IndirectReferenceTable irt(kTableMax,
+                               kGlobal,
+                               IndirectReferenceTable::ResizableCapacity::kNo,
+                               &error_msg);
+    ASSERT_TRUE(irt.IsValid()) << error_msg;
+
+    const IRTSegmentState cookie0 = kIRTFirstSegment;
+
+    CheckDump(&irt, 0, 0);
+
+    IndirectRef iref0 = irt.Add(cookie0, obj0.Get());
+
+    // New segment.
+    const IRTSegmentState cookie1 = irt.GetSegmentState();
+
+    IndirectRef iref1 = irt.Add(cookie1, obj1.Get());
+    IndirectRef iref2 = irt.Add(cookie1, obj1.Get());
+    IndirectRef iref3 = irt.Add(cookie1, obj2.Get());
+
+    EXPECT_TRUE(irt.Remove(cookie1, iref2));
+
+    // Pop segment.
+    irt.SetSegmentState(cookie1);
+
+    // Push segment.
+    const IRTSegmentState cookie1_second = irt.GetSegmentState();
+    UNUSED(cookie1_second);
+
+    IndirectRef iref4 = irt.Add(cookie1, obj3.Get());
+
+    EXPECT_EQ(irt.Capacity(), 2u);
+    EXPECT_TRUE(irt.Get(iref3) == nullptr);
+    CheckDump(&irt, 2, 2);
+
+    UNUSED(iref0, iref1, iref2, iref3, iref4);
+  }
+}
+
+TEST_F(IndirectReferenceTableTest, Resize) {
+  ScopedObjectAccess soa(Thread::Current());
+  static const size_t kTableMax = 512;
+
+  mirror::Class* c = class_linker_->FindSystemClass(soa.Self(), "Ljava/lang/Object;");
+  StackHandleScope<1> hs(soa.Self());
+  ASSERT_TRUE(c != nullptr);
+  Handle<mirror::Object> obj0 = hs.NewHandle(c->AllocObject(soa.Self()));
+  ASSERT_TRUE(obj0.Get() != nullptr);
+
+  std::string error_msg;
+  IndirectReferenceTable irt(kTableMax,
+                             kLocal,
+                             IndirectReferenceTable::ResizableCapacity::kYes,
+                             &error_msg);
+  ASSERT_TRUE(irt.IsValid()) << error_msg;
+
+  CheckDump(&irt, 0, 0);
+  const IRTSegmentState cookie = kIRTFirstSegment;
+
+  for (size_t i = 0; i != kTableMax + 1; ++i) {
+    irt.Add(cookie, obj0.Get());
+  }
+
+  EXPECT_EQ(irt.Capacity(), kTableMax + 1);
+}
+
 }  // namespace art
diff --git a/runtime/instrumentation.cc b/runtime/instrumentation.cc
index a73970b..d4c322e 100644
--- a/runtime/instrumentation.cc
+++ b/runtime/instrumentation.cc
@@ -55,8 +55,8 @@
   explicit InstallStubsClassVisitor(Instrumentation* instrumentation)
       : instrumentation_(instrumentation) {}
 
-  bool operator()(mirror::Class* klass) OVERRIDE REQUIRES(Locks::mutator_lock_) {
-    instrumentation_->InstallStubsForClass(klass);
+  bool operator()(ObjPtr<mirror::Class> klass) OVERRIDE REQUIRES(Locks::mutator_lock_) {
+    instrumentation_->InstallStubsForClass(klass.Ptr());
     return true;  // we visit all classes.
   }
 
@@ -234,8 +234,8 @@
         CHECK_LT(instrumentation_stack_depth_, instrumentation_stack_->size());
         const InstrumentationStackFrame& frame =
             instrumentation_stack_->at(instrumentation_stack_depth_);
-        CHECK_EQ(m, frame.method_) << "Expected " << PrettyMethod(m)
-                                   << ", Found " << PrettyMethod(frame.method_);
+        CHECK_EQ(m, frame.method_) << "Expected " << ArtMethod::PrettyMethod(m)
+                                   << ", Found " << ArtMethod::PrettyMethod(frame.method_);
         return_pc = frame.return_pc_;
         if (kVerboseInstrumentation) {
           LOG(INFO) << "Ignoring already instrumented " << frame.Dump();
@@ -337,7 +337,7 @@
       if (GetCurrentQuickFrame() == nullptr) {
         if (kVerboseInstrumentation) {
           LOG(INFO) << "  Ignoring a shadow frame. Frame " << GetFrameId()
-              << " Method=" << PrettyMethod(m);
+              << " Method=" << ArtMethod::PrettyMethod(m);
         }
         return true;  // Ignore shadow frames.
       }
@@ -358,7 +358,7 @@
           if (instrumentation_frame.interpreter_entry_) {
             CHECK(m == Runtime::Current()->GetCalleeSaveMethod(Runtime::kSaveRefsAndArgs));
           } else {
-            CHECK(m == instrumentation_frame.method_) << PrettyMethod(m);
+            CHECK(m == instrumentation_frame.method_) << ArtMethod::PrettyMethod(m);
           }
           SetReturnPc(instrumentation_frame.return_pc_);
           if (instrumentation_->ShouldNotifyMethodEnterExitEvents()) {
@@ -647,7 +647,15 @@
   } else {
     MutexLock mu(self, *Locks::runtime_shutdown_lock_);
     SetQuickAllocEntryPointsInstrumented(instrumented);
-    ResetQuickAllocEntryPoints();
+
+    // Note: ResetQuickAllocEntryPoints only works when the runtime is started. Manually run the
+    //       update for just this thread.
+    // Note: self may be null. One of those paths is setting instrumentation in the Heap
+    //       constructor for gcstress mode.
+    if (self != nullptr) {
+      ResetQuickAllocEntryPointsForThread(self, nullptr);
+    }
+
     alloc_entrypoints_instrumented_ = instrumented;
   }
 }
@@ -765,7 +773,7 @@
   {
     WriterMutexLock mu(self, deoptimized_methods_lock_);
     bool has_not_been_deoptimized = AddDeoptimizedMethod(method);
-    CHECK(has_not_been_deoptimized) << "Method " << PrettyMethod(method)
+    CHECK(has_not_been_deoptimized) << "Method " << ArtMethod::PrettyMethod(method)
         << " is already deoptimized";
   }
   if (!interpreter_stubs_installed_) {
@@ -789,7 +797,7 @@
   {
     WriterMutexLock mu(self, deoptimized_methods_lock_);
     bool found_and_erased = RemoveDeoptimizedMethod(method);
-    CHECK(found_and_erased) << "Method " << PrettyMethod(method)
+    CHECK(found_and_erased) << "Method " << ArtMethod::PrettyMethod(method)
         << " is not deoptimized";
     empty = IsDeoptimizedMethodsEmpty();
   }
@@ -1035,7 +1043,8 @@
   size_t frame_id = StackVisitor::ComputeNumFrames(self, kInstrumentationStackWalk);
   std::deque<instrumentation::InstrumentationStackFrame>* stack = self->GetInstrumentationStack();
   if (kVerboseInstrumentation) {
-    LOG(INFO) << "Entering " << PrettyMethod(method) << " from PC " << reinterpret_cast<void*>(lr);
+    LOG(INFO) << "Entering " << ArtMethod::PrettyMethod(method) << " from PC "
+              << reinterpret_cast<void*>(lr);
   }
   instrumentation::InstrumentationStackFrame instrumentation_frame(this_object, method, lr,
                                                                    frame_id, interpreter_entry);
@@ -1090,8 +1099,8 @@
   if (deoptimize && Runtime::Current()->IsDeoptimizeable(*return_pc)) {
     if (kVerboseInstrumentation) {
       LOG(INFO) << StringPrintf("Deoptimizing %s by returning from %s with result %#" PRIx64 " in ",
-                                PrettyMethod(visitor.caller).c_str(),
-                                PrettyMethod(method).c_str(),
+                                visitor.caller->PrettyMethod().c_str(),
+                                method->PrettyMethod().c_str(),
                                 return_value.GetJ()) << *self;
     }
     self->PushDeoptimizationContext(return_value,
@@ -1102,7 +1111,7 @@
                                   reinterpret_cast<uintptr_t>(GetQuickDeoptimizationEntryPoint()));
   } else {
     if (kVerboseInstrumentation) {
-      LOG(INFO) << "Returning from " << PrettyMethod(method)
+      LOG(INFO) << "Returning from " << method->PrettyMethod()
                 << " to PC " << reinterpret_cast<void*>(*return_pc);
     }
     return GetTwoWordSuccessValue(0, *return_pc);
@@ -1120,11 +1129,11 @@
   ArtMethod* method = instrumentation_frame.method_;
   if (is_deoptimization) {
     if (kVerboseInstrumentation) {
-      LOG(INFO) << "Popping for deoptimization " << PrettyMethod(method);
+      LOG(INFO) << "Popping for deoptimization " << ArtMethod::PrettyMethod(method);
     }
   } else {
     if (kVerboseInstrumentation) {
-      LOG(INFO) << "Popping for unwind " << PrettyMethod(method);
+      LOG(INFO) << "Popping for unwind " << ArtMethod::PrettyMethod(method);
     }
 
     // Notify listeners of method unwind.
@@ -1138,7 +1147,7 @@
 
 std::string InstrumentationStackFrame::Dump() const {
   std::ostringstream os;
-  os << "Frame " << frame_id_ << " " << PrettyMethod(method_) << ":"
+  os << "Frame " << frame_id_ << " " << ArtMethod::PrettyMethod(method_) << ":"
       << reinterpret_cast<void*>(return_pc_) << " this=" << reinterpret_cast<void*>(this_object_);
   return os.str();
 }
diff --git a/runtime/intern_table.cc b/runtime/intern_table.cc
index be061be..9c05d3c 100644
--- a/runtime/intern_table.cc
+++ b/runtime/intern_table.cc
@@ -33,8 +33,7 @@
 namespace art {
 
 InternTable::InternTable()
-    : images_added_to_intern_table_(false),
-      log_new_roots_(false),
+    : log_new_roots_(false),
       weak_intern_condition_("New intern condition", *Locks::intern_table_lock_),
       weak_root_state_(gc::kWeakRootStateNormal) {
 }
@@ -64,9 +63,9 @@
     strong_interns_.VisitRoots(visitor);
   } else if ((flags & kVisitRootFlagNewRoots) != 0) {
     for (auto& root : new_strong_intern_roots_) {
-      mirror::String* old_ref = root.Read<kWithoutReadBarrier>();
+      ObjPtr<mirror::String> old_ref = root.Read<kWithoutReadBarrier>();
       root.VisitRoot(visitor, RootInfo(kRootInternedString));
-      mirror::String* new_ref = root.Read<kWithoutReadBarrier>();
+      ObjPtr<mirror::String> new_ref = root.Read<kWithoutReadBarrier>();
       if (new_ref != old_ref) {
         // The GC moved a root in the log. Need to search the strong interns and update the
         // corresponding object. This is slow, but luckily for us, this may only happen with a
@@ -87,17 +86,17 @@
   // Note: we deliberately don't visit the weak_interns_ table and the immutable image roots.
 }
 
-mirror::String* InternTable::LookupWeak(Thread* self, mirror::String* s) {
+ObjPtr<mirror::String> InternTable::LookupWeak(Thread* self, ObjPtr<mirror::String> s) {
   MutexLock mu(self, *Locks::intern_table_lock_);
   return LookupWeakLocked(s);
 }
 
-mirror::String* InternTable::LookupStrong(Thread* self, mirror::String* s) {
+ObjPtr<mirror::String> InternTable::LookupStrong(Thread* self, ObjPtr<mirror::String> s) {
   MutexLock mu(self, *Locks::intern_table_lock_);
   return LookupStrongLocked(s);
 }
 
-mirror::String* InternTable::LookupStrong(Thread* self,
+ObjPtr<mirror::String> InternTable::LookupStrong(Thread* self,
                                           uint32_t utf16_length,
                                           const char* utf8_data) {
   DCHECK_EQ(utf16_length, CountModifiedUtf8Chars(utf8_data));
@@ -108,11 +107,11 @@
   return strong_interns_.Find(string);
 }
 
-mirror::String* InternTable::LookupWeakLocked(mirror::String* s) {
+ObjPtr<mirror::String> InternTable::LookupWeakLocked(ObjPtr<mirror::String> s) {
   return weak_interns_.Find(s);
 }
 
-mirror::String* InternTable::LookupStrongLocked(mirror::String* s) {
+ObjPtr<mirror::String> InternTable::LookupStrongLocked(ObjPtr<mirror::String> s) {
   return strong_interns_.Find(s);
 }
 
@@ -122,7 +121,7 @@
   strong_interns_.AddNewTable();
 }
 
-mirror::String* InternTable::InsertStrong(mirror::String* s) {
+ObjPtr<mirror::String> InternTable::InsertStrong(ObjPtr<mirror::String> s) {
   Runtime* runtime = Runtime::Current();
   if (runtime->IsActiveTransaction()) {
     runtime->RecordStrongStringInsertion(s);
@@ -134,7 +133,7 @@
   return s;
 }
 
-mirror::String* InternTable::InsertWeak(mirror::String* s) {
+ObjPtr<mirror::String> InternTable::InsertWeak(ObjPtr<mirror::String> s) {
   Runtime* runtime = Runtime::Current();
   if (runtime->IsActiveTransaction()) {
     runtime->RecordWeakStringInsertion(s);
@@ -143,11 +142,11 @@
   return s;
 }
 
-void InternTable::RemoveStrong(mirror::String* s) {
+void InternTable::RemoveStrong(ObjPtr<mirror::String> s) {
   strong_interns_.Remove(s);
 }
 
-void InternTable::RemoveWeak(mirror::String* s) {
+void InternTable::RemoveWeak(ObjPtr<mirror::String> s) {
   Runtime* runtime = Runtime::Current();
   if (runtime->IsActiveTransaction()) {
     runtime->RecordWeakStringRemoval(s);
@@ -156,19 +155,22 @@
 }
 
 // Insert/remove methods used to undo changes made during an aborted transaction.
-mirror::String* InternTable::InsertStrongFromTransaction(mirror::String* s) {
+ObjPtr<mirror::String> InternTable::InsertStrongFromTransaction(ObjPtr<mirror::String> s) {
   DCHECK(!Runtime::Current()->IsActiveTransaction());
   return InsertStrong(s);
 }
-mirror::String* InternTable::InsertWeakFromTransaction(mirror::String* s) {
+
+ObjPtr<mirror::String> InternTable::InsertWeakFromTransaction(ObjPtr<mirror::String> s) {
   DCHECK(!Runtime::Current()->IsActiveTransaction());
   return InsertWeak(s);
 }
-void InternTable::RemoveStrongFromTransaction(mirror::String* s) {
+
+void InternTable::RemoveStrongFromTransaction(ObjPtr<mirror::String> s) {
   DCHECK(!Runtime::Current()->IsActiveTransaction());
   RemoveStrong(s);
 }
-void InternTable::RemoveWeakFromTransaction(mirror::String* s) {
+
+void InternTable::RemoveWeakFromTransaction(ObjPtr<mirror::String> s) {
   DCHECK(!Runtime::Current()->IsActiveTransaction());
   RemoveWeak(s);
 }
@@ -181,61 +183,11 @@
     const ImageSection& section = header->GetImageSection(ImageHeader::kSectionInternedStrings);
     if (section.Size() > 0) {
       AddTableFromMemoryLocked(image_space->Begin() + section.Offset());
-    } else {
-      // TODO: Delete this logic?
-      mirror::Object* root = header->GetImageRoot(ImageHeader::kDexCaches);
-      mirror::ObjectArray<mirror::DexCache>* dex_caches = root->AsObjectArray<mirror::DexCache>();
-      for (int32_t i = 0; i < dex_caches->GetLength(); ++i) {
-        mirror::DexCache* dex_cache = dex_caches->Get(i);
-        const size_t num_strings = dex_cache->NumStrings();
-        for (size_t j = 0; j < num_strings; ++j) {
-          mirror::String* image_string = dex_cache->GetResolvedString(j);
-          if (image_string != nullptr) {
-            mirror::String* found = LookupStrongLocked(image_string);
-            if (found == nullptr) {
-              InsertStrong(image_string);
-            } else {
-              DCHECK_EQ(found, image_string);
-            }
-          }
-        }
-      }
     }
   }
-  images_added_to_intern_table_ = true;
-}
-
-mirror::String* InternTable::LookupStringFromImage(mirror::String* s) {
-  DCHECK(!images_added_to_intern_table_);
-  const std::vector<gc::space::ImageSpace*>& image_spaces =
-      Runtime::Current()->GetHeap()->GetBootImageSpaces();
-  if (image_spaces.empty()) {
-    return nullptr;  // No image present.
-  }
-  const std::string utf8 = s->ToModifiedUtf8();
-  for (gc::space::ImageSpace* image_space : image_spaces) {
-    mirror::Object* root = image_space->GetImageHeader().GetImageRoot(ImageHeader::kDexCaches);
-    mirror::ObjectArray<mirror::DexCache>* dex_caches = root->AsObjectArray<mirror::DexCache>();
-    for (int32_t i = 0; i < dex_caches->GetLength(); ++i) {
-      mirror::DexCache* dex_cache = dex_caches->Get(i);
-      const DexFile* dex_file = dex_cache->GetDexFile();
-      // Binary search the dex file for the string index.
-      const DexFile::StringId* string_id = dex_file->FindStringId(utf8.c_str());
-      if (string_id != nullptr) {
-        uint32_t string_idx = dex_file->GetIndexForStringId(*string_id);
-        // GetResolvedString() contains a RB.
-        mirror::String* image_string = dex_cache->GetResolvedString(string_idx);
-        if (image_string != nullptr) {
-          return image_string;
-        }
-      }
-    }
-  }
-  return nullptr;
 }
 
 void InternTable::BroadcastForNewInterns() {
-  CHECK(kUseReadBarrier);
   Thread* self = Thread::Current();
   MutexLock mu(self, *Locks::intern_table_lock_);
   weak_intern_condition_.Broadcast(self);
@@ -246,14 +198,17 @@
   {
     ScopedThreadSuspension sts(self, kWaitingWeakGcRootRead);
     MutexLock mu(self, *Locks::intern_table_lock_);
-    while (weak_root_state_ == gc::kWeakRootStateNoReadsOrWrites) {
+    while ((!kUseReadBarrier && weak_root_state_ == gc::kWeakRootStateNoReadsOrWrites) ||
+           (kUseReadBarrier && !self->GetWeakRefAccessEnabled())) {
       weak_intern_condition_.Wait(self);
     }
   }
   Locks::intern_table_lock_->ExclusiveLock(self);
 }
 
-mirror::String* InternTable::Insert(mirror::String* s, bool is_strong, bool holding_locks) {
+ObjPtr<mirror::String> InternTable::Insert(ObjPtr<mirror::String> s,
+                                           bool is_strong,
+                                           bool holding_locks) {
   if (s == nullptr) {
     return nullptr;
   }
@@ -272,7 +227,7 @@
       }
     }
     // Check the strong table for a match.
-    mirror::String* strong = LookupStrongLocked(s);
+    ObjPtr<mirror::String> strong = LookupStrongLocked(s);
     if (strong != nullptr) {
       return strong;
     }
@@ -294,7 +249,7 @@
     CHECK(self->GetWeakRefAccessEnabled());
   }
   // There is no match in the strong table, check the weak table.
-  mirror::String* weak = LookupWeakLocked(s);
+  ObjPtr<mirror::String> weak = LookupWeakLocked(s);
   if (weak != nullptr) {
     if (is_strong) {
       // A match was found in the weak table. Promote to the strong table.
@@ -303,22 +258,15 @@
     }
     return weak;
   }
-  // Check the image for a match.
-  if (!images_added_to_intern_table_) {
-    mirror::String* const image_string = LookupStringFromImage(s);
-    if (image_string != nullptr) {
-      return is_strong ? InsertStrong(image_string) : InsertWeak(image_string);
-    }
-  }
   // No match in the strong table or the weak table. Insert into the strong / weak table.
   return is_strong ? InsertStrong(s) : InsertWeak(s);
 }
 
-mirror::String* InternTable::InternStrong(int32_t utf16_length, const char* utf8_data) {
+ObjPtr<mirror::String> InternTable::InternStrong(int32_t utf16_length, const char* utf8_data) {
   DCHECK(utf8_data != nullptr);
   Thread* self = Thread::Current();
   // Try to avoid allocation.
-  mirror::String* s = LookupStrong(self, utf16_length, utf8_data);
+  ObjPtr<mirror::String> s = LookupStrong(self, utf16_length, utf8_data);
   if (s != nullptr) {
     return s;
   }
@@ -326,25 +274,25 @@
       self, utf16_length, utf8_data));
 }
 
-mirror::String* InternTable::InternStrong(const char* utf8_data) {
+ObjPtr<mirror::String> InternTable::InternStrong(const char* utf8_data) {
   DCHECK(utf8_data != nullptr);
   return InternStrong(mirror::String::AllocFromModifiedUtf8(Thread::Current(), utf8_data));
 }
 
-mirror::String* InternTable::InternStrongImageString(mirror::String* s) {
+ObjPtr<mirror::String> InternTable::InternStrongImageString(ObjPtr<mirror::String> s) {
   // May be holding the heap bitmap lock.
   return Insert(s, true, true);
 }
 
-mirror::String* InternTable::InternStrong(mirror::String* s) {
+ObjPtr<mirror::String> InternTable::InternStrong(ObjPtr<mirror::String> s) {
   return Insert(s, true, false);
 }
 
-mirror::String* InternTable::InternWeak(mirror::String* s) {
+ObjPtr<mirror::String> InternTable::InternWeak(ObjPtr<mirror::String> s) {
   return Insert(s, false, false);
 }
 
-bool InternTable::ContainsWeak(mirror::String* s) {
+bool InternTable::ContainsWeak(ObjPtr<mirror::String> s) {
   return LookupWeak(Thread::Current(), s) == s;
 }
 
@@ -371,7 +319,7 @@
   if (kIsDebugBuild) {
     Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
   }
-  return static_cast<size_t>(root.Read()->GetHashCode());
+  return static_cast<size_t>(root.Read<kWithoutReadBarrier>()->GetHashCode());
 }
 
 bool InternTable::StringHashEquals::operator()(const GcRoot<mirror::String>& a,
@@ -379,7 +327,7 @@
   if (kIsDebugBuild) {
     Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
   }
-  return a.Read()->Equals(b.Read());
+  return a.Read<kWithoutReadBarrier>()->Equals(b.Read<kWithoutReadBarrier>());
 }
 
 bool InternTable::StringHashEquals::operator()(const GcRoot<mirror::String>& a,
@@ -387,7 +335,7 @@
   if (kIsDebugBuild) {
     Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
   }
-  mirror::String* a_string = a.Read();
+  ObjPtr<mirror::String> a_string = a.Read<kWithoutReadBarrier>();
   uint32_t a_length = static_cast<uint32_t>(a_string->GetLength());
   if (a_length != b.GetUtf16Length()) {
     return false;
@@ -449,7 +397,7 @@
   return table_to_write->WriteToMemory(ptr);
 }
 
-void InternTable::Table::Remove(mirror::String* s) {
+void InternTable::Table::Remove(ObjPtr<mirror::String> s) {
   for (UnorderedSet& table : tables_) {
     auto it = table.Find(GcRoot<mirror::String>(s));
     if (it != table.end()) {
@@ -460,7 +408,7 @@
   LOG(FATAL) << "Attempting to remove non-interned string " << s->ToModifiedUtf8();
 }
 
-mirror::String* InternTable::Table::Find(mirror::String* s) {
+ObjPtr<mirror::String> InternTable::Table::Find(ObjPtr<mirror::String> s) {
   Locks::intern_table_lock_->AssertHeld(Thread::Current());
   for (UnorderedSet& table : tables_) {
     auto it = table.Find(GcRoot<mirror::String>(s));
@@ -471,7 +419,7 @@
   return nullptr;
 }
 
-mirror::String* InternTable::Table::Find(const Utf8String& string) {
+ObjPtr<mirror::String> InternTable::Table::Find(const Utf8String& string) {
   Locks::intern_table_lock_->AssertHeld(Thread::Current());
   for (UnorderedSet& table : tables_) {
     auto it = table.Find(string);
@@ -486,7 +434,7 @@
   tables_.push_back(UnorderedSet());
 }
 
-void InternTable::Table::Insert(mirror::String* s) {
+void InternTable::Table::Insert(ObjPtr<mirror::String> s) {
   // Always insert the last table, the image tables are before and we avoid inserting into these
   // to prevent dirty pages.
   DCHECK(!tables_.empty());
diff --git a/runtime/intern_table.h b/runtime/intern_table.h
index 184fbdc..f661d9f 100644
--- a/runtime/intern_table.h
+++ b/runtime/intern_table.h
@@ -57,43 +57,44 @@
   InternTable();
 
   // Interns a potentially new string in the 'strong' table. May cause thread suspension.
-  mirror::String* InternStrong(int32_t utf16_length, const char* utf8_data)
+  ObjPtr<mirror::String> InternStrong(int32_t utf16_length, const char* utf8_data)
       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
 
   // Only used by image writer. Special version that may not cause thread suspension since the GC
   // cannot be running while we are doing image writing. Maybe be called while while holding a
   // lock since there will not be thread suspension.
-  mirror::String* InternStrongImageString(mirror::String* s)
+  ObjPtr<mirror::String> InternStrongImageString(ObjPtr<mirror::String> s)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Interns a potentially new string in the 'strong' table. May cause thread suspension.
-  mirror::String* InternStrong(const char* utf8_data) REQUIRES_SHARED(Locks::mutator_lock_)
+  ObjPtr<mirror::String> InternStrong(const char* utf8_data) REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!Roles::uninterruptible_);
 
   // Interns a potentially new string in the 'strong' table. May cause thread suspension.
-  mirror::String* InternStrong(mirror::String* s) REQUIRES_SHARED(Locks::mutator_lock_)
+  ObjPtr<mirror::String> InternStrong(ObjPtr<mirror::String> s)
+      REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!Roles::uninterruptible_);
 
   // Interns a potentially new string in the 'weak' table. May cause thread suspension.
-  mirror::String* InternWeak(mirror::String* s) REQUIRES_SHARED(Locks::mutator_lock_)
+  ObjPtr<mirror::String> InternWeak(ObjPtr<mirror::String> s) REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!Roles::uninterruptible_);
 
   void SweepInternTableWeaks(IsMarkedVisitor* visitor) REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!Locks::intern_table_lock_);
 
-  bool ContainsWeak(mirror::String* s) REQUIRES_SHARED(Locks::mutator_lock_)
+  bool ContainsWeak(ObjPtr<mirror::String> s) REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!Locks::intern_table_lock_);
 
   // Lookup a strong intern, returns null if not found.
-  mirror::String* LookupStrong(Thread* self, mirror::String* s)
+  ObjPtr<mirror::String> LookupStrong(Thread* self, ObjPtr<mirror::String> s)
       REQUIRES(!Locks::intern_table_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
-  mirror::String* LookupStrong(Thread* self, uint32_t utf16_length, const char* utf8_data)
+  ObjPtr<mirror::String> LookupStrong(Thread* self, uint32_t utf16_length, const char* utf8_data)
       REQUIRES(!Locks::intern_table_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Lookup a weak intern, returns null if not found.
-  mirror::String* LookupWeak(Thread* self, mirror::String* s)
+  ObjPtr<mirror::String> LookupWeak(Thread* self, ObjPtr<mirror::String> s)
       REQUIRES(!Locks::intern_table_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
@@ -111,7 +112,7 @@
 
   void DumpForSigQuit(std::ostream& os) const REQUIRES(!Locks::intern_table_lock_);
 
-  void BroadcastForNewInterns() REQUIRES_SHARED(Locks::mutator_lock_);
+  void BroadcastForNewInterns();
 
   // Adds all of the resolved image strings from the image spaces into the intern table. The
   // advantage of doing this is preventing expensive DexFile::FindStringId calls. Sets
@@ -181,13 +182,13 @@
   class Table {
    public:
     Table();
-    mirror::String* Find(mirror::String* s) REQUIRES_SHARED(Locks::mutator_lock_)
+    ObjPtr<mirror::String> Find(ObjPtr<mirror::String> s) REQUIRES_SHARED(Locks::mutator_lock_)
         REQUIRES(Locks::intern_table_lock_);
-    mirror::String* Find(const Utf8String& string) REQUIRES_SHARED(Locks::mutator_lock_)
+    ObjPtr<mirror::String> Find(const Utf8String& string) REQUIRES_SHARED(Locks::mutator_lock_)
         REQUIRES(Locks::intern_table_lock_);
-    void Insert(mirror::String* s) REQUIRES_SHARED(Locks::mutator_lock_)
+    void Insert(ObjPtr<mirror::String> s) REQUIRES_SHARED(Locks::mutator_lock_)
         REQUIRES(Locks::intern_table_lock_);
-    void Remove(mirror::String* s)
+    void Remove(ObjPtr<mirror::String> s)
         REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::intern_table_lock_);
     void VisitRoots(RootVisitor* visitor)
         REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::intern_table_lock_);
@@ -221,32 +222,30 @@
   // Insert if non null, otherwise return null. Must be called holding the mutator lock.
   // If holding_locks is true, then we may also hold other locks. If holding_locks is true, then we
   // require GC is not running since it is not safe to wait while holding locks.
-  mirror::String* Insert(mirror::String* s, bool is_strong, bool holding_locks)
+  ObjPtr<mirror::String> Insert(ObjPtr<mirror::String> s, bool is_strong, bool holding_locks)
       REQUIRES(!Locks::intern_table_lock_) REQUIRES_SHARED(Locks::mutator_lock_);
 
-  mirror::String* LookupStrongLocked(mirror::String* s)
+  ObjPtr<mirror::String> LookupStrongLocked(ObjPtr<mirror::String> s)
       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::intern_table_lock_);
-  mirror::String* LookupWeakLocked(mirror::String* s)
+  ObjPtr<mirror::String> LookupWeakLocked(ObjPtr<mirror::String> s)
       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::intern_table_lock_);
-  mirror::String* InsertStrong(mirror::String* s)
+  ObjPtr<mirror::String> InsertStrong(ObjPtr<mirror::String> s)
       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::intern_table_lock_);
-  mirror::String* InsertWeak(mirror::String* s)
+  ObjPtr<mirror::String> InsertWeak(ObjPtr<mirror::String> s)
       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::intern_table_lock_);
-  void RemoveStrong(mirror::String* s)
+  void RemoveStrong(ObjPtr<mirror::String> s)
       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::intern_table_lock_);
-  void RemoveWeak(mirror::String* s)
+  void RemoveWeak(ObjPtr<mirror::String> s)
       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::intern_table_lock_);
 
   // Transaction rollback access.
-  mirror::String* LookupStringFromImage(mirror::String* s)
+  ObjPtr<mirror::String> InsertStrongFromTransaction(ObjPtr<mirror::String> s)
       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::intern_table_lock_);
-  mirror::String* InsertStrongFromTransaction(mirror::String* s)
+  ObjPtr<mirror::String> InsertWeakFromTransaction(ObjPtr<mirror::String> s)
       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::intern_table_lock_);
-  mirror::String* InsertWeakFromTransaction(mirror::String* s)
+  void RemoveStrongFromTransaction(ObjPtr<mirror::String> s)
       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::intern_table_lock_);
-  void RemoveStrongFromTransaction(mirror::String* s)
-      REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::intern_table_lock_);
-  void RemoveWeakFromTransaction(mirror::String* s)
+  void RemoveWeakFromTransaction(ObjPtr<mirror::String> s)
       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::intern_table_lock_);
 
   size_t AddTableFromMemoryLocked(const uint8_t* ptr)
@@ -260,7 +259,6 @@
   void WaitUntilAccessible(Thread* self)
       REQUIRES(Locks::intern_table_lock_) REQUIRES_SHARED(Locks::mutator_lock_);
 
-  bool images_added_to_intern_table_ GUARDED_BY(Locks::intern_table_lock_);
   bool log_new_roots_ GUARDED_BY(Locks::intern_table_lock_);
   ConditionVariable weak_intern_condition_ GUARDED_BY(Locks::intern_table_lock_);
   // Since this contains (strong) roots, they need a read barrier to
diff --git a/runtime/intern_table_test.cc b/runtime/intern_table_test.cc
index 74cec57..b91d946 100644
--- a/runtime/intern_table_test.cc
+++ b/runtime/intern_table_test.cc
@@ -193,22 +193,22 @@
   ASSERT_NE(foo.Get(), bar.Get());
   ASSERT_NE(foo.Get(), foobar.Get());
   ASSERT_NE(bar.Get(), foobar.Get());
-  mirror::String* lookup_foo = intern_table.LookupStrong(soa.Self(), 3, "foo");
-  EXPECT_EQ(lookup_foo, foo.Get());
-  mirror::String* lookup_bar = intern_table.LookupStrong(soa.Self(), 3, "bar");
-  EXPECT_EQ(lookup_bar, bar.Get());
-  mirror::String* lookup_foobar = intern_table.LookupStrong(soa.Self(), 6, "foobar");
-  EXPECT_EQ(lookup_foobar, foobar.Get());
-  mirror::String* lookup_foox = intern_table.LookupStrong(soa.Self(), 4, "foox");
+  ObjPtr<mirror::String> lookup_foo = intern_table.LookupStrong(soa.Self(), 3, "foo");
+  EXPECT_OBJ_PTR_EQ(lookup_foo, foo.Get());
+  ObjPtr<mirror::String> lookup_bar = intern_table.LookupStrong(soa.Self(), 3, "bar");
+  EXPECT_OBJ_PTR_EQ(lookup_bar, bar.Get());
+  ObjPtr<mirror::String> lookup_foobar = intern_table.LookupStrong(soa.Self(), 6, "foobar");
+  EXPECT_OBJ_PTR_EQ(lookup_foobar, foobar.Get());
+  ObjPtr<mirror::String> lookup_foox = intern_table.LookupStrong(soa.Self(), 4, "foox");
   EXPECT_TRUE(lookup_foox == nullptr);
-  mirror::String* lookup_fooba = intern_table.LookupStrong(soa.Self(), 5, "fooba");
+  ObjPtr<mirror::String> lookup_fooba = intern_table.LookupStrong(soa.Self(), 5, "fooba");
   EXPECT_TRUE(lookup_fooba == nullptr);
-  mirror::String* lookup_foobaR = intern_table.LookupStrong(soa.Self(), 6, "foobaR");
+  ObjPtr<mirror::String> lookup_foobaR = intern_table.LookupStrong(soa.Self(), 6, "foobaR");
   EXPECT_TRUE(lookup_foobaR == nullptr);
   // Try a hash conflict.
   ASSERT_EQ(ComputeUtf16HashFromModifiedUtf8("foobar", 6),
             ComputeUtf16HashFromModifiedUtf8("foobbS", 6));
-  mirror::String* lookup_foobbS = intern_table.LookupStrong(soa.Self(), 6, "foobbS");
+  ObjPtr<mirror::String> lookup_foobbS = intern_table.LookupStrong(soa.Self(), 6, "foobbS");
   EXPECT_TRUE(lookup_foobbS == nullptr);
 }
 
diff --git a/runtime/interpreter/interpreter.cc b/runtime/interpreter/interpreter.cc
index d283a50..a32c800 100644
--- a/runtime/interpreter/interpreter.cc
+++ b/runtime/interpreter/interpreter.cc
@@ -22,6 +22,7 @@
 #include "interpreter_common.h"
 #include "interpreter_mterp_impl.h"
 #include "interpreter_switch_impl.h"
+#include "jvalue-inl.h"
 #include "mirror/string-inl.h"
 #include "scoped_thread_state_change-inl.h"
 #include "ScopedLocalRef.h"
@@ -34,8 +35,17 @@
 namespace art {
 namespace interpreter {
 
-static void InterpreterJni(Thread* self, ArtMethod* method, const StringPiece& shorty,
-                           Object* receiver, uint32_t* args, JValue* result)
+ALWAYS_INLINE static ObjPtr<mirror::Object> ObjArg(uint32_t arg)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  return ObjPtr<mirror::Object>(reinterpret_cast<mirror::Object*>(arg));
+}
+
+static void InterpreterJni(Thread* self,
+                           ArtMethod* method,
+                           const StringPiece& shorty,
+                           ObjPtr<mirror::Object> receiver,
+                           uint32_t* args,
+                           JValue* result)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   // TODO: The following enters JNI code using a typedef-ed function rather than the JNI compiler,
   //       it should be removed and JNI compiled stubs used instead.
@@ -51,7 +61,7 @@
         ScopedThreadStateChange tsc(self, kNative);
         jresult = fn(soa.Env(), klass.get());
       }
-      result->SetL(soa.Decode<Object>(jresult).Ptr());
+      result->SetL(soa.Decode<mirror::Object>(jresult));
     } else if (shorty == "V") {
       typedef void (fntype)(JNIEnv*, jclass);
       fntype* const fn = reinterpret_cast<fntype*>(method->GetEntryPointFromJni());
@@ -86,14 +96,13 @@
       ScopedLocalRef<jclass> klass(soa.Env(),
                                    soa.AddLocalReference<jclass>(method->GetDeclaringClass()));
       ScopedLocalRef<jobject> arg0(soa.Env(),
-                                   soa.AddLocalReference<jobject>(
-                                       reinterpret_cast<Object*>(args[0])));
+                                   soa.AddLocalReference<jobject>(ObjArg(args[0])));
       jobject jresult;
       {
         ScopedThreadStateChange tsc(self, kNative);
         jresult = fn(soa.Env(), klass.get(), arg0.get());
       }
-      result->SetL(soa.Decode<Object>(jresult).Ptr());
+      result->SetL(soa.Decode<mirror::Object>(jresult));
     } else if (shorty == "IIZ") {
       typedef jint (fntype)(JNIEnv*, jclass, jint, jboolean);
       fntype* const fn = reinterpret_cast<fntype*>(method->GetEntryPointFromJni());
@@ -108,8 +117,7 @@
       ScopedLocalRef<jclass> klass(soa.Env(),
                                    soa.AddLocalReference<jclass>(method->GetDeclaringClass()));
       ScopedLocalRef<jobject> arg0(soa.Env(),
-                                   soa.AddLocalReference<jobject>(
-                                       reinterpret_cast<Object*>(args[0])));
+                                   soa.AddLocalReference<jobject>(ObjArg(args[0])));
       ScopedThreadStateChange tsc(self, kNative);
       result->SetI(fn(soa.Env(), klass.get(), arg0.get(), args[1]));
     } else if (shorty == "SIZ") {
@@ -133,11 +141,9 @@
       ScopedLocalRef<jclass> klass(soa.Env(),
                                    soa.AddLocalReference<jclass>(method->GetDeclaringClass()));
       ScopedLocalRef<jobject> arg0(soa.Env(),
-                                   soa.AddLocalReference<jobject>(
-                                       reinterpret_cast<Object*>(args[0])));
+                                   soa.AddLocalReference<jobject>(ObjArg(args[0])));
       ScopedLocalRef<jobject> arg1(soa.Env(),
-                                   soa.AddLocalReference<jobject>(
-                                       reinterpret_cast<Object*>(args[1])));
+                                   soa.AddLocalReference<jobject>(ObjArg(args[1])));
       ScopedThreadStateChange tsc(self, kNative);
       result->SetZ(fn(soa.Env(), klass.get(), arg0.get(), arg1.get()));
     } else if (shorty == "ZILL") {
@@ -146,11 +152,9 @@
       ScopedLocalRef<jclass> klass(soa.Env(),
                                    soa.AddLocalReference<jclass>(method->GetDeclaringClass()));
       ScopedLocalRef<jobject> arg1(soa.Env(),
-                                   soa.AddLocalReference<jobject>(
-                                       reinterpret_cast<Object*>(args[1])));
+                                   soa.AddLocalReference<jobject>(ObjArg(args[1])));
       ScopedLocalRef<jobject> arg2(soa.Env(),
-                                   soa.AddLocalReference<jobject>(
-                                       reinterpret_cast<Object*>(args[2])));
+                                   soa.AddLocalReference<jobject>(ObjArg(args[2])));
       ScopedThreadStateChange tsc(self, kNative);
       result->SetZ(fn(soa.Env(), klass.get(), args[0], arg1.get(), arg2.get()));
     } else if (shorty == "VILII") {
@@ -159,8 +163,7 @@
       ScopedLocalRef<jclass> klass(soa.Env(),
                                    soa.AddLocalReference<jclass>(method->GetDeclaringClass()));
       ScopedLocalRef<jobject> arg1(soa.Env(),
-                                   soa.AddLocalReference<jobject>(
-                                       reinterpret_cast<Object*>(args[1])));
+                                   soa.AddLocalReference<jobject>(ObjArg(args[1])));
       ScopedThreadStateChange tsc(self, kNative);
       fn(soa.Env(), klass.get(), args[0], arg1.get(), args[2], args[3]);
     } else if (shorty == "VLILII") {
@@ -169,15 +172,13 @@
       ScopedLocalRef<jclass> klass(soa.Env(),
                                    soa.AddLocalReference<jclass>(method->GetDeclaringClass()));
       ScopedLocalRef<jobject> arg0(soa.Env(),
-                                   soa.AddLocalReference<jobject>(
-                                       reinterpret_cast<Object*>(args[0])));
+                                   soa.AddLocalReference<jobject>(ObjArg(args[0])));
       ScopedLocalRef<jobject> arg2(soa.Env(),
-                                   soa.AddLocalReference<jobject>(
-                                       reinterpret_cast<Object*>(args[2])));
+                                   soa.AddLocalReference<jobject>(ObjArg(args[2])));
       ScopedThreadStateChange tsc(self, kNative);
       fn(soa.Env(), klass.get(), arg0.get(), args[1], arg2.get(), args[3], args[4]);
     } else {
-      LOG(FATAL) << "Do something with static native method: " << PrettyMethod(method)
+      LOG(FATAL) << "Do something with static native method: " << method->PrettyMethod()
           << " shorty: " << shorty;
     }
   } else {
@@ -191,7 +192,7 @@
         ScopedThreadStateChange tsc(self, kNative);
         jresult = fn(soa.Env(), rcvr.get());
       }
-      result->SetL(soa.Decode<Object>(jresult).Ptr());
+      result->SetL(soa.Decode<mirror::Object>(jresult));
     } else if (shorty == "V") {
       typedef void (fntype)(JNIEnv*, jobject);
       fntype* const fn = reinterpret_cast<fntype*>(method->GetEntryPointFromJni());
@@ -205,14 +206,13 @@
       ScopedLocalRef<jobject> rcvr(soa.Env(),
                                    soa.AddLocalReference<jobject>(receiver));
       ScopedLocalRef<jobject> arg0(soa.Env(),
-                                   soa.AddLocalReference<jobject>(
-                                       reinterpret_cast<Object*>(args[0])));
+                                   soa.AddLocalReference<jobject>(ObjArg(args[0])));
       jobject jresult;
       {
         ScopedThreadStateChange tsc(self, kNative);
         jresult = fn(soa.Env(), rcvr.get(), arg0.get());
       }
-      result->SetL(soa.Decode<Object>(jresult).Ptr());
+      result->SetL(soa.Decode<mirror::Object>(jresult));
       ScopedThreadStateChange tsc(self, kNative);
     } else if (shorty == "III") {
       typedef jint (fntype)(JNIEnv*, jobject, jint, jint);
@@ -222,7 +222,7 @@
       ScopedThreadStateChange tsc(self, kNative);
       result->SetI(fn(soa.Env(), rcvr.get(), args[0], args[1]));
     } else {
-      LOG(FATAL) << "Do something with native method: " << PrettyMethod(method)
+      LOG(FATAL) << "Do something with native method: " << method->PrettyMethod()
           << " shorty: " << shorty;
     }
   }
@@ -311,7 +311,7 @@
           } else {
             // Mterp didn't like that instruction.  Single-step it with the reference interpreter.
             result_register = ExecuteSwitchImpl<false, false>(self, code_item, shadow_frame,
-                                                               result_register, true);
+                                                              result_register, true);
             if (shadow_frame.GetDexPC() == DexFile::kDexNoIndex) {
               // Single-stepped a return or an exception not handled locally.  Return to caller.
               return result_register;
@@ -353,8 +353,11 @@
   }
 }
 
-void EnterInterpreterFromInvoke(Thread* self, ArtMethod* method, Object* receiver,
-                                uint32_t* args, JValue* result,
+void EnterInterpreterFromInvoke(Thread* self,
+                                ArtMethod* method,
+                                ObjPtr<mirror::Object> receiver,
+                                uint32_t* args,
+                                JValue* result,
                                 bool stay_in_interpreter) {
   DCHECK_EQ(self, Thread::Current());
   bool implicit_check = !Runtime::Current()->ExplicitStackOverflowChecks();
@@ -392,7 +395,7 @@
   size_t cur_reg = num_regs - num_ins;
   if (!method->IsStatic()) {
     CHECK(receiver != nullptr);
-    shadow_frame->SetVRegReference(cur_reg, receiver);
+    shadow_frame->SetVRegReference(cur_reg, receiver.Ptr());
     ++cur_reg;
   }
   uint32_t shorty_len = 0;
@@ -401,8 +404,9 @@
     DCHECK_LT(shorty_pos + 1, shorty_len);
     switch (shorty[shorty_pos + 1]) {
       case 'L': {
-        Object* o = reinterpret_cast<StackReference<Object>*>(&args[arg_pos])->AsMirrorPtr();
-        shadow_frame->SetVRegReference(cur_reg, o);
+        ObjPtr<mirror::Object> o =
+            reinterpret_cast<StackReference<mirror::Object>*>(&args[arg_pos])->AsMirrorPtr();
+        shadow_frame->SetVRegReference(cur_reg, o.Ptr());
         break;
       }
       case 'J': case 'D': {
@@ -441,7 +445,7 @@
     // references pointers due to moving GC.
     args = shadow_frame->GetVRegArgs(method->IsStatic() ? 0 : 1);
     if (!Runtime::Current()->IsStarted()) {
-      UnstartedRuntime::Jni(self, method, receiver, args, result);
+      UnstartedRuntime::Jni(self, method, receiver.Ptr(), args, result);
     } else {
       InterpreterJni(self, method, shorty, receiver, args, result);
     }
@@ -538,14 +542,14 @@
         if (kIsDebugBuild) {
           ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
           // This is a suspend point. But it's ok since value has been set into shadow_frame.
-          mirror::Class* klass = class_linker->ResolveType(
+          ObjPtr<mirror::Class> klass = class_linker->ResolveType(
               instr->VRegB_21c(), shadow_frame->GetMethod());
           DCHECK(klass->IsStringClass());
         }
       } else {
         CHECK(false) << "Unexpected instruction opcode " << instr->Opcode()
                      << " at dex_pc " << dex_pc
-                     << " of method: " << PrettyMethod(shadow_frame->GetMethod(), false);
+                     << " of method: " << ArtMethod::PrettyMethod(shadow_frame->GetMethod(), false);
       }
     } else {
       // Nothing to do, the dex_pc is the one at which the code requested
@@ -581,8 +585,10 @@
   return Execute(self, code_item, *shadow_frame, JValue());
 }
 
-void ArtInterpreterToInterpreterBridge(Thread* self, const DexFile::CodeItem* code_item,
-                                       ShadowFrame* shadow_frame, JValue* result) {
+void ArtInterpreterToInterpreterBridge(Thread* self,
+                                       const DexFile::CodeItem* code_item,
+                                       ShadowFrame* shadow_frame,
+                                       JValue* result) {
   bool implicit_check = !Runtime::Current()->ExplicitStackOverflowChecks();
   if (UNLIKELY(__builtin_frame_address(0) < self->GetStackEndForInterpreter(implicit_check))) {
     ThrowStackOverflowError(self);
@@ -594,10 +600,10 @@
   // Ensure static methods are initialized.
   const bool is_static = method->IsStatic();
   if (is_static) {
-    mirror::Class* declaring_class = method->GetDeclaringClass();
+    ObjPtr<mirror::Class> declaring_class = method->GetDeclaringClass();
     if (UNLIKELY(!declaring_class->IsInitialized())) {
       StackHandleScope<1> hs(self);
-      HandleWrapper<Class> h_declaring_class(hs.NewHandleWrapper(&declaring_class));
+      HandleWrapperObjPtr<mirror::Class> h_declaring_class(hs.NewHandleWrapper(&declaring_class));
       if (UNLIKELY(!Runtime::Current()->GetClassLinker()->EnsureInitialized(
           self, h_declaring_class, true, true))) {
         DCHECK(self->IsExceptionPending());
@@ -614,9 +620,9 @@
     // We don't expect to be asked to interpret native code (which is entered via a JNI compiler
     // generated stub) except during testing and image writing.
     CHECK(!Runtime::Current()->IsStarted());
-    Object* receiver = is_static ? nullptr : shadow_frame->GetVRegReference(0);
+    ObjPtr<mirror::Object> receiver = is_static ? nullptr : shadow_frame->GetVRegReference(0);
     uint32_t* args = shadow_frame->GetVRegArgs(is_static ? 0 : 1);
-    UnstartedRuntime::Jni(self, shadow_frame->GetMethod(), receiver, args, result);
+    UnstartedRuntime::Jni(self, shadow_frame->GetMethod(), receiver.Ptr(), args, result);
   }
 
   self->PopShadowFrame();
diff --git a/runtime/interpreter/interpreter.h b/runtime/interpreter/interpreter.h
index 38ce851..65cfade 100644
--- a/runtime/interpreter/interpreter.h
+++ b/runtime/interpreter/interpreter.h
@@ -19,6 +19,7 @@
 
 #include "base/mutex.h"
 #include "dex_file.h"
+#include "obj_ptr.h"
 
 namespace art {
 namespace mirror {
@@ -36,7 +37,9 @@
 // The optional stay_in_interpreter parameter (false by default) can be used by clients to
 // explicitly force interpretation in the remaining path that implements method invocation.
 extern void EnterInterpreterFromInvoke(Thread* self, ArtMethod* method,
-                                       mirror::Object* receiver, uint32_t* args, JValue* result,
+                                       ObjPtr<mirror::Object> receiver,
+                                       uint32_t* args,
+                                       JValue* result,
                                        bool stay_in_interpreter = false)
     REQUIRES_SHARED(Locks::mutator_lock_);
 
diff --git a/runtime/interpreter/interpreter_common.cc b/runtime/interpreter/interpreter_common.cc
index db7ebb4..8c63a9e 100644
--- a/runtime/interpreter/interpreter_common.cc
+++ b/runtime/interpreter/interpreter_common.cc
@@ -24,14 +24,17 @@
 #include "jit/jit.h"
 #include "jvalue.h"
 #include "method_handles.h"
+#include "method_handles-inl.h"
 #include "mirror/array-inl.h"
 #include "mirror/class.h"
+#include "mirror/emulated_stack_frame.h"
 #include "mirror/method_handle_impl.h"
 #include "reflection.h"
 #include "reflection-inl.h"
 #include "stack.h"
 #include "unstarted_runtime.h"
 #include "verifier/method_verifier.h"
+#include "well_known_classes.h"
 
 namespace art {
 namespace interpreter {
@@ -40,6 +43,60 @@
   ThrowNullPointerExceptionFromDexPC();
 }
 
+template<Primitive::Type field_type>
+static ALWAYS_INLINE void DoFieldGetCommon(Thread* self,
+                                           const ShadowFrame& shadow_frame,
+                                           ObjPtr<mirror::Object>& obj,
+                                           ArtField* field,
+                                           JValue* result)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  field->GetDeclaringClass()->AssertInitializedOrInitializingInThread(self);
+
+  // Report this field access to instrumentation if needed.
+  instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
+  if (UNLIKELY(instrumentation->HasFieldReadListeners())) {
+    StackHandleScope<1> hs(self);
+    // Wrap in handle wrapper in case the listener does thread suspension.
+    HandleWrapperObjPtr<mirror::Object> h(hs.NewHandleWrapper(&obj));
+    ObjPtr<mirror::Object> this_object;
+    if (!field->IsStatic()) {
+      this_object = obj;
+    }
+    instrumentation->FieldReadEvent(self,
+                                    this_object.Ptr(),
+                                    shadow_frame.GetMethod(),
+                                    shadow_frame.GetDexPC(),
+                                    field);
+  }
+
+  switch (field_type) {
+    case Primitive::kPrimBoolean:
+      result->SetZ(field->GetBoolean(obj));
+      break;
+    case Primitive::kPrimByte:
+      result->SetB(field->GetByte(obj));
+      break;
+    case Primitive::kPrimChar:
+      result->SetC(field->GetChar(obj));
+      break;
+    case Primitive::kPrimShort:
+      result->SetS(field->GetShort(obj));
+      break;
+    case Primitive::kPrimInt:
+      result->SetI(field->GetInt(obj));
+      break;
+    case Primitive::kPrimLong:
+      result->SetJ(field->GetLong(obj));
+      break;
+    case Primitive::kPrimNot:
+      result->SetL(field->GetObject(obj));
+      break;
+    default:
+      LOG(FATAL) << "Unreachable: " << field_type;
+      UNREACHABLE();
+  }
+}
+
 template<FindFieldType find_type, Primitive::Type field_type, bool do_access_check>
 bool DoFieldGet(Thread* self, ShadowFrame& shadow_frame, const Instruction* inst,
                 uint16_t inst_data) {
@@ -52,7 +109,7 @@
     CHECK(self->IsExceptionPending());
     return false;
   }
-  ObjPtr<Object> obj;
+  ObjPtr<mirror::Object> obj;
   if (is_static) {
     obj = f->GetDeclaringClass();
   } else {
@@ -62,45 +119,31 @@
       return false;
     }
   }
-  f->GetDeclaringClass()->AssertInitializedOrInitializingInThread(self);
-  // Report this field access to instrumentation if needed.
-  instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
-  if (UNLIKELY(instrumentation->HasFieldReadListeners())) {
-    StackHandleScope<1> hs(self);
-    // Wrap in handle wrapper in case the listener does thread suspension.
-    HandleWrapperObjPtr<mirror::Object> h(hs.NewHandleWrapper(&obj));
-    ObjPtr<Object> this_object;
-    if (!f->IsStatic()) {
-      this_object = obj;
-    }
-    instrumentation->FieldReadEvent(self,
-                                    this_object.Ptr(),
-                                    shadow_frame.GetMethod(),
-                                    shadow_frame.GetDexPC(),
-                                    f);
-  }
+
+  JValue result;
+  DoFieldGetCommon<field_type>(self, shadow_frame, obj, f, &result);
   uint32_t vregA = is_static ? inst->VRegA_21c(inst_data) : inst->VRegA_22c(inst_data);
   switch (field_type) {
     case Primitive::kPrimBoolean:
-      shadow_frame.SetVReg(vregA, f->GetBoolean(obj));
+      shadow_frame.SetVReg(vregA, result.GetZ());
       break;
     case Primitive::kPrimByte:
-      shadow_frame.SetVReg(vregA, f->GetByte(obj));
+      shadow_frame.SetVReg(vregA, result.GetB());
       break;
     case Primitive::kPrimChar:
-      shadow_frame.SetVReg(vregA, f->GetChar(obj));
+      shadow_frame.SetVReg(vregA, result.GetC());
       break;
     case Primitive::kPrimShort:
-      shadow_frame.SetVReg(vregA, f->GetShort(obj));
+      shadow_frame.SetVReg(vregA, result.GetS());
       break;
     case Primitive::kPrimInt:
-      shadow_frame.SetVReg(vregA, f->GetInt(obj));
+      shadow_frame.SetVReg(vregA, result.GetI());
       break;
     case Primitive::kPrimLong:
-      shadow_frame.SetVRegLong(vregA, f->GetLong(obj));
+      shadow_frame.SetVRegLong(vregA, result.GetJ());
       break;
     case Primitive::kPrimNot:
-      shadow_frame.SetVRegReference(vregA, f->GetObject(obj).Ptr());
+      shadow_frame.SetVRegReference(vregA, result.GetL());
       break;
     default:
       LOG(FATAL) << "Unreachable: " << field_type;
@@ -141,11 +184,53 @@
 #undef EXPLICIT_DO_FIELD_GET_ALL_TEMPLATE_DECL
 #undef EXPLICIT_DO_FIELD_GET_TEMPLATE_DECL
 
+// Helper for getters in invoke-polymorphic.
+inline static void DoFieldGetForInvokePolymorphic(Thread* self,
+                                                  const ShadowFrame& shadow_frame,
+                                                  ObjPtr<mirror::Object>& obj,
+                                                  ArtField* field,
+                                                  Primitive::Type field_type,
+                                                  JValue* result)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  switch (field_type) {
+    case Primitive::kPrimBoolean:
+      DoFieldGetCommon<Primitive::kPrimBoolean>(self, shadow_frame, obj, field, result);
+      break;
+    case Primitive::kPrimByte:
+      DoFieldGetCommon<Primitive::kPrimByte>(self, shadow_frame, obj, field, result);
+      break;
+    case Primitive::kPrimChar:
+      DoFieldGetCommon<Primitive::kPrimChar>(self, shadow_frame, obj, field, result);
+      break;
+    case Primitive::kPrimShort:
+      DoFieldGetCommon<Primitive::kPrimShort>(self, shadow_frame, obj, field, result);
+      break;
+    case Primitive::kPrimInt:
+      DoFieldGetCommon<Primitive::kPrimInt>(self, shadow_frame, obj, field, result);
+      break;
+    case Primitive::kPrimLong:
+      DoFieldGetCommon<Primitive::kPrimLong>(self, shadow_frame, obj, field, result);
+      break;
+    case Primitive::kPrimFloat:
+      DoFieldGetCommon<Primitive::kPrimInt>(self, shadow_frame, obj, field, result);
+      break;
+    case Primitive::kPrimDouble:
+      DoFieldGetCommon<Primitive::kPrimLong>(self, shadow_frame, obj, field, result);
+      break;
+    case Primitive::kPrimNot:
+      DoFieldGetCommon<Primitive::kPrimNot>(self, shadow_frame, obj, field, result);
+      break;
+    case Primitive::kPrimVoid:
+      LOG(FATAL) << "Unreachable: " << field_type;
+      UNREACHABLE();
+  }
+}
+
 // Handles iget-quick, iget-wide-quick and iget-object-quick instructions.
 // Returns true on success, otherwise throws an exception and returns false.
 template<Primitive::Type field_type>
 bool DoIGetQuick(ShadowFrame& shadow_frame, const Instruction* inst, uint16_t inst_data) {
-  Object* obj = shadow_frame.GetVRegReference(inst->VRegB_22c(inst_data));
+  ObjPtr<mirror::Object> obj = shadow_frame.GetVRegReference(inst->VRegB_22c(inst_data));
   if (UNLIKELY(obj == nullptr)) {
     // We lost the reference to the field index so we cannot get a more
     // precised exception message.
@@ -161,8 +246,14 @@
                                                         field_offset.Uint32Value());
     DCHECK(f != nullptr);
     DCHECK(!f->IsStatic());
-    instrumentation->FieldReadEvent(Thread::Current(), obj, shadow_frame.GetMethod(),
-                                    shadow_frame.GetDexPC(), f);
+    StackHandleScope<1> hs(Thread::Current());
+    // Save obj in case the instrumentation event has thread suspension.
+    HandleWrapperObjPtr<mirror::Object> h = hs.NewHandleWrapper(&obj);
+    instrumentation->FieldReadEvent(Thread::Current(),
+                                    obj.Ptr(),
+                                    shadow_frame.GetMethod(),
+                                    shadow_frame.GetDexPC(),
+                                    f);
   }
   // Note: iget-x-quick instructions are only for non-volatile fields.
   const uint32_t vregA = inst->VRegA_22c(inst_data);
@@ -209,6 +300,42 @@
 EXPLICIT_DO_IGET_QUICK_TEMPLATE_DECL(Primitive::kPrimNot);      // iget-object-quick.
 #undef EXPLICIT_DO_IGET_QUICK_TEMPLATE_DECL
 
+static JValue GetFieldValue(const ShadowFrame& shadow_frame,
+                            Primitive::Type field_type,
+                            uint32_t vreg)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  JValue field_value;
+  switch (field_type) {
+    case Primitive::kPrimBoolean:
+      field_value.SetZ(static_cast<uint8_t>(shadow_frame.GetVReg(vreg)));
+      break;
+    case Primitive::kPrimByte:
+      field_value.SetB(static_cast<int8_t>(shadow_frame.GetVReg(vreg)));
+      break;
+    case Primitive::kPrimChar:
+      field_value.SetC(static_cast<uint16_t>(shadow_frame.GetVReg(vreg)));
+      break;
+    case Primitive::kPrimShort:
+      field_value.SetS(static_cast<int16_t>(shadow_frame.GetVReg(vreg)));
+      break;
+    case Primitive::kPrimInt:
+    case Primitive::kPrimFloat:
+      field_value.SetI(shadow_frame.GetVReg(vreg));
+      break;
+    case Primitive::kPrimLong:
+    case Primitive::kPrimDouble:
+      field_value.SetJ(shadow_frame.GetVRegLong(vreg));
+      break;
+    case Primitive::kPrimNot:
+      field_value.SetL(shadow_frame.GetVRegReference(vreg));
+      break;
+    case Primitive::kPrimVoid:
+      LOG(FATAL) << "Unreachable: " << field_type;
+      UNREACHABLE();
+  }
+  return field_value;
+}
+
 template<Primitive::Type field_type>
 static JValue GetFieldValue(const ShadowFrame& shadow_frame, uint32_t vreg)
     REQUIRES_SHARED(Locks::mutator_lock_) {
@@ -242,32 +369,15 @@
   return field_value;
 }
 
-template<FindFieldType find_type, Primitive::Type field_type, bool do_access_check,
-         bool transaction_active>
-bool DoFieldPut(Thread* self, const ShadowFrame& shadow_frame, const Instruction* inst,
-                uint16_t inst_data) {
-  bool do_assignability_check = do_access_check;
-  bool is_static = (find_type == StaticObjectWrite) || (find_type == StaticPrimitiveWrite);
-  uint32_t field_idx = is_static ? inst->VRegB_21c() : inst->VRegC_22c();
-  ArtField* f =
-      FindFieldFromCode<find_type, do_access_check>(field_idx, shadow_frame.GetMethod(), self,
-                                                    Primitive::ComponentSize(field_type));
-  if (UNLIKELY(f == nullptr)) {
-    CHECK(self->IsExceptionPending());
-    return false;
-  }
-  ObjPtr<Object> obj;
-  if (is_static) {
-    obj = f->GetDeclaringClass();
-  } else {
-    obj = shadow_frame.GetVRegReference(inst->VRegB_22c(inst_data));
-    if (UNLIKELY(obj == nullptr)) {
-      ThrowNullPointerExceptionForFieldAccess(f, false);
-      return false;
-    }
-  }
+template<Primitive::Type field_type, bool do_assignability_check, bool transaction_active>
+static inline bool DoFieldPutCommon(Thread* self,
+                                    const ShadowFrame& shadow_frame,
+                                    ObjPtr<mirror::Object>& obj,
+                                    ArtField* f,
+                                    const JValue& value)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
   f->GetDeclaringClass()->AssertInitializedOrInitializingInThread(self);
-  uint32_t vregA = is_static ? inst->VRegA_21c(inst_data) : inst->VRegA_22c(inst_data);
+
   // Report this field access to instrumentation if needed. Since we only have the offset of
   // the field from the base of the object, we need to look for it first.
   instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
@@ -275,42 +385,42 @@
     StackHandleScope<1> hs(self);
     // Wrap in handle wrapper in case the listener does thread suspension.
     HandleWrapperObjPtr<mirror::Object> h(hs.NewHandleWrapper(&obj));
-    JValue field_value = GetFieldValue<field_type>(shadow_frame, vregA);
-    ObjPtr<Object> this_object = f->IsStatic() ? nullptr : obj;
+    ObjPtr<mirror::Object> this_object = f->IsStatic() ? nullptr : obj;
     instrumentation->FieldWriteEvent(self, this_object.Ptr(),
                                      shadow_frame.GetMethod(),
                                      shadow_frame.GetDexPC(),
                                      f,
-                                     field_value);
+                                     value);
   }
+
   switch (field_type) {
     case Primitive::kPrimBoolean:
-      f->SetBoolean<transaction_active>(obj, shadow_frame.GetVReg(vregA));
+      f->SetBoolean<transaction_active>(obj, value.GetZ());
       break;
     case Primitive::kPrimByte:
-      f->SetByte<transaction_active>(obj, shadow_frame.GetVReg(vregA));
+      f->SetByte<transaction_active>(obj, value.GetB());
       break;
     case Primitive::kPrimChar:
-      f->SetChar<transaction_active>(obj, shadow_frame.GetVReg(vregA));
+      f->SetChar<transaction_active>(obj, value.GetC());
       break;
     case Primitive::kPrimShort:
-      f->SetShort<transaction_active>(obj, shadow_frame.GetVReg(vregA));
+      f->SetShort<transaction_active>(obj, value.GetS());
       break;
     case Primitive::kPrimInt:
-      f->SetInt<transaction_active>(obj, shadow_frame.GetVReg(vregA));
+      f->SetInt<transaction_active>(obj, value.GetI());
       break;
     case Primitive::kPrimLong:
-      f->SetLong<transaction_active>(obj, shadow_frame.GetVRegLong(vregA));
+      f->SetLong<transaction_active>(obj, value.GetJ());
       break;
     case Primitive::kPrimNot: {
-      Object* reg = shadow_frame.GetVRegReference(vregA);
+      ObjPtr<mirror::Object> reg = value.GetL();
       if (do_assignability_check && reg != nullptr) {
         // FieldHelper::GetType can resolve classes, use a handle wrapper which will restore the
         // object in the destructor.
-        ObjPtr<Class> field_class;
+        ObjPtr<mirror::Class> field_class;
         {
           StackHandleScope<2> hs(self);
-          HandleWrapper<mirror::Object> h_reg(hs.NewHandleWrapper(&reg));
+          HandleWrapperObjPtr<mirror::Object> h_reg(hs.NewHandleWrapper(&reg));
           HandleWrapperObjPtr<mirror::Object> h_obj(hs.NewHandleWrapper(&obj));
           field_class = f->GetType<true>();
         }
@@ -335,6 +445,40 @@
   return true;
 }
 
+template<FindFieldType find_type, Primitive::Type field_type, bool do_access_check,
+         bool transaction_active>
+bool DoFieldPut(Thread* self, const ShadowFrame& shadow_frame, const Instruction* inst,
+                uint16_t inst_data) {
+  const bool do_assignability_check = do_access_check;
+  bool is_static = (find_type == StaticObjectWrite) || (find_type == StaticPrimitiveWrite);
+  uint32_t field_idx = is_static ? inst->VRegB_21c() : inst->VRegC_22c();
+  ArtField* f =
+      FindFieldFromCode<find_type, do_access_check>(field_idx, shadow_frame.GetMethod(), self,
+                                                    Primitive::ComponentSize(field_type));
+  if (UNLIKELY(f == nullptr)) {
+    CHECK(self->IsExceptionPending());
+    return false;
+  }
+  ObjPtr<mirror::Object> obj;
+  if (is_static) {
+    obj = f->GetDeclaringClass();
+  } else {
+    obj = shadow_frame.GetVRegReference(inst->VRegB_22c(inst_data));
+    if (UNLIKELY(obj == nullptr)) {
+      ThrowNullPointerExceptionForFieldAccess(f, false);
+      return false;
+    }
+  }
+
+  uint32_t vregA = is_static ? inst->VRegA_21c(inst_data) : inst->VRegA_22c(inst_data);
+  JValue value = GetFieldValue<field_type>(shadow_frame, vregA);
+  return DoFieldPutCommon<field_type, do_assignability_check, transaction_active>(self,
+                                                                                  shadow_frame,
+                                                                                  obj,
+                                                                                  f,
+                                                                                  value);
+}
+
 // Explicitly instantiate all DoFieldPut functions.
 #define EXPLICIT_DO_FIELD_PUT_TEMPLATE_DECL(_find_type, _field_type, _do_check, _transaction_active) \
   template bool DoFieldPut<_find_type, _field_type, _do_check, _transaction_active>(Thread* self, \
@@ -367,9 +511,49 @@
 #undef EXPLICIT_DO_FIELD_PUT_ALL_TEMPLATE_DECL
 #undef EXPLICIT_DO_FIELD_PUT_TEMPLATE_DECL
 
+// Helper for setters in invoke-polymorphic.
+bool DoFieldPutForInvokePolymorphic(Thread* self,
+                                    ShadowFrame& shadow_frame,
+                                    ObjPtr<mirror::Object>& obj,
+                                    ArtField* field,
+                                    Primitive::Type field_type,
+                                    const JValue& value)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  static const bool kDoCheckAssignability = false;
+  static const bool kTransaction = false;
+  switch (field_type) {
+    case Primitive::kPrimBoolean:
+      return DoFieldPutCommon<Primitive::kPrimBoolean, kDoCheckAssignability, kTransaction>(
+          self, shadow_frame, obj, field, value);
+    case Primitive::kPrimByte:
+      return DoFieldPutCommon<Primitive::kPrimByte, kDoCheckAssignability, kTransaction>(
+          self, shadow_frame, obj, field, value);
+    case Primitive::kPrimChar:
+      return DoFieldPutCommon<Primitive::kPrimChar, kDoCheckAssignability, kTransaction>(
+          self, shadow_frame, obj, field, value);
+    case Primitive::kPrimShort:
+      return DoFieldPutCommon<Primitive::kPrimShort, kDoCheckAssignability, kTransaction>(
+          self, shadow_frame, obj, field, value);
+    case Primitive::kPrimInt:
+    case Primitive::kPrimFloat:
+      return DoFieldPutCommon<Primitive::kPrimInt, kDoCheckAssignability, kTransaction>(
+          self, shadow_frame, obj, field, value);
+    case Primitive::kPrimLong:
+    case Primitive::kPrimDouble:
+      return DoFieldPutCommon<Primitive::kPrimLong, kDoCheckAssignability, kTransaction>(
+          self, shadow_frame, obj, field, value);
+    case Primitive::kPrimNot:
+      return DoFieldPutCommon<Primitive::kPrimNot, kDoCheckAssignability, kTransaction>(
+          self, shadow_frame, obj, field, value);
+    case Primitive::kPrimVoid:
+      LOG(FATAL) << "Unreachable: " << field_type;
+      UNREACHABLE();
+  }
+}
+
 template<Primitive::Type field_type, bool transaction_active>
 bool DoIPutQuick(const ShadowFrame& shadow_frame, const Instruction* inst, uint16_t inst_data) {
-  Object* obj = shadow_frame.GetVRegReference(inst->VRegB_22c(inst_data));
+  ObjPtr<mirror::Object> obj = shadow_frame.GetVRegReference(inst->VRegB_22c(inst_data));
   if (UNLIKELY(obj == nullptr)) {
     // We lost the reference to the field index so we cannot get a more
     // precised exception message.
@@ -387,8 +571,15 @@
     DCHECK(f != nullptr);
     DCHECK(!f->IsStatic());
     JValue field_value = GetFieldValue<field_type>(shadow_frame, vregA);
-    instrumentation->FieldWriteEvent(Thread::Current(), obj, shadow_frame.GetMethod(),
-                                     shadow_frame.GetDexPC(), f, field_value);
+    StackHandleScope<1> hs(Thread::Current());
+    // Save obj in case the instrumentation event has thread suspension.
+    HandleWrapperObjPtr<mirror::Object> h = hs.NewHandleWrapper(&obj);
+    instrumentation->FieldWriteEvent(Thread::Current(),
+                                     obj.Ptr(),
+                                     shadow_frame.GetMethod(),
+                                     shadow_frame.GetDexPC(),
+                                     f,
+                                     field_value);
   }
   // Note: iput-x-quick instructions are only for non-volatile fields.
   switch (field_type) {
@@ -474,24 +665,6 @@
   UNREACHABLE();
 }
 
-// Assign register 'src_reg' from shadow_frame to register 'dest_reg' into new_shadow_frame.
-static inline void AssignRegister(ShadowFrame* new_shadow_frame, const ShadowFrame& shadow_frame,
-                                  size_t dest_reg, size_t src_reg)
-    REQUIRES_SHARED(Locks::mutator_lock_) {
-  // Uint required, so that sign extension does not make this wrong on 64b systems
-  uint32_t src_value = shadow_frame.GetVReg(src_reg);
-  mirror::Object* o = shadow_frame.GetVRegReference<kVerifyNone>(src_reg);
-
-  // If both register locations contains the same value, the register probably holds a reference.
-  // Note: As an optimization, non-moving collectors leave a stale reference value
-  // in the references array even after the original vreg was overwritten to a non-reference.
-  if (src_value == reinterpret_cast<uintptr_t>(o)) {
-    new_shadow_frame->SetVRegReference(dest_reg, o);
-  } else {
-    new_shadow_frame->SetVReg(dest_reg, src_value);
-  }
-}
-
 void AbortTransactionF(Thread* self, const char* fmt, ...) {
   va_list args;
   va_start(args, fmt);
@@ -508,16 +681,60 @@
   Runtime::Current()->AbortTransactionAndThrowAbortError(self, abort_msg);
 }
 
-// Separate declaration is required solely for the attributes.
+// START DECLARATIONS :
+//
+// These additional declarations are required because clang complains
+// about ALWAYS_INLINE (-Werror, -Wgcc-compat) in definitions.
+//
+
 template <bool is_range, bool do_assignability_check>
-    REQUIRES_SHARED(Locks::mutator_lock_)
-static inline bool DoCallCommon(ArtMethod* called_method,
-                                Thread* self,
-                                ShadowFrame& shadow_frame,
-                                JValue* result,
-                                uint16_t number_of_inputs,
-                                uint32_t (&arg)[Instruction::kMaxVarArgRegs],
-                                uint32_t vregC) ALWAYS_INLINE;
+static ALWAYS_INLINE bool DoCallCommon(ArtMethod* called_method,
+                                       Thread* self,
+                                       ShadowFrame& shadow_frame,
+                                       JValue* result,
+                                       uint16_t number_of_inputs,
+                                       uint32_t (&arg)[Instruction::kMaxVarArgRegs],
+                                       uint32_t vregC) REQUIRES_SHARED(Locks::mutator_lock_);
+
+template <bool is_range>
+static ALWAYS_INLINE bool DoCallPolymorphic(ArtMethod* called_method,
+                                            Handle<mirror::MethodType> callsite_type,
+                                            Handle<mirror::MethodType> target_type,
+                                            Thread* self,
+                                            ShadowFrame& shadow_frame,
+                                            JValue* result,
+                                            uint32_t (&arg)[Instruction::kMaxVarArgRegs],
+                                            uint32_t vregC,
+                                            const MethodHandleKind handle_kind)
+  REQUIRES_SHARED(Locks::mutator_lock_);
+
+template <bool is_range>
+static ALWAYS_INLINE bool DoCallTransform(ArtMethod* called_method,
+                                          Handle<mirror::MethodType> callsite_type,
+                                          Handle<mirror::MethodType> callee_type,
+                                          Thread* self,
+                                          ShadowFrame& shadow_frame,
+                                          Handle<mirror::MethodHandleImpl> receiver,
+                                          JValue* result,
+                                          uint32_t (&arg)[Instruction::kMaxVarArgRegs],
+                                          uint32_t vregC) REQUIRES_SHARED(Locks::mutator_lock_);
+
+ALWAYS_INLINE void PerformCall(Thread* self,
+                               const DexFile::CodeItem* code_item,
+                               ArtMethod* caller_method,
+                               const size_t first_dest_reg,
+                               ShadowFrame* callee_frame,
+                               JValue* result) REQUIRES_SHARED(Locks::mutator_lock_);
+
+template <bool is_range>
+ALWAYS_INLINE void CopyRegisters(ShadowFrame& caller_frame,
+                                 ShadowFrame* callee_frame,
+                                 const uint32_t (&arg)[Instruction::kMaxVarArgRegs],
+                                 const size_t first_src_reg,
+                                 const size_t first_dest_reg,
+                                 const size_t num_regs) REQUIRES_SHARED(Locks::mutator_lock_);
+
+// END DECLARATIONS.
 
 void ArtInterpreterToCompiledCodeBridge(Thread* self,
                                         ArtMethod* caller,
@@ -528,7 +745,7 @@
   ArtMethod* method = shadow_frame->GetMethod();
   // Ensure static methods are initialized.
   if (method->IsStatic()) {
-    mirror::Class* declaringClass = method->GetDeclaringClass();
+    ObjPtr<mirror::Class> declaringClass = method->GetDeclaringClass();
     if (UNLIKELY(!declaringClass->IsInitialized())) {
       self->PushShadowFrame(shadow_frame);
       StackHandleScope<1> hs(self);
@@ -561,7 +778,7 @@
                                     uint16_t this_obj_vreg,
                                     JValue result)
     REQUIRES_SHARED(Locks::mutator_lock_) {
-  Object* existing = shadow_frame->GetVRegReference(this_obj_vreg);
+  ObjPtr<mirror::Object> existing = shadow_frame->GetVRegReference(this_obj_vreg);
   if (existing == nullptr) {
     // If it's null, we come from compiled code that was deoptimized. Nothing to do,
     // as the compiler verified there was no alias.
@@ -581,15 +798,71 @@
   }
 }
 
+inline static bool IsInvokeExact(const DexFile& dex_file, int invoke_method_idx) {
+  // This check uses string comparison as it needs less code and data
+  // to do than fetching the associated ArtMethod from the DexCache
+  // and checking against ArtMethods in the well known classes. The
+  // verifier needs to perform a more rigorous check.
+  const char* method_name = dex_file.GetMethodName(dex_file.GetMethodId(invoke_method_idx));
+  bool is_invoke_exact = (0 == strcmp(method_name, "invokeExact"));
+  DCHECK(is_invoke_exact || (0 == strcmp(method_name, "invoke")));
+  return is_invoke_exact;
+}
+
+inline static ObjPtr<mirror::Class> GetAndInitializeDeclaringClass(Thread* self, ArtField* field)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  // Method handle invocations on static fields should ensure class is
+  // initialized. This usually happens when an instance is constructed
+  // or class members referenced, but this is not guaranteed when
+  // looking up method handles.
+  ObjPtr<mirror::Class> klass = field->GetDeclaringClass();
+  if (UNLIKELY(!klass->IsInitialized())) {
+    StackHandleScope<1> hs(self);
+    HandleWrapperObjPtr<mirror::Class> h(hs.NewHandleWrapper(&klass));
+    if (!Runtime::Current()->GetClassLinker()->EnsureInitialized(self, h, true, true)) {
+      DCHECK(self->IsExceptionPending());
+      return nullptr;
+    }
+  }
+  return klass;
+}
+
+// Returns true iff. the callsite type for a polymorphic invoke is transformer
+// like, i.e that it has a single input argument whose type is
+// dalvik.system.EmulatedStackFrame.
+static inline bool IsCallerTransformer(Handle<mirror::MethodType> callsite_type)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  ObjPtr<mirror::ObjectArray<mirror::Class>> param_types(callsite_type->GetPTypes());
+  if (param_types->GetLength() == 1) {
+    ObjPtr<mirror::Class> param(param_types->GetWithoutChecks(0));
+    return param == WellKnownClasses::ToClass(WellKnownClasses::dalvik_system_EmulatedStackFrame);
+  }
+
+  return false;
+}
+
 template<bool is_range, bool do_access_check>
-    REQUIRES_SHARED(Locks::mutator_lock_)
-inline bool DoInvokePolymorphic(Thread* self, ShadowFrame& shadow_frame,
-                                const Instruction* inst, uint16_t inst_data,
-                                JValue* result) {
+inline bool DoInvokePolymorphic(Thread* self,
+                                ShadowFrame& shadow_frame,
+                                const Instruction* inst,
+                                uint16_t inst_data,
+                                JValue* result)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
   // Invoke-polymorphic instructions always take a receiver. i.e, they are never static.
   const uint32_t vRegC = (is_range) ? inst->VRegC_4rcc() : inst->VRegC_45cc();
+  const int invoke_method_idx = (is_range) ? inst->VRegB_4rcc() : inst->VRegB_45cc();
 
-  // The method_idx here is the name of the signature polymorphic method that
+  // Initialize |result| to 0 as this is the default return value for
+  // polymorphic invocations of method handle types with void return
+  // and provides sane return result in error cases.
+  result->SetJ(0);
+
+  // Determine if this invocation is MethodHandle.invoke() or
+  // MethodHandle.invokeExact().
+  bool is_invoke_exact = IsInvokeExact(shadow_frame.GetMethod()->GetDeclaringClass()->GetDexFile(),
+                                       invoke_method_idx);
+
+  // The invoke_method_idx here is the name of the signature polymorphic method that
   // was symbolically invoked in bytecode (say MethodHandle.invoke or MethodHandle.invokeExact)
   // and not the method that we'll dispatch to in the end.
   //
@@ -597,15 +870,14 @@
   // signature polymorphic method so that we disallow calls via invoke-polymorphic
   // to non sig-poly methods. This would also have the side effect of verifying
   // that vRegC really is a reference type.
-  mirror::MethodHandleImpl* const method_handle =
-      reinterpret_cast<mirror::MethodHandleImpl*>(shadow_frame.GetVRegReference(vRegC));
-  if (UNLIKELY(method_handle == nullptr)) {
-    const int method_idx = (is_range) ? inst->VRegB_4rcc() : inst->VRegB_45cc();
+  StackHandleScope<6> hs(self);
+  Handle<mirror::MethodHandleImpl> method_handle(hs.NewHandle(
+      ObjPtr<mirror::MethodHandleImpl>::DownCast(
+          MakeObjPtr(shadow_frame.GetVRegReference(vRegC)))));
+  if (UNLIKELY(method_handle.Get() == nullptr)) {
     // Note that the invoke type is kVirtual here because a call to a signature
     // polymorphic method is shaped like a virtual call at the bytecode level.
-    ThrowNullPointerExceptionForMethodAccess(method_idx, InvokeType::kVirtual);
-
-    result->SetJ(0);
+    ThrowNullPointerExceptionForMethodAccess(invoke_method_idx, InvokeType::kVirtual);
     return false;
   }
 
@@ -616,43 +888,53 @@
   // Call through to the classlinker and ask it to resolve the static type associated
   // with the callsite. This information is stored in the dex cache so it's
   // guaranteed to be fast after the first resolution.
-  StackHandleScope<2> hs(self);
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  mirror::Class* caller_class = shadow_frame.GetMethod()->GetDeclaringClass();
-  mirror::MethodType* callsite_type = class_linker->ResolveMethodType(
+  Handle<mirror::Class> caller_class(hs.NewHandle(shadow_frame.GetMethod()->GetDeclaringClass()));
+  Handle<mirror::MethodType> callsite_type(hs.NewHandle(class_linker->ResolveMethodType(
       caller_class->GetDexFile(), callsite_proto_id,
       hs.NewHandle<mirror::DexCache>(caller_class->GetDexCache()),
-      hs.NewHandle<mirror::ClassLoader>(caller_class->GetClassLoader()));
+      hs.NewHandle<mirror::ClassLoader>(caller_class->GetClassLoader()))));
 
   // This implies we couldn't resolve one or more types in this method handle.
-  if (UNLIKELY(callsite_type == nullptr)) {
+  if (UNLIKELY(callsite_type.Get() == nullptr)) {
     CHECK(self->IsExceptionPending());
-    result->SetJ(0);
     return false;
   }
 
-  const char* old_cause = self->StartAssertNoThreadSuspension("DoInvokePolymorphic");
-
-  // Get the method we're actually invoking along with the kind of
-  // invoke that is desired. We don't need to perform access checks at this
-  // point because they would have been performed on our behalf at the point
-  // of creation of the method handle.
-  ArtMethod* called_method = method_handle->GetTargetMethod();
   const MethodHandleKind handle_kind = method_handle->GetHandleKind();
-  mirror::MethodType* const handle_type = method_handle->GetMethodType();
-  CHECK(called_method != nullptr);
-  CHECK(handle_type != nullptr);
+  Handle<mirror::MethodType> handle_type(hs.NewHandle(method_handle->GetMethodType()));
+  CHECK(handle_type.Get() != nullptr);
+  {
+    // We need to check the nominal type of the handle in addition to the
+    // real type. The "nominal" type is present when MethodHandle.asType is
+    // called any handle, and results in the declared type of the handle
+    // changing.
+    ObjPtr<mirror::MethodType> nominal_type(method_handle->GetNominalType());
+    ObjPtr<mirror::MethodType> check_type(nullptr);
+    if (LIKELY(nominal_type.Ptr() == nullptr)) {
+      check_type.Assign(handle_type.Get());
+    } else {
+      check_type.Assign(nominal_type.Ptr());
+    }
 
-  // We now have to massage the number of inputs to the target function.
-  // It's always one less than the number of inputs to the signature polymorphic
-  // invoke, the first input being a reference to the MethodHandle itself.
-  const uint16_t number_of_inputs =
-      ((is_range) ? inst->VRegA_4rcc(inst_data) : inst->VRegA_45cc(inst_data)) - 1;
+    if (is_invoke_exact) {
+      if (UNLIKELY(!callsite_type->IsExactMatch(check_type.Ptr()))) {
+        ThrowWrongMethodTypeException(check_type.Ptr(), callsite_type.Get());
+        return false;
+      }
+    } else {
+      if (UNLIKELY(!IsCallerTransformer(callsite_type) &&
+                   !callsite_type->IsConvertible(check_type.Ptr()))) {
+        ThrowWrongMethodTypeException(check_type.Ptr(), callsite_type.Get());
+        return false;
+      }
+    }
+  }
 
   uint32_t arg[Instruction::kMaxVarArgRegs] = {};
-  uint32_t receiver_vregC = 0;
+  uint32_t first_src_reg = 0;
   if (is_range) {
-    receiver_vregC = (inst->VRegC_4rcc() + 1);
+    first_src_reg = (inst->VRegC_4rcc() + 1);
   } else {
     inst->GetVarArgs(arg, inst_data);
     arg[0] = arg[1];
@@ -660,49 +942,401 @@
     arg[2] = arg[3];
     arg[3] = arg[4];
     arg[4] = 0;
-    receiver_vregC = arg[0];
+    first_src_reg = arg[0];
   }
 
   if (IsInvoke(handle_kind)) {
+    // Get the method we're actually invoking along with the kind of
+    // invoke that is desired. We don't need to perform access checks at this
+    // point because they would have been performed on our behalf at the point
+    // of creation of the method handle.
+    ArtMethod* called_method = method_handle->GetTargetMethod();
+    CHECK(called_method != nullptr);
+
     if (handle_kind == kInvokeVirtual || handle_kind == kInvokeInterface) {
-      mirror::Object* receiver = shadow_frame.GetVRegReference(receiver_vregC);
-      mirror::Class* declaring_class = called_method->GetDeclaringClass();
-      // Verify that _vRegC is an object reference and of the type expected by
-      // the receiver.
-      called_method = receiver->GetClass()->FindVirtualMethodForVirtualOrInterface(
-          called_method, kRuntimePointerSize);
-      if (!VerifyObjectIsClass(receiver, declaring_class)) {
-        self->EndAssertNoThreadSuspension(old_cause);
+      // TODO: Unfortunately, we have to postpone dynamic receiver based checks
+      // because the receiver might be cast or might come from an emulated stack
+      // frame, which means that it is unknown at this point. We perform these
+      // checks inside DoCallPolymorphic right before we do the actual invoke.
+    } else if (handle_kind == kInvokeDirect) {
+      // String constructors are a special case, they are replaced with StringFactory
+      // methods.
+      if (called_method->IsConstructor() && called_method->GetDeclaringClass()->IsStringClass()) {
+        DCHECK(handle_type->GetRType()->IsStringClass());
+        called_method = WellKnownClasses::StringInitToStringFactory(called_method);
+      }
+    } else if (handle_kind == kInvokeSuper) {
+      ObjPtr<mirror::Class> declaring_class = called_method->GetDeclaringClass();
+
+      // Note that we're not dynamically dispatching on the type of the receiver
+      // here. We use the static type of the "receiver" object that we've
+      // recorded in the method handle's type, which will be the same as the
+      // special caller that was specified at the point of lookup.
+      ObjPtr<mirror::Class> referrer_class = handle_type->GetPTypes()->Get(0);
+      if (!declaring_class->IsInterface()) {
+        ObjPtr<mirror::Class> super_class = referrer_class->GetSuperClass();
+        uint16_t vtable_index = called_method->GetMethodIndex();
+        DCHECK(super_class != nullptr);
+        DCHECK(super_class->HasVTable());
+        // Note that super_class is a super of referrer_class and called_method
+        // will always be declared by super_class (or one of its super classes).
+        DCHECK_LT(vtable_index, super_class->GetVTableLength());
+        called_method = super_class->GetVTableEntry(vtable_index, kRuntimePointerSize);
+      } else {
+        called_method = referrer_class->FindVirtualMethodForInterfaceSuper(
+            called_method, kRuntimePointerSize);
+      }
+
+      CHECK(called_method != nullptr);
+    }
+
+    bool call_success;
+    if (handle_kind == kInvokeTransform) {
+      call_success = DoCallTransform<is_range>(called_method,
+                                               callsite_type,
+                                               handle_type,
+                                               self,
+                                               shadow_frame,
+                                               method_handle /* receiver */,
+                                               result,
+                                               arg,
+                                               first_src_reg);
+    } else {
+      call_success = DoCallPolymorphic<is_range>(called_method,
+                                                 callsite_type,
+                                                 handle_type,
+                                                 self,
+                                                 shadow_frame,
+                                                 result,
+                                                 arg,
+                                                 first_src_reg,
+                                                 handle_kind);
+    }
+    if (LIKELY(call_success && ConvertReturnValue(callsite_type, handle_type, result))) {
+      return true;
+    }
+    DCHECK(self->IsExceptionPending());
+    return false;
+  } else {
+    DCHECK(!is_range);
+    ArtField* field = method_handle->GetTargetField();
+    Primitive::Type field_type = field->GetTypeAsPrimitiveType();
+
+    switch (handle_kind) {
+      case kInstanceGet: {
+        ObjPtr<mirror::Object> obj = shadow_frame.GetVRegReference(first_src_reg);
+        DoFieldGetForInvokePolymorphic(self, shadow_frame, obj, field, field_type, result);
+        if (!ConvertReturnValue(callsite_type, handle_type, result)) {
+          DCHECK(self->IsExceptionPending());
+          return false;
+        }
+        return true;
+      }
+      case kStaticGet: {
+        ObjPtr<mirror::Object> obj = GetAndInitializeDeclaringClass(self, field);
+        if (obj == nullptr) {
+          DCHECK(self->IsExceptionPending());
+          return false;
+        }
+        DoFieldGetForInvokePolymorphic(self, shadow_frame, obj, field, field_type, result);
+        if (!ConvertReturnValue(callsite_type, handle_type, result)) {
+          DCHECK(self->IsExceptionPending());
+          return false;
+        }
+        return true;
+      }
+      case kInstancePut: {
+        JValue value = GetFieldValue(shadow_frame, field_type, arg[1]);
+        if (!ConvertArgumentValue(callsite_type, handle_type, 1, &value)) {
+          DCHECK(self->IsExceptionPending());
+          return false;
+        }
+        ObjPtr<mirror::Object> obj = shadow_frame.GetVRegReference(first_src_reg);
+        return DoFieldPutForInvokePolymorphic(self, shadow_frame, obj, field, field_type, value);
+      }
+      case kStaticPut: {
+        JValue value = GetFieldValue(shadow_frame, field_type, arg[0]);
+        if (!ConvertArgumentValue(callsite_type, handle_type, 0, &value)) {
+          DCHECK(self->IsExceptionPending());
+          return false;
+        }
+        ObjPtr<mirror::Object> obj = field->GetDeclaringClass();
+        return DoFieldPutForInvokePolymorphic(self, shadow_frame, obj, field, field_type, value);
+      }
+      default:
+        LOG(FATAL) << "Unreachable: " << handle_kind;
+        UNREACHABLE();
+    }
+  }
+}
+
+// Calculate the number of ins for a proxy or native method, where we
+// can't just look at the code item.
+static inline size_t GetInsForProxyOrNativeMethod(ArtMethod* method)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  DCHECK(method->IsNative() || method->IsProxyMethod());
+
+  method = method->GetInterfaceMethodIfProxy(kRuntimePointerSize);
+  size_t num_ins = 0;
+  // Separate accounting for the receiver, which isn't a part of the
+  // shorty.
+  if (!method->IsStatic()) {
+    ++num_ins;
+  }
+
+  uint32_t shorty_len = 0;
+  const char* shorty = method->GetShorty(&shorty_len);
+  for (size_t i = 1; i < shorty_len; ++i) {
+    const char c = shorty[i];
+    ++num_ins;
+    if (c == 'J' || c == 'D') {
+      ++num_ins;
+    }
+  }
+
+  return num_ins;
+}
+
+
+inline void PerformCall(Thread* self,
+                        const DexFile::CodeItem* code_item,
+                        ArtMethod* caller_method,
+                        const size_t first_dest_reg,
+                        ShadowFrame* callee_frame,
+                        JValue* result) {
+  if (LIKELY(Runtime::Current()->IsStarted())) {
+    ArtMethod* target = callee_frame->GetMethod();
+    if (ClassLinker::ShouldUseInterpreterEntrypoint(
+        target,
+        target->GetEntryPointFromQuickCompiledCode())) {
+      ArtInterpreterToInterpreterBridge(self, code_item, callee_frame, result);
+    } else {
+      ArtInterpreterToCompiledCodeBridge(
+          self, caller_method, code_item, callee_frame, result);
+    }
+  } else {
+    UnstartedRuntime::Invoke(self, code_item, callee_frame, result, first_dest_reg);
+  }
+}
+
+template <bool is_range>
+inline void CopyRegisters(ShadowFrame& caller_frame,
+                          ShadowFrame* callee_frame,
+                          const uint32_t (&arg)[Instruction::kMaxVarArgRegs],
+                          const size_t first_src_reg,
+                          const size_t first_dest_reg,
+                          const size_t num_regs) {
+  if (is_range) {
+    const size_t dest_reg_bound = first_dest_reg + num_regs;
+    for (size_t src_reg = first_src_reg, dest_reg = first_dest_reg; dest_reg < dest_reg_bound;
+        ++dest_reg, ++src_reg) {
+      AssignRegister(callee_frame, caller_frame, dest_reg, src_reg);
+    }
+  } else {
+    DCHECK_LE(num_regs, arraysize(arg));
+
+    for (size_t arg_index = 0; arg_index < num_regs; ++arg_index) {
+      AssignRegister(callee_frame, caller_frame, first_dest_reg + arg_index, arg[arg_index]);
+    }
+  }
+}
+
+template <bool is_range>
+static inline bool DoCallPolymorphic(ArtMethod* called_method,
+                                     Handle<mirror::MethodType> callsite_type,
+                                     Handle<mirror::MethodType> target_type,
+                                     Thread* self,
+                                     ShadowFrame& shadow_frame,
+                                     JValue* result,
+                                     uint32_t (&arg)[Instruction::kMaxVarArgRegs],
+                                     uint32_t first_src_reg,
+                                     const MethodHandleKind handle_kind) {
+  // Compute method information.
+  const DexFile::CodeItem* code_item = called_method->GetCodeItem();
+
+  // Number of registers for the callee's call frame. Note that for non-exact
+  // invokes, we always derive this information from the callee method. We
+  // cannot guarantee during verification that the number of registers encoded
+  // in the invoke is equal to the number of ins for the callee. This is because
+  // some transformations (such as boxing a long -> Long or wideining an
+  // int -> long will change that number.
+  uint16_t num_regs;
+  size_t num_input_regs;
+  size_t first_dest_reg;
+  if (LIKELY(code_item != nullptr)) {
+    num_regs = code_item->registers_size_;
+    first_dest_reg = num_regs - code_item->ins_size_;
+    num_input_regs = code_item->ins_size_;
+    // Parameter registers go at the end of the shadow frame.
+    DCHECK_NE(first_dest_reg, (size_t)-1);
+  } else {
+    // No local regs for proxy and native methods.
+    DCHECK(called_method->IsNative() || called_method->IsProxyMethod());
+    num_regs = num_input_regs = GetInsForProxyOrNativeMethod(called_method);
+    first_dest_reg = 0;
+  }
+
+  // Allocate shadow frame on the stack.
+  ShadowFrameAllocaUniquePtr shadow_frame_unique_ptr =
+      CREATE_SHADOW_FRAME(num_regs, &shadow_frame, called_method, /* dex pc */ 0);
+  ShadowFrame* new_shadow_frame = shadow_frame_unique_ptr.get();
+
+  // Whether this polymorphic invoke was issued by a transformer method.
+  bool is_caller_transformer = false;
+  // Thread might be suspended during PerformArgumentConversions due to the
+  // allocations performed during boxing.
+  {
+    ScopedStackedShadowFramePusher pusher(
+        self, new_shadow_frame, StackedShadowFrameType::kShadowFrameUnderConstruction);
+    if (callsite_type->IsExactMatch(target_type.Get())) {
+      // This is an exact invoke, we can take the fast path of just copying all
+      // registers without performing any argument conversions.
+      CopyRegisters<is_range>(shadow_frame,
+                              new_shadow_frame,
+                              arg,
+                              first_src_reg,
+                              first_dest_reg,
+                              num_input_regs);
+    } else {
+      // This includes the case where we're entering this invoke-polymorphic
+      // from a transformer method. In that case, the callsite_type will contain
+      // a single argument of type dalvik.system.EmulatedStackFrame. In that
+      // case, we'll have to unmarshal the EmulatedStackFrame into the
+      // new_shadow_frame and perform argument conversions on it.
+      if (IsCallerTransformer(callsite_type)) {
+        is_caller_transformer = true;
+        // The emulated stack frame is the first and only argument when we're coming
+        // through from a transformer.
+        ObjPtr<mirror::EmulatedStackFrame> emulated_stack_frame(
+            reinterpret_cast<mirror::EmulatedStackFrame*>(
+                shadow_frame.GetVRegReference(first_src_reg)));
+        if (!emulated_stack_frame->WriteToShadowFrame(self,
+                                                      target_type,
+                                                      first_dest_reg,
+                                                      new_shadow_frame)) {
+          DCHECK(self->IsExceptionPending());
+          result->SetL(0);
+          return false;
+        }
+      } else if (!ConvertAndCopyArgumentsFromCallerFrame<is_range>(self,
+                                                                   callsite_type,
+                                                                   target_type,
+                                                                   shadow_frame,
+                                                                   first_src_reg,
+                                                                   first_dest_reg,
+                                                                   arg,
+                                                                   new_shadow_frame)) {
+        DCHECK(self->IsExceptionPending());
+        result->SetL(0);
         return false;
       }
-    } else if (handle_kind == kInvokeDirect) {
-      // TODO(narayan) : We need to handle the case where the target method is a
-      // constructor here. Also the case where we don't want to dynamically
-      // dispatch based on the type of the receiver.
-      self->EndAssertNoThreadSuspension(old_cause);
-      UNIMPLEMENTED(FATAL) << "Direct invokes are not implemented yet.";
+    }
+  }
+
+  // See TODO in DoInvokePolymorphic : We need to perform this dynamic, receiver
+  // based dispatch right before we perform the actual call, because the
+  // receiver isn't known very early.
+  if (handle_kind == kInvokeVirtual || handle_kind == kInvokeInterface) {
+    ObjPtr<mirror::Object> receiver(new_shadow_frame->GetVRegReference(first_dest_reg));
+    ObjPtr<mirror::Class> declaring_class(called_method->GetDeclaringClass());
+    // Verify that _vRegC is an object reference and of the type expected by
+    // the receiver.
+    if (!VerifyObjectIsClass(receiver, declaring_class)) {
+      DCHECK(self->IsExceptionPending());
       return false;
     }
 
-    // NOTE: handle_kind == kInvokeStatic needs no special treatment here. We
-    // can directly make the call. handle_kind == kInvokeSuper doesn't have any
-    // particular use and can probably be dropped.
-    if (callsite_type->IsExactMatch(handle_type)) {
-      self->EndAssertNoThreadSuspension(old_cause);
-      return DoCallCommon<is_range, do_access_check>(
-          called_method, self, shadow_frame, result, number_of_inputs,
-          arg, receiver_vregC);
-    }
-
-    self->EndAssertNoThreadSuspension(old_cause);
-    UNIMPLEMENTED(FATAL) << "Non exact invokes are not implemented yet.";
-    return false;
-  } else {
-    // TODO(narayan): Implement field getters and setters.
-    self->EndAssertNoThreadSuspension(old_cause);
-    UNIMPLEMENTED(FATAL) << "Field references in method handles are not implemented yet.";
-    return false;
+    called_method = receiver->GetClass()->FindVirtualMethodForVirtualOrInterface(
+        called_method, kRuntimePointerSize);
   }
+
+  PerformCall(self, code_item, shadow_frame.GetMethod(), first_dest_reg, new_shadow_frame, result);
+
+  // If the caller of this signature polymorphic method was a transformer,
+  // we need to copy the result back out to the emulated stack frame.
+  if (is_caller_transformer && !self->IsExceptionPending()) {
+    ObjPtr<mirror::EmulatedStackFrame> emulated_stack_frame(
+        reinterpret_cast<mirror::EmulatedStackFrame*>(
+            shadow_frame.GetVRegReference(first_src_reg)));
+
+    emulated_stack_frame->SetReturnValue(self, *result);
+  }
+
+  return !self->IsExceptionPending();
+}
+
+template <bool is_range>
+static inline bool DoCallTransform(ArtMethod* called_method,
+                                   Handle<mirror::MethodType> callsite_type,
+                                   Handle<mirror::MethodType> callee_type,
+                                   Thread* self,
+                                   ShadowFrame& shadow_frame,
+                                   Handle<mirror::MethodHandleImpl> receiver,
+                                   JValue* result,
+                                   uint32_t (&arg)[Instruction::kMaxVarArgRegs],
+                                   uint32_t first_src_reg) {
+  // This can be fixed to two, because the method we're calling here
+  // (MethodHandle.transformInternal) doesn't have any locals and the signature
+  // is known :
+  //
+  // private MethodHandle.transformInternal(EmulatedStackFrame sf);
+  //
+  // This means we need only two vregs :
+  // - One for the receiver object.
+  // - One for the only method argument (an EmulatedStackFrame).
+  static constexpr size_t kNumRegsForTransform = 2;
+
+  const DexFile::CodeItem* code_item = called_method->GetCodeItem();
+  DCHECK(code_item != nullptr);
+  DCHECK_EQ(kNumRegsForTransform, code_item->registers_size_);
+  DCHECK_EQ(kNumRegsForTransform, code_item->ins_size_);
+
+  ShadowFrameAllocaUniquePtr shadow_frame_unique_ptr =
+      CREATE_SHADOW_FRAME(kNumRegsForTransform, &shadow_frame, called_method, /* dex pc */ 0);
+  ShadowFrame* new_shadow_frame = shadow_frame_unique_ptr.get();
+
+  StackHandleScope<1> hs(self);
+  MutableHandle<mirror::EmulatedStackFrame> sf(hs.NewHandle<mirror::EmulatedStackFrame>(nullptr));
+  if (IsCallerTransformer(callsite_type)) {
+    // If we're entering this transformer from another transformer, we can pass
+    // through the handle directly to the callee, instead of having to
+    // instantiate a new stack frame based on the shadow frame.
+    sf.Assign(reinterpret_cast<mirror::EmulatedStackFrame*>(
+        shadow_frame.GetVRegReference(first_src_reg)));
+  } else {
+    sf.Assign(mirror::EmulatedStackFrame::CreateFromShadowFrameAndArgs<is_range>(
+        self,
+        callsite_type,
+        callee_type,
+        shadow_frame,
+        first_src_reg,
+        arg));
+
+    // Something went wrong while creating the emulated stack frame, we should
+    // throw the pending exception.
+    if (sf.Get() == nullptr) {
+      DCHECK(self->IsExceptionPending());
+      return false;
+    }
+  }
+
+  new_shadow_frame->SetVRegReference(0, receiver.Get());
+  new_shadow_frame->SetVRegReference(1, sf.Get());
+
+  PerformCall(self,
+              code_item,
+              shadow_frame.GetMethod(),
+              0 /* first dest reg */,
+              new_shadow_frame,
+              result);
+
+  // If the called transformer method we called has returned a value, then we
+  // need to copy it back to |result|.
+  if (!self->IsExceptionPending()) {
+    sf->GetReturnValue(self, result);
+  }
+
+  return !self->IsExceptionPending();
 }
 
 template <bool is_range,
@@ -823,15 +1457,22 @@
       switch (shorty[shorty_pos + 1]) {
         // Handle Object references. 1 virtual register slot.
         case 'L': {
-          Object* o = shadow_frame.GetVRegReference(src_reg);
+          ObjPtr<mirror::Object> o = shadow_frame.GetVRegReference(src_reg);
           if (do_assignability_check && o != nullptr) {
             PointerSize pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
-            Class* arg_type =
-                method->GetClassFromTypeIndex(
-                    params->GetTypeItem(shorty_pos).type_idx_, true /* resolve */, pointer_size);
+            const uint32_t type_idx = params->GetTypeItem(shorty_pos).type_idx_;
+            ObjPtr<mirror::Class> arg_type = method->GetDexCacheResolvedType(type_idx,
+                                                                             pointer_size);
             if (arg_type == nullptr) {
-              CHECK(self->IsExceptionPending());
-              return false;
+              StackHandleScope<1> hs(self);
+              // Preserve o since it is used below and GetClassFromTypeIndex may cause thread
+              // suspension.
+              HandleWrapperObjPtr<mirror::Object> h = hs.NewHandleWrapper(&o);
+              arg_type = method->GetClassFromTypeIndex(type_idx, true /* resolve */, pointer_size);
+              if (arg_type == nullptr) {
+                CHECK(self->IsExceptionPending());
+                return false;
+              }
             }
             if (!o->VerifierInstanceOf(arg_type)) {
               // This should never happen.
@@ -844,7 +1485,7 @@
               return false;
             }
           }
-          new_shadow_frame->SetVRegReference(dest_reg, o);
+          new_shadow_frame->SetVRegReference(dest_reg, o.Ptr());
           break;
         }
         // Handle doubles and longs. 2 consecutive virtual register slots.
@@ -865,40 +1506,20 @@
       }
     }
   } else {
-    size_t arg_index = 0;
-
-    // Fast path: no extra checks.
     if (is_range) {
-      uint16_t first_src_reg = vregC;
-
-      for (size_t src_reg = first_src_reg, dest_reg = first_dest_reg; dest_reg < num_regs;
-          ++dest_reg, ++src_reg) {
-        AssignRegister(new_shadow_frame, shadow_frame, dest_reg, src_reg);
-      }
-    } else {
-      DCHECK_LE(number_of_inputs, arraysize(arg));
-
-      for (; arg_index < number_of_inputs; ++arg_index) {
-        AssignRegister(new_shadow_frame, shadow_frame, first_dest_reg + arg_index, arg[arg_index]);
-      }
+      DCHECK_EQ(num_regs, first_dest_reg + number_of_inputs);
     }
+
+    CopyRegisters<is_range>(shadow_frame,
+                            new_shadow_frame,
+                            arg,
+                            vregC,
+                            first_dest_reg,
+                            number_of_inputs);
     self->EndAssertNoThreadSuspension(old_cause);
   }
 
-  // Do the call now.
-  if (LIKELY(Runtime::Current()->IsStarted())) {
-    ArtMethod* target = new_shadow_frame->GetMethod();
-    if (ClassLinker::ShouldUseInterpreterEntrypoint(
-        target,
-        target->GetEntryPointFromQuickCompiledCode())) {
-      ArtInterpreterToInterpreterBridge(self, code_item, new_shadow_frame, result);
-    } else {
-      ArtInterpreterToCompiledCodeBridge(
-          self, shadow_frame.GetMethod(), code_item, new_shadow_frame, result);
-    }
-  } else {
-    UnstartedRuntime::Invoke(self, code_item, new_shadow_frame, result, first_dest_reg);
-  }
+  PerformCall(self, code_item, shadow_frame.GetMethod(), first_dest_reg, new_shadow_frame, result);
 
   if (string_init && !self->IsExceptionPending()) {
     SetStringInitValueToAllAliases(&shadow_frame, string_init_vreg_this, *result);
@@ -931,8 +1552,10 @@
 }
 
 template <bool is_range, bool do_access_check, bool transaction_active>
-bool DoFilledNewArray(const Instruction* inst, const ShadowFrame& shadow_frame,
-                      Thread* self, JValue* result) {
+bool DoFilledNewArray(const Instruction* inst,
+                      const ShadowFrame& shadow_frame,
+                      Thread* self,
+                      JValue* result) {
   DCHECK(inst->Opcode() == Instruction::FILLED_NEW_ARRAY ||
          inst->Opcode() == Instruction::FILLED_NEW_ARRAY_RANGE);
   const int32_t length = is_range ? inst->VRegA_3rc() : inst->VRegA_35c();
@@ -945,29 +1568,35 @@
     return false;
   }
   uint16_t type_idx = is_range ? inst->VRegB_3rc() : inst->VRegB_35c();
-  Class* array_class = ResolveVerifyAndClinit(type_idx, shadow_frame.GetMethod(),
-                                              self, false, do_access_check);
+  ObjPtr<mirror::Class> array_class = ResolveVerifyAndClinit(type_idx,
+                                                             shadow_frame.GetMethod(),
+                                                             self,
+                                                             false,
+                                                             do_access_check);
   if (UNLIKELY(array_class == nullptr)) {
     DCHECK(self->IsExceptionPending());
     return false;
   }
   CHECK(array_class->IsArrayClass());
-  Class* component_class = array_class->GetComponentType();
+  ObjPtr<mirror::Class> component_class = array_class->GetComponentType();
   const bool is_primitive_int_component = component_class->IsPrimitiveInt();
   if (UNLIKELY(component_class->IsPrimitive() && !is_primitive_int_component)) {
     if (component_class->IsPrimitiveLong() || component_class->IsPrimitiveDouble()) {
       ThrowRuntimeException("Bad filled array request for type %s",
-                            PrettyDescriptor(component_class).c_str());
+                            component_class->PrettyDescriptor().c_str());
     } else {
       self->ThrowNewExceptionF("Ljava/lang/InternalError;",
                                "Found type %s; filled-new-array not implemented for anything but 'int'",
-                               PrettyDescriptor(component_class).c_str());
+                               component_class->PrettyDescriptor().c_str());
     }
     return false;
   }
-  Object* new_array = Array::Alloc<true>(self, array_class, length,
-                                         array_class->GetComponentSizeShift(),
-                                         Runtime::Current()->GetHeap()->GetCurrentAllocator());
+  ObjPtr<mirror::Object> new_array = mirror::Array::Alloc<true>(
+      self,
+      array_class,
+      length,
+      array_class->GetComponentSizeShift(),
+      Runtime::Current()->GetHeap()->GetCurrentAllocator());
   if (UNLIKELY(new_array == nullptr)) {
     self->AssertPendingOOMException();
     return false;
@@ -985,7 +1614,7 @@
       new_array->AsIntArray()->SetWithoutChecks<transaction_active>(
           i, shadow_frame.GetVReg(src_reg));
     } else {
-      new_array->AsObjectArray<Object>()->SetWithoutChecks<transaction_active>(
+      new_array->AsObjectArray<mirror::Object>()->SetWithoutChecks<transaction_active>(
           i, shadow_frame.GetVRegReference(src_reg));
     }
   }
@@ -994,17 +1623,18 @@
   return true;
 }
 
-// TODO fix thread analysis: should be REQUIRES_SHARED(Locks::mutator_lock_).
+// TODO: Use ObjPtr here.
 template<typename T>
-static void RecordArrayElementsInTransactionImpl(mirror::PrimitiveArray<T>* array, int32_t count)
-    NO_THREAD_SAFETY_ANALYSIS {
+static void RecordArrayElementsInTransactionImpl(mirror::PrimitiveArray<T>* array,
+                                                 int32_t count)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
   Runtime* runtime = Runtime::Current();
   for (int32_t i = 0; i < count; ++i) {
     runtime->RecordWriteArray(array, i, array->GetWithoutChecks(i));
   }
 }
 
-void RecordArrayElementsInTransaction(mirror::Array* array, int32_t count)
+void RecordArrayElementsInTransaction(ObjPtr<mirror::Array> array, int32_t count)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   DCHECK(Runtime::Current()->IsActiveTransaction());
   DCHECK(array != nullptr);
diff --git a/runtime/interpreter/interpreter_common.h b/runtime/interpreter/interpreter_common.h
index 6b28110..9c26d24 100644
--- a/runtime/interpreter/interpreter_common.h
+++ b/runtime/interpreter/interpreter_common.h
@@ -43,25 +43,11 @@
 #include "mirror/object-inl.h"
 #include "mirror/object_array-inl.h"
 #include "mirror/string-inl.h"
+#include "obj_ptr.h"
 #include "stack.h"
 #include "thread.h"
 #include "well_known_classes.h"
 
-using ::art::ArtMethod;
-using ::art::mirror::Array;
-using ::art::mirror::BooleanArray;
-using ::art::mirror::ByteArray;
-using ::art::mirror::CharArray;
-using ::art::mirror::Class;
-using ::art::mirror::ClassLoader;
-using ::art::mirror::IntArray;
-using ::art::mirror::LongArray;
-using ::art::mirror::Object;
-using ::art::mirror::ObjectArray;
-using ::art::mirror::ShortArray;
-using ::art::mirror::String;
-using ::art::mirror::Throwable;
-
 namespace art {
 namespace interpreter {
 
@@ -69,13 +55,11 @@
     REQUIRES_SHARED(Locks::mutator_lock_);
 
 template <bool kMonitorCounting>
-static inline void DoMonitorEnter(Thread* self,
-                                  ShadowFrame* frame,
-                                  Object* ref)
+static inline void DoMonitorEnter(Thread* self, ShadowFrame* frame, ObjPtr<mirror::Object> ref)
     NO_THREAD_SAFETY_ANALYSIS
     REQUIRES(!Roles::uninterruptible_) {
   StackHandleScope<1> hs(self);
-  Handle<Object> h_ref(hs.NewHandle(ref));
+  Handle<mirror::Object> h_ref(hs.NewHandle(ref));
   h_ref->MonitorEnter(self);
   if (kMonitorCounting && frame->GetMethod()->MustCountLocks()) {
     frame->GetLockCountData().AddMonitor(self, h_ref.Get());
@@ -83,13 +67,11 @@
 }
 
 template <bool kMonitorCounting>
-static inline void DoMonitorExit(Thread* self,
-                                 ShadowFrame* frame,
-                                 Object* ref)
+static inline void DoMonitorExit(Thread* self, ShadowFrame* frame, ObjPtr<mirror::Object> ref)
     NO_THREAD_SAFETY_ANALYSIS
     REQUIRES(!Roles::uninterruptible_) {
   StackHandleScope<1> hs(self);
-  Handle<Object> h_ref(hs.NewHandle(ref));
+  Handle<mirror::Object> h_ref(hs.NewHandle(ref));
   h_ref->MonitorExit(self);
   if (kMonitorCounting && frame->GetMethod()->MustCountLocks()) {
     frame->GetLockCountData().RemoveMonitorOrThrow(self, h_ref.Get());
@@ -113,7 +95,7 @@
 void AbortTransactionV(Thread* self, const char* fmt, va_list args)
     REQUIRES_SHARED(Locks::mutator_lock_);
 
-void RecordArrayElementsInTransaction(mirror::Array* array, int32_t count)
+void RecordArrayElementsInTransaction(ObjPtr<mirror::Array> array, int32_t count)
     REQUIRES_SHARED(Locks::mutator_lock_);
 
 // Invokes the given method. This is part of the invocation support and is used by DoInvoke and
@@ -126,11 +108,14 @@
 // Handles all invoke-XXX/range instructions except for invoke-polymorphic[/range].
 // Returns true on success, otherwise throws an exception and returns false.
 template<InvokeType type, bool is_range, bool do_access_check>
-static inline bool DoInvoke(Thread* self, ShadowFrame& shadow_frame, const Instruction* inst,
-                            uint16_t inst_data, JValue* result) {
+static inline bool DoInvoke(Thread* self,
+                            ShadowFrame& shadow_frame,
+                            const Instruction* inst,
+                            uint16_t inst_data,
+                            JValue* result) {
   const uint32_t method_idx = (is_range) ? inst->VRegB_3rc() : inst->VRegB_35c();
   const uint32_t vregC = (is_range) ? inst->VRegC_3rc() : inst->VRegC_35c();
-  Object* receiver = (type == kStatic) ? nullptr : shadow_frame.GetVRegReference(vregC);
+  ObjPtr<mirror::Object> receiver = (type == kStatic) ? nullptr : shadow_frame.GetVRegReference(vregC);
   ArtMethod* sf_method = shadow_frame.GetMethod();
   ArtMethod* const called_method = FindMethodFromCode<type, do_access_check>(
       method_idx, &receiver, sf_method, self);
@@ -156,7 +141,7 @@
       instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
       if (UNLIKELY(instrumentation->HasInvokeVirtualOrInterfaceListeners())) {
         instrumentation->InvokeVirtualOrInterface(
-            self, receiver, sf_method, shadow_frame.GetDexPC(), called_method);
+            self, receiver.Ptr(), sf_method, shadow_frame.GetDexPC(), called_method);
       }
     }
     return DoCall<is_range, do_access_check>(called_method, self, shadow_frame, inst, inst_data,
@@ -177,7 +162,7 @@
                                         const Instruction* inst, uint16_t inst_data,
                                         JValue* result) {
   const uint32_t vregC = (is_range) ? inst->VRegC_3rc() : inst->VRegC_35c();
-  Object* const receiver = shadow_frame.GetVRegReference(vregC);
+  ObjPtr<mirror::Object> const receiver = shadow_frame.GetVRegReference(vregC);
   if (UNLIKELY(receiver == nullptr)) {
     // We lost the reference to the method index so we cannot get a more
     // precised exception message.
@@ -185,6 +170,13 @@
     return false;
   }
   const uint32_t vtable_idx = (is_range) ? inst->VRegB_3rc() : inst->VRegB_35c();
+  // Debug code for b/31357497. To be removed.
+  if (kUseReadBarrier) {
+    CHECK(receiver->GetClass() != nullptr)
+        << "Null class found in object " << receiver << " in region type "
+        << Runtime::Current()->GetHeap()->ConcurrentCopyingCollector()->
+            RegionSpace()->GetRegionType(receiver.Ptr());
+  }
   CHECK(receiver->GetClass()->ShouldHaveEmbeddedVTable());
   ArtMethod* const called_method = receiver->GetClass()->GetEmbeddedVTableEntry(
       vtable_idx, kRuntimePointerSize);
@@ -207,7 +199,7 @@
     // TODO: Remove the InvokeVirtualOrInterface instrumentation, as it was only used by the JIT.
     if (UNLIKELY(instrumentation->HasInvokeVirtualOrInterfaceListeners())) {
       instrumentation->InvokeVirtualOrInterface(
-          self, receiver, shadow_frame.GetMethod(), shadow_frame.GetDexPC(), called_method);
+          self, receiver.Ptr(), shadow_frame.GetMethod(), shadow_frame.GetDexPC(), called_method);
     }
     // No need to check since we've been quickened.
     return DoCall<is_range, false>(called_method, self, shadow_frame, inst, inst_data, result);
@@ -242,9 +234,11 @@
 
 // Handles string resolution for const-string and const-string-jumbo instructions. Also ensures the
 // java.lang.String class is initialized.
-static inline String* ResolveString(Thread* self, ShadowFrame& shadow_frame, uint32_t string_idx)
+static inline ObjPtr<mirror::String> ResolveString(Thread* self,
+                                                   ShadowFrame& shadow_frame,
+                                                   uint32_t string_idx)
     REQUIRES_SHARED(Locks::mutator_lock_) {
-  Class* java_lang_string_class = String::GetJavaLangString();
+  ObjPtr<mirror::Class> java_lang_string_class = mirror::String::GetJavaLangString();
   if (UNLIKELY(!java_lang_string_class->IsInitialized())) {
     ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
     StackHandleScope<1> hs(self);
@@ -255,11 +249,11 @@
     }
   }
   ArtMethod* method = shadow_frame.GetMethod();
-  mirror::Class* declaring_class = method->GetDeclaringClass();
+  ObjPtr<mirror::Class> declaring_class = method->GetDeclaringClass();
   // MethodVerifier refuses methods with string_idx out of bounds.
   DCHECK_LT(string_idx % mirror::DexCache::kDexCacheStringCacheSize,
             declaring_class->GetDexFile().NumStringIds());
-  mirror::String* string_ptr =
+  ObjPtr<mirror::String> string_ptr =
       mirror::StringDexCachePair::Lookup(declaring_class->GetDexCacheStrings(),
                                          string_idx,
                                          mirror::DexCache::kDexCacheStringCacheSize).Read();
@@ -311,8 +305,10 @@
 
 // Handles div-long and div-long-2addr instructions.
 // Returns true on success, otherwise throws a java.lang.ArithmeticException and return false.
-static inline bool DoLongDivide(ShadowFrame& shadow_frame, size_t result_reg,
-                                int64_t dividend, int64_t divisor)
+static inline bool DoLongDivide(ShadowFrame& shadow_frame,
+                                size_t result_reg,
+                                int64_t dividend,
+                                int64_t divisor)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   const int64_t kMinLong = std::numeric_limits<int64_t>::min();
   if (UNLIKELY(divisor == 0)) {
@@ -329,8 +325,10 @@
 
 // Handles rem-long and rem-long-2addr instructions.
 // Returns true on success, otherwise throws a java.lang.ArithmeticException and return false.
-static inline bool DoLongRemainder(ShadowFrame& shadow_frame, size_t result_reg,
-                                   int64_t dividend, int64_t divisor)
+static inline bool DoLongRemainder(ShadowFrame& shadow_frame,
+                                   size_t result_reg,
+                                   int64_t dividend,
+                                   int64_t divisor)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   const int64_t kMinLong = std::numeric_limits<int64_t>::min();
   if (UNLIKELY(divisor == 0)) {
@@ -431,19 +429,19 @@
   if (kTraceExecutionEnabled) {
 #define TRACE_LOG std::cerr
     std::ostringstream oss;
-    oss << PrettyMethod(shadow_frame.GetMethod())
+    oss << shadow_frame.GetMethod()->PrettyMethod()
         << StringPrintf("\n0x%x: ", dex_pc)
         << inst->DumpString(shadow_frame.GetMethod()->GetDexFile()) << "\n";
     for (uint32_t i = 0; i < shadow_frame.NumberOfVRegs(); ++i) {
       uint32_t raw_value = shadow_frame.GetVReg(i);
-      Object* ref_value = shadow_frame.GetVRegReference(i);
+      ObjPtr<mirror::Object> ref_value = shadow_frame.GetVRegReference(i);
       oss << StringPrintf(" vreg%u=0x%08X", i, raw_value);
       if (ref_value != nullptr) {
         if (ref_value->GetClass()->IsStringClass() &&
             !ref_value->AsString()->IsValueNull()) {
           oss << "/java.lang.String \"" << ref_value->AsString()->ToModifiedUtf8() << "\"";
         } else {
-          oss << "/" << PrettyTypeOf(ref_value);
+          oss << "/" << ref_value->PrettyTypeOf();
         }
       }
     }
@@ -456,6 +454,24 @@
   return branch_offset <= 0;
 }
 
+// Assign register 'src_reg' from shadow_frame to register 'dest_reg' into new_shadow_frame.
+static inline void AssignRegister(ShadowFrame* new_shadow_frame, const ShadowFrame& shadow_frame,
+                                  size_t dest_reg, size_t src_reg)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  // Uint required, so that sign extension does not make this wrong on 64b systems
+  uint32_t src_value = shadow_frame.GetVReg(src_reg);
+  ObjPtr<mirror::Object> o = shadow_frame.GetVRegReference<kVerifyNone>(src_reg);
+
+  // If both register locations contains the same value, the register probably holds a reference.
+  // Note: As an optimization, non-moving collectors leave a stale reference value
+  // in the references array even after the original vreg was overwritten to a non-reference.
+  if (src_value == reinterpret_cast<uintptr_t>(o.Ptr())) {
+    new_shadow_frame->SetVRegReference(dest_reg, o.Ptr());
+  } else {
+    new_shadow_frame->SetVReg(dest_reg, src_value);
+  }
+}
+
 void ArtInterpreterToCompiledCodeBridge(Thread* self,
                                         ArtMethod* caller,
                                         const DexFile::CodeItem* code_item,
diff --git a/runtime/interpreter/interpreter_mterp_impl.h b/runtime/interpreter/interpreter_mterp_impl.h
index 90d9f89..1be20fa 100644
--- a/runtime/interpreter/interpreter_mterp_impl.h
+++ b/runtime/interpreter/interpreter_mterp_impl.h
@@ -21,6 +21,7 @@
 #include "base/mutex.h"
 #include "dex_file.h"
 #include "jvalue.h"
+#include "obj_ptr.h"
 
 namespace art {
 
diff --git a/runtime/interpreter/interpreter_switch_impl.cc b/runtime/interpreter/interpreter_switch_impl.cc
index 6cff1da..435ac62 100644
--- a/runtime/interpreter/interpreter_switch_impl.cc
+++ b/runtime/interpreter/interpreter_switch_impl.cc
@@ -20,6 +20,7 @@
 #include "experimental_flags.h"
 #include "interpreter_common.h"
 #include "jit/jit.h"
+#include "jvalue-inl.h"
 #include "safe_math.h"
 
 namespace art {
@@ -191,9 +192,9 @@
         break;
       case Instruction::MOVE_EXCEPTION: {
         PREAMBLE();
-        Throwable* exception = self->GetException();
+        ObjPtr<mirror::Throwable> exception = self->GetException();
         DCHECK(exception != nullptr) << "No pending exception on MOVE_EXCEPTION instruction";
-        shadow_frame.SetVRegReference(inst->VRegA_11x(inst_data), exception);
+        shadow_frame.SetVRegReference(inst->VRegA_11x(inst_data), exception.Ptr());
         self->ClearException();
         inst = inst->Next_1xx();
         break;
@@ -272,11 +273,11 @@
         self->AllowThreadSuspension();
         HANDLE_MONITOR_CHECKS();
         const size_t ref_idx = inst->VRegA_11x(inst_data);
-        Object* obj_result = shadow_frame.GetVRegReference(ref_idx);
+        ObjPtr<mirror::Object> obj_result = shadow_frame.GetVRegReference(ref_idx);
         if (do_assignability_check && obj_result != nullptr) {
           PointerSize pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
-          Class* return_type = shadow_frame.GetMethod()->GetReturnType(true /* resolve */,
-                                                                       pointer_size);
+          ObjPtr<mirror::Class> return_type = method->GetReturnType(true /* resolve */,
+                                                                    pointer_size);
           // Re-load since it might have moved.
           obj_result = shadow_frame.GetVRegReference(ref_idx);
           if (return_type == nullptr) {
@@ -372,41 +373,44 @@
         break;
       case Instruction::CONST_STRING: {
         PREAMBLE();
-        String* s = ResolveString(self, shadow_frame,  inst->VRegB_21c());
+        ObjPtr<mirror::String> s = ResolveString(self, shadow_frame,  inst->VRegB_21c());
         if (UNLIKELY(s == nullptr)) {
           HANDLE_PENDING_EXCEPTION();
         } else {
-          shadow_frame.SetVRegReference(inst->VRegA_21c(inst_data), s);
+          shadow_frame.SetVRegReference(inst->VRegA_21c(inst_data), s.Ptr());
           inst = inst->Next_2xx();
         }
         break;
       }
       case Instruction::CONST_STRING_JUMBO: {
         PREAMBLE();
-        String* s = ResolveString(self, shadow_frame,  inst->VRegB_31c());
+        ObjPtr<mirror::String> s = ResolveString(self, shadow_frame,  inst->VRegB_31c());
         if (UNLIKELY(s == nullptr)) {
           HANDLE_PENDING_EXCEPTION();
         } else {
-          shadow_frame.SetVRegReference(inst->VRegA_31c(inst_data), s);
+          shadow_frame.SetVRegReference(inst->VRegA_31c(inst_data), s.Ptr());
           inst = inst->Next_3xx();
         }
         break;
       }
       case Instruction::CONST_CLASS: {
         PREAMBLE();
-        Class* c = ResolveVerifyAndClinit(inst->VRegB_21c(), shadow_frame.GetMethod(),
-                                          self, false, do_access_check);
+        ObjPtr<mirror::Class> c = ResolveVerifyAndClinit(inst->VRegB_21c(),
+                                                         shadow_frame.GetMethod(),
+                                                         self,
+                                                         false,
+                                                         do_access_check);
         if (UNLIKELY(c == nullptr)) {
           HANDLE_PENDING_EXCEPTION();
         } else {
-          shadow_frame.SetVRegReference(inst->VRegA_21c(inst_data), c);
+          shadow_frame.SetVRegReference(inst->VRegA_21c(inst_data), c.Ptr());
           inst = inst->Next_2xx();
         }
         break;
       }
       case Instruction::MONITOR_ENTER: {
         PREAMBLE();
-        Object* obj = shadow_frame.GetVRegReference(inst->VRegA_11x(inst_data));
+        ObjPtr<mirror::Object> obj = shadow_frame.GetVRegReference(inst->VRegA_11x(inst_data));
         if (UNLIKELY(obj == nullptr)) {
           ThrowNullPointerExceptionFromInterpreter();
           HANDLE_PENDING_EXCEPTION();
@@ -418,7 +422,7 @@
       }
       case Instruction::MONITOR_EXIT: {
         PREAMBLE();
-        Object* obj = shadow_frame.GetVRegReference(inst->VRegA_11x(inst_data));
+        ObjPtr<mirror::Object> obj = shadow_frame.GetVRegReference(inst->VRegA_11x(inst_data));
         if (UNLIKELY(obj == nullptr)) {
           ThrowNullPointerExceptionFromInterpreter();
           HANDLE_PENDING_EXCEPTION();
@@ -430,12 +434,15 @@
       }
       case Instruction::CHECK_CAST: {
         PREAMBLE();
-        Class* c = ResolveVerifyAndClinit(inst->VRegB_21c(), shadow_frame.GetMethod(),
-                                          self, false, do_access_check);
+        ObjPtr<mirror::Class> c = ResolveVerifyAndClinit(inst->VRegB_21c(),
+                                                         shadow_frame.GetMethod(),
+                                                         self,
+                                                         false,
+                                                         do_access_check);
         if (UNLIKELY(c == nullptr)) {
           HANDLE_PENDING_EXCEPTION();
         } else {
-          Object* obj = shadow_frame.GetVRegReference(inst->VRegA_21c(inst_data));
+          ObjPtr<mirror::Object> obj = shadow_frame.GetVRegReference(inst->VRegA_21c(inst_data));
           if (UNLIKELY(obj != nullptr && !obj->InstanceOf(c))) {
             ThrowClassCastException(c, obj->GetClass());
             HANDLE_PENDING_EXCEPTION();
@@ -447,12 +454,15 @@
       }
       case Instruction::INSTANCE_OF: {
         PREAMBLE();
-        Class* c = ResolveVerifyAndClinit(inst->VRegC_22c(), shadow_frame.GetMethod(),
-                                          self, false, do_access_check);
+        ObjPtr<mirror::Class> c = ResolveVerifyAndClinit(inst->VRegC_22c(),
+                                                         shadow_frame.GetMethod(),
+                                                         self,
+                                                         false,
+                                                         do_access_check);
         if (UNLIKELY(c == nullptr)) {
           HANDLE_PENDING_EXCEPTION();
         } else {
-          Object* obj = shadow_frame.GetVRegReference(inst->VRegB_22c(inst_data));
+          ObjPtr<mirror::Object> obj = shadow_frame.GetVRegReference(inst->VRegB_22c(inst_data));
           shadow_frame.SetVReg(inst->VRegA_22c(inst_data),
                                (obj != nullptr && obj->InstanceOf(c)) ? 1 : 0);
           inst = inst->Next_2xx();
@@ -461,7 +471,7 @@
       }
       case Instruction::ARRAY_LENGTH:  {
         PREAMBLE();
-        Object* array = shadow_frame.GetVRegReference(inst->VRegB_12x(inst_data));
+        ObjPtr<mirror::Object> array = shadow_frame.GetVRegReference(inst->VRegB_12x(inst_data));
         if (UNLIKELY(array == nullptr)) {
           ThrowNullPointerExceptionFromInterpreter();
           HANDLE_PENDING_EXCEPTION();
@@ -473,9 +483,12 @@
       }
       case Instruction::NEW_INSTANCE: {
         PREAMBLE();
-        Object* obj = nullptr;
-        Class* c = ResolveVerifyAndClinit(inst->VRegB_21c(), shadow_frame.GetMethod(),
-                                          self, false, do_access_check);
+        ObjPtr<mirror::Object> obj = nullptr;
+        ObjPtr<mirror::Class> c = ResolveVerifyAndClinit(inst->VRegB_21c(),
+                                                         shadow_frame.GetMethod(),
+                                                         self,
+                                                         false,
+                                                         do_access_check);
         if (LIKELY(c != nullptr)) {
           if (UNLIKELY(c->IsStringClass())) {
             gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
@@ -494,11 +507,11 @@
           // be finalized without a started runtime.
           if (transaction_active && obj->GetClass()->IsFinalizable()) {
             AbortTransactionF(self, "Allocating finalizable object in transaction: %s",
-                              PrettyTypeOf(obj).c_str());
+                              obj->PrettyTypeOf().c_str());
             HANDLE_PENDING_EXCEPTION();
             break;
           }
-          shadow_frame.SetVRegReference(inst->VRegA_21c(inst_data), obj);
+          shadow_frame.SetVRegReference(inst->VRegA_21c(inst_data), obj.Ptr());
           inst = inst->Next_2xx();
         }
         break;
@@ -506,13 +519,13 @@
       case Instruction::NEW_ARRAY: {
         PREAMBLE();
         int32_t length = shadow_frame.GetVReg(inst->VRegB_22c(inst_data));
-        Object* obj = AllocArrayFromCode<do_access_check, true>(
+        ObjPtr<mirror::Object> obj = AllocArrayFromCode<do_access_check, true>(
             inst->VRegC_22c(), length, shadow_frame.GetMethod(), self,
             Runtime::Current()->GetHeap()->GetCurrentAllocator());
         if (UNLIKELY(obj == nullptr)) {
           HANDLE_PENDING_EXCEPTION();
         } else {
-          shadow_frame.SetVRegReference(inst->VRegA_22c(inst_data), obj);
+          shadow_frame.SetVRegReference(inst->VRegA_22c(inst_data), obj.Ptr());
           inst = inst->Next_2xx();
         }
         break;
@@ -538,7 +551,7 @@
         const uint16_t* payload_addr = reinterpret_cast<const uint16_t*>(inst) + inst->VRegB_31t();
         const Instruction::ArrayDataPayload* payload =
             reinterpret_cast<const Instruction::ArrayDataPayload*>(payload_addr);
-        Object* obj = shadow_frame.GetVRegReference(inst->VRegA_31t(inst_data));
+        ObjPtr<mirror::Object> obj = shadow_frame.GetVRegReference(inst->VRegA_31t(inst_data));
         bool success = FillArrayData(obj, payload);
         if (!success) {
           HANDLE_PENDING_EXCEPTION();
@@ -552,7 +565,8 @@
       }
       case Instruction::THROW: {
         PREAMBLE();
-        Object* exception = shadow_frame.GetVRegReference(inst->VRegA_11x(inst_data));
+        ObjPtr<mirror::Object> exception =
+            shadow_frame.GetVRegReference(inst->VRegA_11x(inst_data));
         if (UNLIKELY(exception == nullptr)) {
           ThrowNullPointerException("throw with null exception");
         } else if (do_assignability_check && !exception->GetClass()->IsThrowableClass()) {
@@ -910,14 +924,14 @@
       }
       case Instruction::AGET_BOOLEAN: {
         PREAMBLE();
-        Object* a = shadow_frame.GetVRegReference(inst->VRegB_23x());
+        ObjPtr<mirror::Object> a = shadow_frame.GetVRegReference(inst->VRegB_23x());
         if (UNLIKELY(a == nullptr)) {
           ThrowNullPointerExceptionFromInterpreter();
           HANDLE_PENDING_EXCEPTION();
           break;
         }
         int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
-        BooleanArray* array = a->AsBooleanArray();
+        ObjPtr<mirror::BooleanArray> array = a->AsBooleanArray();
         if (array->CheckIsValidIndex(index)) {
           shadow_frame.SetVReg(inst->VRegA_23x(inst_data), array->GetWithoutChecks(index));
           inst = inst->Next_2xx();
@@ -928,14 +942,14 @@
       }
       case Instruction::AGET_BYTE: {
         PREAMBLE();
-        Object* a = shadow_frame.GetVRegReference(inst->VRegB_23x());
+        ObjPtr<mirror::Object> a = shadow_frame.GetVRegReference(inst->VRegB_23x());
         if (UNLIKELY(a == nullptr)) {
           ThrowNullPointerExceptionFromInterpreter();
           HANDLE_PENDING_EXCEPTION();
           break;
         }
         int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
-        ByteArray* array = a->AsByteArray();
+        ObjPtr<mirror::ByteArray> array = a->AsByteArray();
         if (array->CheckIsValidIndex(index)) {
           shadow_frame.SetVReg(inst->VRegA_23x(inst_data), array->GetWithoutChecks(index));
           inst = inst->Next_2xx();
@@ -946,14 +960,14 @@
       }
       case Instruction::AGET_CHAR: {
         PREAMBLE();
-        Object* a = shadow_frame.GetVRegReference(inst->VRegB_23x());
+        ObjPtr<mirror::Object> a = shadow_frame.GetVRegReference(inst->VRegB_23x());
         if (UNLIKELY(a == nullptr)) {
           ThrowNullPointerExceptionFromInterpreter();
           HANDLE_PENDING_EXCEPTION();
           break;
         }
         int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
-        CharArray* array = a->AsCharArray();
+        ObjPtr<mirror::CharArray> array = a->AsCharArray();
         if (array->CheckIsValidIndex(index)) {
           shadow_frame.SetVReg(inst->VRegA_23x(inst_data), array->GetWithoutChecks(index));
           inst = inst->Next_2xx();
@@ -964,14 +978,14 @@
       }
       case Instruction::AGET_SHORT: {
         PREAMBLE();
-        Object* a = shadow_frame.GetVRegReference(inst->VRegB_23x());
+        ObjPtr<mirror::Object> a = shadow_frame.GetVRegReference(inst->VRegB_23x());
         if (UNLIKELY(a == nullptr)) {
           ThrowNullPointerExceptionFromInterpreter();
           HANDLE_PENDING_EXCEPTION();
           break;
         }
         int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
-        ShortArray* array = a->AsShortArray();
+        ObjPtr<mirror::ShortArray> array = a->AsShortArray();
         if (array->CheckIsValidIndex(index)) {
           shadow_frame.SetVReg(inst->VRegA_23x(inst_data), array->GetWithoutChecks(index));
           inst = inst->Next_2xx();
@@ -982,15 +996,15 @@
       }
       case Instruction::AGET: {
         PREAMBLE();
-        Object* a = shadow_frame.GetVRegReference(inst->VRegB_23x());
+        ObjPtr<mirror::Object> a = shadow_frame.GetVRegReference(inst->VRegB_23x());
         if (UNLIKELY(a == nullptr)) {
           ThrowNullPointerExceptionFromInterpreter();
           HANDLE_PENDING_EXCEPTION();
           break;
         }
         int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
-        DCHECK(a->IsIntArray() || a->IsFloatArray()) << PrettyTypeOf(a);
-        auto* array = down_cast<IntArray*>(a);
+        DCHECK(a->IsIntArray() || a->IsFloatArray()) << a->PrettyTypeOf();
+        ObjPtr<mirror::IntArray> array = ObjPtr<mirror::IntArray>::DownCast(a);
         if (array->CheckIsValidIndex(index)) {
           shadow_frame.SetVReg(inst->VRegA_23x(inst_data), array->GetWithoutChecks(index));
           inst = inst->Next_2xx();
@@ -1001,15 +1015,15 @@
       }
       case Instruction::AGET_WIDE:  {
         PREAMBLE();
-        Object* a = shadow_frame.GetVRegReference(inst->VRegB_23x());
+        ObjPtr<mirror::Object> a = shadow_frame.GetVRegReference(inst->VRegB_23x());
         if (UNLIKELY(a == nullptr)) {
           ThrowNullPointerExceptionFromInterpreter();
           HANDLE_PENDING_EXCEPTION();
           break;
         }
         int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
-        DCHECK(a->IsLongArray() || a->IsDoubleArray()) << PrettyTypeOf(a);
-        auto* array = down_cast<LongArray*>(a);
+        DCHECK(a->IsLongArray() || a->IsDoubleArray()) << a->PrettyTypeOf();
+        ObjPtr<mirror::LongArray> array = ObjPtr<mirror::LongArray>::DownCast(a);
         if (array->CheckIsValidIndex(index)) {
           shadow_frame.SetVRegLong(inst->VRegA_23x(inst_data), array->GetWithoutChecks(index));
           inst = inst->Next_2xx();
@@ -1020,14 +1034,14 @@
       }
       case Instruction::AGET_OBJECT: {
         PREAMBLE();
-        Object* a = shadow_frame.GetVRegReference(inst->VRegB_23x());
+        ObjPtr<mirror::Object> a = shadow_frame.GetVRegReference(inst->VRegB_23x());
         if (UNLIKELY(a == nullptr)) {
           ThrowNullPointerExceptionFromInterpreter();
           HANDLE_PENDING_EXCEPTION();
           break;
         }
         int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
-        ObjectArray<Object>* array = a->AsObjectArray<Object>();
+        ObjPtr<mirror::ObjectArray<mirror::Object>> array = a->AsObjectArray<mirror::Object>();
         if (array->CheckIsValidIndex(index)) {
           shadow_frame.SetVRegReference(inst->VRegA_23x(inst_data), array->GetWithoutChecks(index));
           inst = inst->Next_2xx();
@@ -1038,7 +1052,7 @@
       }
       case Instruction::APUT_BOOLEAN: {
         PREAMBLE();
-        Object* a = shadow_frame.GetVRegReference(inst->VRegB_23x());
+        ObjPtr<mirror::Object> a = shadow_frame.GetVRegReference(inst->VRegB_23x());
         if (UNLIKELY(a == nullptr)) {
           ThrowNullPointerExceptionFromInterpreter();
           HANDLE_PENDING_EXCEPTION();
@@ -1046,7 +1060,7 @@
         }
         uint8_t val = shadow_frame.GetVReg(inst->VRegA_23x(inst_data));
         int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
-        BooleanArray* array = a->AsBooleanArray();
+        ObjPtr<mirror::BooleanArray> array = a->AsBooleanArray();
         if (array->CheckIsValidIndex(index)) {
           array->SetWithoutChecks<transaction_active>(index, val);
           inst = inst->Next_2xx();
@@ -1057,7 +1071,7 @@
       }
       case Instruction::APUT_BYTE: {
         PREAMBLE();
-        Object* a = shadow_frame.GetVRegReference(inst->VRegB_23x());
+        ObjPtr<mirror::Object> a = shadow_frame.GetVRegReference(inst->VRegB_23x());
         if (UNLIKELY(a == nullptr)) {
           ThrowNullPointerExceptionFromInterpreter();
           HANDLE_PENDING_EXCEPTION();
@@ -1065,7 +1079,7 @@
         }
         int8_t val = shadow_frame.GetVReg(inst->VRegA_23x(inst_data));
         int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
-        ByteArray* array = a->AsByteArray();
+        ObjPtr<mirror::ByteArray> array = a->AsByteArray();
         if (array->CheckIsValidIndex(index)) {
           array->SetWithoutChecks<transaction_active>(index, val);
           inst = inst->Next_2xx();
@@ -1076,7 +1090,7 @@
       }
       case Instruction::APUT_CHAR: {
         PREAMBLE();
-        Object* a = shadow_frame.GetVRegReference(inst->VRegB_23x());
+        ObjPtr<mirror::Object> a = shadow_frame.GetVRegReference(inst->VRegB_23x());
         if (UNLIKELY(a == nullptr)) {
           ThrowNullPointerExceptionFromInterpreter();
           HANDLE_PENDING_EXCEPTION();
@@ -1084,7 +1098,7 @@
         }
         uint16_t val = shadow_frame.GetVReg(inst->VRegA_23x(inst_data));
         int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
-        CharArray* array = a->AsCharArray();
+        ObjPtr<mirror::CharArray> array = a->AsCharArray();
         if (array->CheckIsValidIndex(index)) {
           array->SetWithoutChecks<transaction_active>(index, val);
           inst = inst->Next_2xx();
@@ -1095,7 +1109,7 @@
       }
       case Instruction::APUT_SHORT: {
         PREAMBLE();
-        Object* a = shadow_frame.GetVRegReference(inst->VRegB_23x());
+        ObjPtr<mirror::Object> a = shadow_frame.GetVRegReference(inst->VRegB_23x());
         if (UNLIKELY(a == nullptr)) {
           ThrowNullPointerExceptionFromInterpreter();
           HANDLE_PENDING_EXCEPTION();
@@ -1103,7 +1117,7 @@
         }
         int16_t val = shadow_frame.GetVReg(inst->VRegA_23x(inst_data));
         int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
-        ShortArray* array = a->AsShortArray();
+        ObjPtr<mirror::ShortArray> array = a->AsShortArray();
         if (array->CheckIsValidIndex(index)) {
           array->SetWithoutChecks<transaction_active>(index, val);
           inst = inst->Next_2xx();
@@ -1114,7 +1128,7 @@
       }
       case Instruction::APUT: {
         PREAMBLE();
-        Object* a = shadow_frame.GetVRegReference(inst->VRegB_23x());
+        ObjPtr<mirror::Object> a = shadow_frame.GetVRegReference(inst->VRegB_23x());
         if (UNLIKELY(a == nullptr)) {
           ThrowNullPointerExceptionFromInterpreter();
           HANDLE_PENDING_EXCEPTION();
@@ -1122,8 +1136,8 @@
         }
         int32_t val = shadow_frame.GetVReg(inst->VRegA_23x(inst_data));
         int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
-        DCHECK(a->IsIntArray() || a->IsFloatArray()) << PrettyTypeOf(a);
-        auto* array = down_cast<IntArray*>(a);
+        DCHECK(a->IsIntArray() || a->IsFloatArray()) << a->PrettyTypeOf();
+        ObjPtr<mirror::IntArray> array = ObjPtr<mirror::IntArray>::DownCast(a);
         if (array->CheckIsValidIndex(index)) {
           array->SetWithoutChecks<transaction_active>(index, val);
           inst = inst->Next_2xx();
@@ -1134,7 +1148,7 @@
       }
       case Instruction::APUT_WIDE: {
         PREAMBLE();
-        Object* a = shadow_frame.GetVRegReference(inst->VRegB_23x());
+        ObjPtr<mirror::Object> a = shadow_frame.GetVRegReference(inst->VRegB_23x());
         if (UNLIKELY(a == nullptr)) {
           ThrowNullPointerExceptionFromInterpreter();
           HANDLE_PENDING_EXCEPTION();
@@ -1142,8 +1156,8 @@
         }
         int64_t val = shadow_frame.GetVRegLong(inst->VRegA_23x(inst_data));
         int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
-        DCHECK(a->IsLongArray() || a->IsDoubleArray()) << PrettyTypeOf(a);
-        LongArray* array = down_cast<LongArray*>(a);
+        DCHECK(a->IsLongArray() || a->IsDoubleArray()) << a->PrettyTypeOf();
+        ObjPtr<mirror::LongArray> array = ObjPtr<mirror::LongArray>::DownCast(a);
         if (array->CheckIsValidIndex(index)) {
           array->SetWithoutChecks<transaction_active>(index, val);
           inst = inst->Next_2xx();
@@ -1154,15 +1168,15 @@
       }
       case Instruction::APUT_OBJECT: {
         PREAMBLE();
-        Object* a = shadow_frame.GetVRegReference(inst->VRegB_23x());
+        ObjPtr<mirror::Object> a = shadow_frame.GetVRegReference(inst->VRegB_23x());
         if (UNLIKELY(a == nullptr)) {
           ThrowNullPointerExceptionFromInterpreter();
           HANDLE_PENDING_EXCEPTION();
           break;
         }
         int32_t index = shadow_frame.GetVReg(inst->VRegC_23x());
-        Object* val = shadow_frame.GetVRegReference(inst->VRegA_23x(inst_data));
-        ObjectArray<Object>* array = a->AsObjectArray<Object>();
+        ObjPtr<mirror::Object> val = shadow_frame.GetVRegReference(inst->VRegA_23x(inst_data));
+        ObjPtr<mirror::ObjectArray<mirror::Object>> array = a->AsObjectArray<mirror::Object>();
         if (array->CheckIsValidIndex(index) && array->CheckAssignable(val)) {
           array->SetWithoutChecks<transaction_active>(index, val);
           inst = inst->Next_2xx();
@@ -1544,6 +1558,7 @@
       }
       case Instruction::INVOKE_POLYMORPHIC: {
         PREAMBLE();
+        DCHECK(Runtime::Current()->IsMethodHandlesEnabled());
         bool success = DoInvokePolymorphic<false, do_access_check>(
             self, shadow_frame, inst, inst_data, &result_register);
         POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_4xx);
@@ -1551,11 +1566,11 @@
       }
       case Instruction::INVOKE_POLYMORPHIC_RANGE: {
         PREAMBLE();
+        DCHECK(Runtime::Current()->IsMethodHandlesEnabled());
         bool success = DoInvokePolymorphic<true, do_access_check>(
             self, shadow_frame, inst, inst_data, &result_register);
         POSSIBLY_HANDLE_PENDING_EXCEPTION(!success, Next_4xx);
         break;
-        break;
       }
       case Instruction::NEG_INT:
         PREAMBLE();
diff --git a/runtime/interpreter/interpreter_switch_impl.h b/runtime/interpreter/interpreter_switch_impl.h
index d0c9386..267df2e 100644
--- a/runtime/interpreter/interpreter_switch_impl.h
+++ b/runtime/interpreter/interpreter_switch_impl.h
@@ -21,6 +21,7 @@
 #include "base/mutex.h"
 #include "dex_file.h"
 #include "jvalue.h"
+#include "obj_ptr.h"
 
 namespace art {
 
diff --git a/runtime/interpreter/mterp/arm/footer.S b/runtime/interpreter/mterp/arm/footer.S
index 62e573a..cd32ea2 100644
--- a/runtime/interpreter/mterp/arm/footer.S
+++ b/runtime/interpreter/mterp/arm/footer.S
@@ -156,7 +156,7 @@
     REFRESH_IBASE
     add     r2, rINST, rINST            @ r2<- byte offset
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
-    ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    ands    lr, #THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     bne     .L_suspend_request_pending
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/arm/op_return.S b/runtime/interpreter/mterp/arm/op_return.S
index 1888373..f9c0f0f 100644
--- a/runtime/interpreter/mterp/arm/op_return.S
+++ b/runtime/interpreter/mterp/arm/op_return.S
@@ -8,7 +8,7 @@
     bl      MterpThreadFenceForConstructor
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     mov     r0, rSELF
-    ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    ands    lr, #THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     blne    MterpSuspendCheck                       @ (self)
     mov     r2, rINST, lsr #8           @ r2<- AA
     GET_VREG r0, r2                     @ r0<- vAA
diff --git a/runtime/interpreter/mterp/arm/op_return_void.S b/runtime/interpreter/mterp/arm/op_return_void.S
index cbea2bf..a91ccb3 100644
--- a/runtime/interpreter/mterp/arm/op_return_void.S
+++ b/runtime/interpreter/mterp/arm/op_return_void.S
@@ -2,7 +2,7 @@
     bl      MterpThreadFenceForConstructor
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     mov     r0, rSELF
-    ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    ands    lr, #THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     blne    MterpSuspendCheck                       @ (self)
     mov    r0, #0
     mov    r1, #0
diff --git a/runtime/interpreter/mterp/arm/op_return_void_no_barrier.S b/runtime/interpreter/mterp/arm/op_return_void_no_barrier.S
index 2dde7ae..b953f4c 100644
--- a/runtime/interpreter/mterp/arm/op_return_void_no_barrier.S
+++ b/runtime/interpreter/mterp/arm/op_return_void_no_barrier.S
@@ -1,6 +1,6 @@
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     mov     r0, rSELF
-    ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    ands    lr, #THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     blne    MterpSuspendCheck                       @ (self)
     mov    r0, #0
     mov    r1, #0
diff --git a/runtime/interpreter/mterp/arm/op_return_wide.S b/runtime/interpreter/mterp/arm/op_return_wide.S
index ceae878..df582c0 100644
--- a/runtime/interpreter/mterp/arm/op_return_wide.S
+++ b/runtime/interpreter/mterp/arm/op_return_wide.S
@@ -6,7 +6,7 @@
     bl      MterpThreadFenceForConstructor
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     mov     r0, rSELF
-    ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    ands    lr, #THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     blne    MterpSuspendCheck                       @ (self)
     mov     r2, rINST, lsr #8           @ r2<- AA
     VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[AA]
diff --git a/runtime/interpreter/mterp/arm/op_unused_fa.S b/runtime/interpreter/mterp/arm/op_unused_fa.S
deleted file mode 100644
index 10948dc..0000000
--- a/runtime/interpreter/mterp/arm/op_unused_fa.S
+++ /dev/null
@@ -1 +0,0 @@
-%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm/op_unused_fb.S b/runtime/interpreter/mterp/arm/op_unused_fb.S
deleted file mode 100644
index 10948dc..0000000
--- a/runtime/interpreter/mterp/arm/op_unused_fb.S
+++ /dev/null
@@ -1 +0,0 @@
-%include "arm/unused.S"
diff --git a/runtime/interpreter/mterp/arm64/footer.S b/runtime/interpreter/mterp/arm64/footer.S
index 7628ed3..ada0326 100644
--- a/runtime/interpreter/mterp/arm64/footer.S
+++ b/runtime/interpreter/mterp/arm64/footer.S
@@ -141,7 +141,7 @@
     add     w2, wINST, wINST            // w2<- byte offset
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
     REFRESH_IBASE
-    ands    lr, lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    ands    lr, lr, #THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     b.ne    .L_suspend_request_pending
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
@@ -215,7 +215,7 @@
  */
 MterpCheckSuspendAndContinue:
     ldr     xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]  // refresh xIBASE
-    ands    w7, w7, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    ands    w7, w7, #THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     b.ne    check1
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
@@ -270,7 +270,7 @@
     ldr     lr, [xSELF, #THREAD_FLAGS_OFFSET]
     str     x0, [x2]
     mov     x0, xSELF
-    ands    lr, lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    ands    lr, lr, #THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     b.eq    check2
     bl      MterpSuspendCheck                       // (self)
 check2:
diff --git a/runtime/interpreter/mterp/arm64/op_return.S b/runtime/interpreter/mterp/arm64/op_return.S
index 28630ee..9f125c7 100644
--- a/runtime/interpreter/mterp/arm64/op_return.S
+++ b/runtime/interpreter/mterp/arm64/op_return.S
@@ -8,7 +8,7 @@
     bl      MterpThreadFenceForConstructor
     ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     x0, xSELF
-    ands    w7, w7, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    ands    w7, w7, #THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     b.ne    .L${opcode}_check
 .L${opcode}_return:
     lsr     w2, wINST, #8               // r2<- AA
diff --git a/runtime/interpreter/mterp/arm64/op_return_void.S b/runtime/interpreter/mterp/arm64/op_return_void.S
index 3a5aa56..b253006 100644
--- a/runtime/interpreter/mterp/arm64/op_return_void.S
+++ b/runtime/interpreter/mterp/arm64/op_return_void.S
@@ -2,7 +2,7 @@
     bl      MterpThreadFenceForConstructor
     ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     x0, xSELF
-    ands    w7, w7, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    ands    w7, w7, #THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     b.ne    .L${opcode}_check
 .L${opcode}_return:
     mov     x0, #0
diff --git a/runtime/interpreter/mterp/arm64/op_return_void_no_barrier.S b/runtime/interpreter/mterp/arm64/op_return_void_no_barrier.S
index 1e06953..c817169 100644
--- a/runtime/interpreter/mterp/arm64/op_return_void_no_barrier.S
+++ b/runtime/interpreter/mterp/arm64/op_return_void_no_barrier.S
@@ -1,6 +1,6 @@
     ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     x0, xSELF
-    ands    w7, w7, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    ands    w7, w7, #THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     b.ne    .L${opcode}_check
 .L${opcode}_return:
     mov     x0, #0
diff --git a/runtime/interpreter/mterp/arm64/op_return_wide.S b/runtime/interpreter/mterp/arm64/op_return_wide.S
index c6e1d9d..c47661c 100644
--- a/runtime/interpreter/mterp/arm64/op_return_wide.S
+++ b/runtime/interpreter/mterp/arm64/op_return_wide.S
@@ -7,7 +7,7 @@
     bl      MterpThreadFenceForConstructor
     ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     x0, xSELF
-    ands    w7, w7, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    ands    w7, w7, #THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     b.ne    .L${opcode}_check
 .L${opcode}_return:
     lsr     w2, wINST, #8               // w2<- AA
diff --git a/runtime/interpreter/mterp/arm64/op_unused_fa.S b/runtime/interpreter/mterp/arm64/op_unused_fa.S
deleted file mode 100644
index 204ecef..0000000
--- a/runtime/interpreter/mterp/arm64/op_unused_fa.S
+++ /dev/null
@@ -1 +0,0 @@
-%include "arm64/unused.S"
diff --git a/runtime/interpreter/mterp/arm64/op_unused_fb.S b/runtime/interpreter/mterp/arm64/op_unused_fb.S
deleted file mode 100644
index 204ecef..0000000
--- a/runtime/interpreter/mterp/arm64/op_unused_fb.S
+++ /dev/null
@@ -1 +0,0 @@
-%include "arm64/unused.S"
diff --git a/runtime/interpreter/mterp/config_arm b/runtime/interpreter/mterp/config_arm
index b6caf11..6d9774c 100644
--- a/runtime/interpreter/mterp/config_arm
+++ b/runtime/interpreter/mterp/config_arm
@@ -286,8 +286,8 @@
     # op op_unused_f7 FALLBACK
     # op op_unused_f8 FALLBACK
     # op op_unused_f9 FALLBACK
-    # op op_unused_fa FALLBACK
-    # op op_unused_fb FALLBACK
+    op op_invoke_polymorphic FALLBACK
+    op op_invoke_polymorphic_range FALLBACK
     # op op_unused_fc FALLBACK
     # op op_unused_fd FALLBACK
     # op op_unused_fe FALLBACK
diff --git a/runtime/interpreter/mterp/config_arm64 b/runtime/interpreter/mterp/config_arm64
index c5e06c7..9f32695 100644
--- a/runtime/interpreter/mterp/config_arm64
+++ b/runtime/interpreter/mterp/config_arm64
@@ -284,8 +284,8 @@
     # op op_unused_f7 FALLBACK
     # op op_unused_f8 FALLBACK
     # op op_unused_f9 FALLBACK
-    # op op_unused_fa FALLBACK
-    # op op_unused_fb FALLBACK
+    op op_invoke_polymorphic FALLBACK
+    op op_invoke_polymorphic_range FALLBACK
     # op op_unused_fc FALLBACK
     # op op_unused_fd FALLBACK
     # op op_unused_fe FALLBACK
diff --git a/runtime/interpreter/mterp/config_mips b/runtime/interpreter/mterp/config_mips
index 515cb0b..708a22b 100644
--- a/runtime/interpreter/mterp/config_mips
+++ b/runtime/interpreter/mterp/config_mips
@@ -286,8 +286,8 @@
     # op op_unused_f7 FALLBACK
     # op op_unused_f8 FALLBACK
     # op op_unused_f9 FALLBACK
-    # op op_unused_fa FALLBACK
-    # op op_unused_fb FALLBACK
+    op op_invoke_polymorphic FALLBACK
+    op op_invoke_polymorphic_range FALLBACK
     # op op_unused_fc FALLBACK
     # op op_unused_fd FALLBACK
     # op op_unused_fe FALLBACK
diff --git a/runtime/interpreter/mterp/config_mips64 b/runtime/interpreter/mterp/config_mips64
index aafd248..7643a48 100644
--- a/runtime/interpreter/mterp/config_mips64
+++ b/runtime/interpreter/mterp/config_mips64
@@ -286,8 +286,8 @@
     # op op_unused_f7 FALLBACK
     # op op_unused_f8 FALLBACK
     # op op_unused_f9 FALLBACK
-    # op op_unused_fa FALLBACK
-    # op op_unused_fb FALLBACK
+    op op_invoke_polymorphic FALLBACK
+    op op_invoke_polymorphic_range FALLBACK
     # op op_unused_fc FALLBACK
     # op op_unused_fd FALLBACK
     # op op_unused_fe FALLBACK
diff --git a/runtime/interpreter/mterp/config_x86 b/runtime/interpreter/mterp/config_x86
index 64d8ee8..f454786 100644
--- a/runtime/interpreter/mterp/config_x86
+++ b/runtime/interpreter/mterp/config_x86
@@ -290,8 +290,8 @@
     # op op_unused_f7 FALLBACK
     # op op_unused_f8 FALLBACK
     # op op_unused_f9 FALLBACK
-    # op op_unused_fa FALLBACK
-    # op op_unused_fb FALLBACK
+    op op_invoke_polymorphic FALLBACK
+    op op_invoke_polymorphic_range FALLBACK
     # op op_unused_fc FALLBACK
     # op op_unused_fd FALLBACK
     # op op_unused_fe FALLBACK
diff --git a/runtime/interpreter/mterp/config_x86_64 b/runtime/interpreter/mterp/config_x86_64
index 7c357db..dbfd3d1 100644
--- a/runtime/interpreter/mterp/config_x86_64
+++ b/runtime/interpreter/mterp/config_x86_64
@@ -290,8 +290,8 @@
     # op op_unused_f7 FALLBACK
     # op op_unused_f8 FALLBACK
     # op op_unused_f9 FALLBACK
-    # op op_unused_fa FALLBACK
-    # op op_unused_fb FALLBACK
+    op op_invoke_polymorphic FALLBACK
+    op op_invoke_polymorphic_range FALLBACK
     # op op_unused_fc FALLBACK
     # op op_unused_fd FALLBACK
     # op op_unused_fe FALLBACK
diff --git a/runtime/interpreter/mterp/mips/binop.S b/runtime/interpreter/mterp/mips/binop.S
index 66627e2..862d95a 100644
--- a/runtime/interpreter/mterp/mips/binop.S
+++ b/runtime/interpreter/mterp/mips/binop.S
@@ -30,4 +30,3 @@
     $instr                                 #  $result <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO($result, rOBJ, t0)       #  vAA <- $result
-    /* 11-14 instructions */
diff --git a/runtime/interpreter/mterp/mips/binop2addr.S b/runtime/interpreter/mterp/mips/binop2addr.S
index 548cbcb..17aa8eb 100644
--- a/runtime/interpreter/mterp/mips/binop2addr.S
+++ b/runtime/interpreter/mterp/mips/binop2addr.S
@@ -25,5 +25,4 @@
     $preinstr                              #  optional op
     $instr                                 #  $result <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO($result, rOBJ, t0)       #  vAA <- $result
-    /* 10-13 instructions */
+    SET_VREG_GOTO($result, rOBJ, t0)       #  vA <- $result
diff --git a/runtime/interpreter/mterp/mips/binopLit16.S b/runtime/interpreter/mterp/mips/binopLit16.S
index fc0c9ff..0696e7a 100644
--- a/runtime/interpreter/mterp/mips/binopLit16.S
+++ b/runtime/interpreter/mterp/mips/binopLit16.S
@@ -11,12 +11,11 @@
      * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
      *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
      */
-    # binop/lit16 vA, vB,                  /* +CCCC */
+    /* binop/lit16 vA, vB, +CCCC */
     FETCH_S(a1, 1)                         #  a1 <- ssssCCCC (sign-extended)
     GET_OPB(a2)                            #  a2 <- B
-    GET_OPA(rOBJ)                          #  rOBJ <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_VREG(a0, a2)                       #  a0 <- vB
-    and       rOBJ, rOBJ, 15
     .if $chkzero
     # cmp a1, 0; is second operand zero?
     beqz      a1, common_errDivideByZero
@@ -26,5 +25,4 @@
     $preinstr                              #  optional op
     $instr                                 #  $result <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO($result, rOBJ, t0)       #  vAA <- $result
-    /* 10-13 instructions */
+    SET_VREG_GOTO($result, rOBJ, t0)       #  vA <- $result
diff --git a/runtime/interpreter/mterp/mips/binopLit8.S b/runtime/interpreter/mterp/mips/binopLit8.S
index a591408..382dd2b 100644
--- a/runtime/interpreter/mterp/mips/binopLit8.S
+++ b/runtime/interpreter/mterp/mips/binopLit8.S
@@ -12,7 +12,7 @@
      *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
-    # binop/lit8 vAA, vBB,                 /* +CC */
+    /* binop/lit8 vAA, vBB, +CC */
     FETCH_S(a3, 1)                         #  a3 <- ssssCCBB (sign-extended for CC)
     GET_OPA(rOBJ)                          #  rOBJ <- AA
     and       a2, a3, 255                  #  a2 <- BB
@@ -28,4 +28,3 @@
     $instr                                 #  $result <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO($result, rOBJ, t0)       #  vAA <- $result
-    /* 10-12 instructions */
diff --git a/runtime/interpreter/mterp/mips/binopWide.S b/runtime/interpreter/mterp/mips/binopWide.S
index 608525b..604134d 100644
--- a/runtime/interpreter/mterp/mips/binopWide.S
+++ b/runtime/interpreter/mterp/mips/binopWide.S
@@ -3,10 +3,10 @@
      * Generic 64-bit binary operation.  Provide an "instr" line that
      * specifies an instruction that performs "result = a0-a1 op a2-a3".
      * This could be a MIPS instruction or a function call.  (If the result
-     * comes back in a register other than a0, you can override "result".)
+     * comes back in a register pair other than a0-a1, you can override "result".)
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
-     * vCC (a1).  Useful for integer division and modulus.
+     * vCC (a2-a3).  Useful for integer division and modulus.
      *
      * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
      *      xor-long
@@ -32,4 +32,3 @@
     $instr                                 #  result <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG64_GOTO($result0, $result1, rOBJ, t0)   #  vAA/vAA+1 <- $result0/$result1
-    /* 14-17 instructions */
diff --git a/runtime/interpreter/mterp/mips/binopWide2addr.S b/runtime/interpreter/mterp/mips/binopWide2addr.S
index cc92149..f96fdb2 100644
--- a/runtime/interpreter/mterp/mips/binopWide2addr.S
+++ b/runtime/interpreter/mterp/mips/binopWide2addr.S
@@ -3,22 +3,21 @@
      * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
      * that specifies an instruction that performs "result = a0-a1 op a2-a3".
      * This could be a MIPS instruction or a function call.  (If the result
-     * comes back in a register other than a0, you can override "result".)
+     * comes back in a register pair other than a0-a1, you can override "result".)
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
-     * vCC (a1).  Useful for integer division and modulus.
+     * vB (a2-a3).  Useful for integer division and modulus.
      *
      * For: add-long/2addr, sub-long/2addr, div-long/2addr, rem-long/2addr,
      *      and-long/2addr, or-long/2addr, xor-long/2addr
-     *      rem-double/2addr
      */
     /* binop/2addr vA, vB */
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_OPB(a1)                            #  a1 <- B
     EAS2(a1, rFP, a1)                      #  a1 <- &fp[B]
     EAS2(t0, rFP, rOBJ)                    #  t0 <- &fp[A]
-    LOAD64($arg2, $arg3, a1)               #  a2/a3 <- vBB/vBB+1
-    LOAD64($arg0, $arg1, t0)               #  a0/a1 <- vAA/vAA+1
+    LOAD64($arg2, $arg3, a1)               #  a2/a3 <- vB/vB+1
+    LOAD64($arg0, $arg1, t0)               #  a0/a1 <- vA/vA+1
     .if $chkzero
     or        t0, $arg2, $arg3             #  second arg (a2-a3) is zero?
     beqz      t0, common_errDivideByZero
@@ -28,6 +27,4 @@
     $preinstr                              #  optional op
     $instr                                 #  result <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64($result0, $result1, rOBJ)   #  vAA/vAA+1 <- $result0/$result1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-    /* 12-15 instructions */
+    SET_VREG64_GOTO($result0, $result1, rOBJ, t0)   #  vA/vA+1 <- $result0/$result1
diff --git a/runtime/interpreter/mterp/mips/fbinop.S b/runtime/interpreter/mterp/mips/fbinop.S
index d0d39ae..6c1468c 100644
--- a/runtime/interpreter/mterp/mips/fbinop.S
+++ b/runtime/interpreter/mterp/mips/fbinop.S
@@ -6,7 +6,7 @@
 
     /* binop vAA, vBB, vCC */
     FETCH(a0, 1)                           #  a0 <- CCBB
-    GET_OPA(rOBJ)                          #  s5 <- AA
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
     srl       a3, a0, 8                    #  a3 <- CC
     and       a2, a0, 255                  #  a2 <- BB
     GET_VREG_F(fa1, a3)                    #  a1 <- vCC
@@ -14,6 +14,5 @@
 
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     $instr                                 #  f0 = result
-    SET_VREG_F(fv0, rOBJ)                  #  vAA <- fv0
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG_F_GOTO(fv0, rOBJ, t0)         #  vAA <- fv0
diff --git a/runtime/interpreter/mterp/mips/fbinop2addr.S b/runtime/interpreter/mterp/mips/fbinop2addr.S
index ccb67b1..2caaf9c 100644
--- a/runtime/interpreter/mterp/mips/fbinop2addr.S
+++ b/runtime/interpreter/mterp/mips/fbinop2addr.S
@@ -1,19 +1,18 @@
     /*
      * Generic 32-bit "/2addr" binary operation.  Provide an "instr"
-     * that specifies an instruction that performs "result = a0 op a1".
+     * that specifies an instruction that performs "fv0 = fa0 op fa1".
      * This could be an MIPS instruction or a function call.
      *
      * For: add-float/2addr, sub-float/2addr, mul-float/2addr,
-     * div-float/2addr, rem-float/2addr
+     *      div-float/2addr, rem-float/2addr
      */
     /* binop/2addr vA, vB */
-    GET_OPA4(rOBJ)                         #  t1 <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_OPB(a3)                            #  a3 <- B
     GET_VREG_F(fa0, rOBJ)
     GET_VREG_F(fa1, a3)
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
 
     $instr
-    SET_VREG_F(fv0, rOBJ)                  #  vAA <- result
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG_F_GOTO(fv0, rOBJ, t0)         #  vA <- result
diff --git a/runtime/interpreter/mterp/mips/fbinopWide.S b/runtime/interpreter/mterp/mips/fbinopWide.S
index 3be9325..a1fe91e 100644
--- a/runtime/interpreter/mterp/mips/fbinopWide.S
+++ b/runtime/interpreter/mterp/mips/fbinopWide.S
@@ -1,6 +1,6 @@
     /*
-     * Generic 64-bit binary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = a0-a1 op a2-a3".
+     * Generic 64-bit floating-point binary operation.  Provide an "instr"
+     * line that specifies an instruction that performs "fv0 = fa0 op fa1".
      * This could be an MIPS instruction or a function call.
      *
      * for: add-double, sub-double, mul-double, div-double,
@@ -9,7 +9,7 @@
      */
     /* binop vAA, vBB, vCC */
     FETCH(a0, 1)                           #  a0 <- CCBB
-    GET_OPA(rOBJ)                          #  s5 <- AA
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
     and       a2, a0, 255                  #  a2 <- BB
     srl       a3, a0, 8                    #  a3 <- CC
     EAS2(a2, rFP, a2)                      #  a2 <- &fp[BB]
@@ -19,10 +19,5 @@
 
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     $instr
-    SET_VREG64_F(fv0, fv0f, rOBJ)
-    b         .L${opcode}_finish
-%break
-
-.L${opcode}_finish:
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_F_GOTO(fv0, fv0f, rOBJ, t0)  #  vAA/vAA+1 <- fv0
diff --git a/runtime/interpreter/mterp/mips/fbinopWide2addr.S b/runtime/interpreter/mterp/mips/fbinopWide2addr.S
index 8541f11..7303441 100644
--- a/runtime/interpreter/mterp/mips/fbinopWide2addr.S
+++ b/runtime/interpreter/mterp/mips/fbinopWide2addr.S
@@ -1,10 +1,11 @@
     /*
-     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
-     * that specifies an instruction that performs "result = a0-a1 op a2-a3".
+     * Generic 64-bit floating-point "/2addr" binary operation.
+     * Provide an "instr" line that specifies an instruction that
+     * performs "fv0 = fa0 op fa1".
      * This could be an MIPS instruction or a function call.
      *
      * For: add-double/2addr, sub-double/2addr, mul-double/2addr,
-     *  div-double/2addr, rem-double/2addr
+     *      div-double/2addr, rem-double/2addr
      */
     /* binop/2addr vA, vB */
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
@@ -16,6 +17,5 @@
 
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     $instr
-    SET_VREG64_F(fv0, fv0f, rOBJ)
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_F_GOTO(fv0, fv0f, rOBJ, t0)  #  vA/vA+1 <- fv0
diff --git a/runtime/interpreter/mterp/mips/footer.S b/runtime/interpreter/mterp/mips/footer.S
index 1363751..9909dfe 100644
--- a/runtime/interpreter/mterp/mips/footer.S
+++ b/runtime/interpreter/mterp/mips/footer.S
@@ -151,7 +151,7 @@
     REFRESH_IBASE()
     addu    a2, rINST, rINST            # a2<- byte offset
     FETCH_ADVANCE_INST_RB(a2)           # update rPC, load rINST
-    and     ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    and     ra, THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     bnez    ra, .L_suspend_request_pending
     GET_INST_OPCODE(t0)                 # extract opcode from rINST
     GOTO_OPCODE(t0)                     # jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/funop.S b/runtime/interpreter/mterp/mips/funop.S
index bfb9346..b2b22c9 100644
--- a/runtime/interpreter/mterp/mips/funop.S
+++ b/runtime/interpreter/mterp/mips/funop.S
@@ -1,18 +1,15 @@
     /*
-     * Generic 32-bit unary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = op a0".
+     * Generic 32-bit floating-point unary operation.  Provide an "instr"
+     * line that specifies an instruction that performs "fv0 = op fa0".
      * This could be a MIPS instruction or a function call.
      *
-     * for: int-to-float, float-to-int
+     * for: int-to-float
      */
     /* unop vA, vB */
     GET_OPB(a3)                            #  a3 <- B
-    GET_OPA4(rOBJ)                         #  t0 <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_VREG_F(fa0, a3)
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     $instr
-
-.L${opcode}_set_vreg_f:
-    SET_VREG_F(fv0, rOBJ)
     GET_INST_OPCODE(t1)                    #  extract opcode from rINST
-    GOTO_OPCODE(t1)                        #  jump to next instruction
+    SET_VREG_F_GOTO(fv0, rOBJ, t1)         #  vA <- fv0
diff --git a/runtime/interpreter/mterp/mips/funopWide.S b/runtime/interpreter/mterp/mips/funopWide.S
deleted file mode 100644
index 3d4cf22..0000000
--- a/runtime/interpreter/mterp/mips/funopWide.S
+++ /dev/null
@@ -1,22 +0,0 @@
-%default {"preinstr":"", "ld_arg":"LOAD64_F(fa0, fa0f, a3)", "st_result":"SET_VREG64_F(fv0, fv0f, rOBJ)"}
-    /*
-     * Generic 64-bit unary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = op a0/a1".
-     * This could be a MIPS instruction or a function call.
-     *
-     * long-to-double, double-to-long
-     */
-    /* unop vA, vB */
-    GET_OPA4(rOBJ)                         #  t1 <- A+
-    GET_OPB(a3)                            #  a3 <- B
-    EAS2(a3, rFP, a3)                      #  a3 <- &fp[B]
-    $ld_arg
-    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
-    $preinstr                              #  optional op
-    $instr                                 #  a0/a1 <- op, a2-a3 changed
-
-.L${opcode}_set_vreg:
-    $st_result                             #  vAA <- a0/a1
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-    /* 12-13 instructions */
diff --git a/runtime/interpreter/mterp/mips/funopWider.S b/runtime/interpreter/mterp/mips/funopWider.S
index efb85f3..6862e24 100644
--- a/runtime/interpreter/mterp/mips/funopWider.S
+++ b/runtime/interpreter/mterp/mips/funopWider.S
@@ -1,10 +1,8 @@
-%default {"st_result":"SET_VREG64_F(fv0, fv0f, rOBJ)"}
     /*
-     * Generic 32bit-to-64bit unary operation.  Provide an "instr" line
-     * that specifies an instruction that performs "result = op a0", where
-     * "result" is a 64-bit quantity in a0/a1.
+     * Generic 32bit-to-64bit floating-point unary operation.  Provide an "instr"
+     * line that specifies an instruction that performs "fv0 = op fa0".
      *
-     * For: int-to-double, float-to-long, float-to-double
+     * For: int-to-double, float-to-double
      */
     /* unop vA, vB */
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
@@ -12,8 +10,5 @@
     GET_VREG_F(fa0, a3)
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     $instr
-
-.L${opcode}_set_vreg:
-    $st_result                             #  vA/vA+1 <- a0/a1
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_F_GOTO(fv0, fv0f, rOBJ, t0) #  vA/vA+1 <- fv0
diff --git a/runtime/interpreter/mterp/mips/header.S b/runtime/interpreter/mterp/mips/header.S
index a3a6744..0ce7745 100644
--- a/runtime/interpreter/mterp/mips/header.S
+++ b/runtime/interpreter/mterp/mips/header.S
@@ -153,6 +153,58 @@
 #define fcc1   $$fcc1
 #endif
 
+#ifdef MIPS32REVGE2
+#define SEB(rd, rt) \
+    seb       rd, rt
+#define SEH(rd, rt) \
+    seh       rd, rt
+#define INSERT_HIGH_HALF(rd_lo, rt_hi) \
+    ins       rd_lo, rt_hi, 16, 16
+#else
+#define SEB(rd, rt) \
+    sll       rd, rt, 24; \
+    sra       rd, rd, 24
+#define SEH(rd, rt) \
+    sll       rd, rt, 16; \
+    sra       rd, rd, 16
+/* Clobbers rt_hi on pre-R2. */
+#define INSERT_HIGH_HALF(rd_lo, rt_hi) \
+    sll       rt_hi, rt_hi, 16; \
+    or        rd_lo, rt_hi
+#endif
+
+#ifdef FPU64
+#define MOVE_TO_FPU_HIGH(r, flo, fhi) \
+    mthc1     r, flo
+#else
+#define MOVE_TO_FPU_HIGH(r, flo, fhi) \
+    mtc1      r, fhi
+#endif
+
+#ifdef MIPS32REVGE6
+#define JR(rt) \
+    jic       rt, 0
+#define LSA(rd, rs, rt, sa) \
+    .if sa; \
+    lsa       rd, rs, rt, sa; \
+    .else; \
+    addu      rd, rs, rt; \
+    .endif
+#else
+#define JR(rt) \
+    jalr      zero, rt
+#define LSA(rd, rs, rt, sa) \
+    .if sa; \
+    .set      push; \
+    .set      noat; \
+    sll       AT, rs, sa; \
+    addu      rd, AT, rt; \
+    .set      pop; \
+    .else; \
+    addu      rd, rs, rt; \
+    .endif
+#endif
+
 /*
  * Instead of holding a pointer to the shadow frame, we keep rFP at the base of the vregs.  So,
  * to access other shadow frame fields, we need to use a backwards offset.  Define those here.
@@ -186,12 +238,12 @@
     sw        rPC, OFF_FP_DEX_PC_PTR(rFP)
 
 #define EXPORT_DEX_PC(tmp) \
-    lw   tmp, OFF_FP_CODE_ITEM(rFP) \
-    sw   rPC, OFF_FP_DEX_PC_PTR(rFP) \
-    addu tmp, CODEITEM_INSNS_OFFSET \
-    subu tmp, rPC, tmp \
-    sra  tmp, tmp, 1 \
-    sw   tmp, OFF_FP_DEX_PC(rFP)
+    lw        tmp, OFF_FP_CODE_ITEM(rFP); \
+    sw        rPC, OFF_FP_DEX_PC_PTR(rFP); \
+    addu      tmp, CODEITEM_INSNS_OFFSET; \
+    subu      tmp, rPC, tmp; \
+    sra       tmp, tmp, 1; \
+    sw        tmp, OFF_FP_DEX_PC(rFP)
 
 /*
  * Fetch the next instruction from rPC into rINST.  Does not advance rPC.
@@ -206,18 +258,11 @@
  * exception catch may miss.  (This also implies that it must come after
  * EXPORT_PC().)
  */
-#define FETCH_ADVANCE_INST(_count) lhu rINST, ((_count)*2)(rPC); \
+#define FETCH_ADVANCE_INST(_count) \
+    lhu       rINST, ((_count)*2)(rPC); \
     addu      rPC, rPC, ((_count) * 2)
 
 /*
- * The operation performed here is similar to FETCH_ADVANCE_INST, except the
- * src and dest registers are parameterized (not hard-wired to rPC and rINST).
- */
-#define PREFETCH_ADVANCE_INST(_dreg, _sreg, _count) \
-    lhu       _dreg, ((_count)*2)(_sreg) ;            \
-    addu      _sreg, _sreg, (_count)*2
-
-/*
  * Similar to FETCH_ADVANCE_INST, but does not update rPC.  Used to load
  * rINST ahead of possible exception point.  Be sure to manually advance rPC
  * later.
@@ -232,7 +277,8 @@
  * rPC to point to the next instruction.  "rd" must specify the distance
  * in bytes, *not* 16-bit code units, and may be a signed value.
  */
-#define FETCH_ADVANCE_INST_RB(rd) addu rPC, rPC, rd; \
+#define FETCH_ADVANCE_INST_RB(rd) \
+    addu      rPC, rPC, rd; \
     lhu       rINST, (rPC)
 
 /*
@@ -257,38 +303,75 @@
 #define GET_INST_OPCODE(rd) and rd, rINST, 0xFF
 
 /*
- * Put the prefetched instruction's opcode field into the specified register.
+ * Transform opcode into branch target address.
  */
-#define GET_PREFETCHED_OPCODE(dreg, sreg)   andi     dreg, sreg, 255
+#define GET_OPCODE_TARGET(rd) \
+    sll       rd, rd, ${handler_size_bits}; \
+    addu      rd, rIBASE, rd
 
 /*
  * Begin executing the opcode in rd.
  */
-#define GOTO_OPCODE(rd) sll rd, rd, ${handler_size_bits}; \
-    addu      rd, rIBASE, rd; \
-    jalr      zero, rd
-
-#define GOTO_OPCODE_BASE(_base, rd)  sll rd, rd, ${handler_size_bits}; \
-    addu      rd, _base, rd; \
-    jalr      zero, rd
+#define GOTO_OPCODE(rd) \
+    GET_OPCODE_TARGET(rd); \
+    JR(rd)
 
 /*
  * Get/set the 32-bit value from a Dalvik register.
  */
 #define GET_VREG(rd, rix) LOAD_eas2(rd, rFP, rix)
 
-#define GET_VREG_F(rd, rix) EAS2(AT, rFP, rix); \
-    .set noat; l.s rd, (AT); .set at
+#define GET_VREG_F(rd, rix) \
+    .set noat; \
+    EAS2(AT, rFP, rix); \
+    l.s       rd, (AT); \
+    .set at
 
-#define SET_VREG(rd, rix) .set noat; \
+#ifdef MIPS32REVGE6
+#define SET_VREG(rd, rix) \
+    lsa       t8, rix, rFP, 2; \
+    sw        rd, 0(t8); \
+    lsa       t8, rix, rREFS, 2; \
+    sw        zero, 0(t8)
+#else
+#define SET_VREG(rd, rix) \
+    .set noat; \
     sll       AT, rix, 2; \
     addu      t8, rFP, AT; \
     sw        rd, 0(t8); \
     addu      t8, rREFS, AT; \
     .set at; \
     sw        zero, 0(t8)
+#endif
 
-#define SET_VREG64(rlo, rhi, rix) .set noat; \
+#ifdef MIPS32REVGE6
+#define SET_VREG_OBJECT(rd, rix) \
+    lsa       t8, rix, rFP, 2; \
+    sw        rd, 0(t8); \
+    lsa       t8, rix, rREFS, 2; \
+    sw        rd, 0(t8)
+#else
+#define SET_VREG_OBJECT(rd, rix) \
+    .set noat; \
+    sll       AT, rix, 2; \
+    addu      t8, rFP, AT; \
+    sw        rd, 0(t8); \
+    addu      t8, rREFS, AT; \
+    .set at; \
+    sw        rd, 0(t8)
+#endif
+
+#ifdef MIPS32REVGE6
+#define SET_VREG64(rlo, rhi, rix) \
+    lsa       t8, rix, rFP, 2; \
+    sw        rlo, 0(t8); \
+    sw        rhi, 4(t8); \
+    lsa       t8, rix, rREFS, 2; \
+    sw        zero, 0(t8); \
+    sw        zero, 4(t8)
+#else
+#define SET_VREG64(rlo, rhi, rix) \
+    .set noat; \
     sll       AT, rix, 2; \
     addu      t8, rFP, AT; \
     sw        rlo, 0(t8); \
@@ -297,9 +380,39 @@
     .set at; \
     sw        zero, 0(t8); \
     sw        zero, 4(t8)
+#endif
 
-#ifdef FPU64
-#define SET_VREG64_F(rlo, rhi, rix) .set noat; \
+#ifdef MIPS32REVGE6
+#define SET_VREG_F(rd, rix) \
+    lsa       t8, rix, rFP, 2; \
+    s.s       rd, 0(t8); \
+    lsa       t8, rix, rREFS, 2; \
+    sw        zero, 0(t8)
+#else
+#define SET_VREG_F(rd, rix) \
+    .set noat; \
+    sll       AT, rix, 2; \
+    addu      t8, rFP, AT; \
+    s.s       rd, 0(t8); \
+    addu      t8, rREFS, AT; \
+    .set at; \
+    sw        zero, 0(t8)
+#endif
+
+#ifdef MIPS32REVGE6
+#define SET_VREG64_F(rlo, rhi, rix) \
+    lsa       t8, rix, rFP, 2; \
+    .set noat; \
+    mfhc1     AT, rlo; \
+    s.s       rlo, 0(t8); \
+    sw        AT, 4(t8); \
+    .set at; \
+    lsa       t8, rix, rREFS, 2; \
+    sw        zero, 0(t8); \
+    sw        zero, 4(t8)
+#elif defined(FPU64)
+#define SET_VREG64_F(rlo, rhi, rix) \
+    .set noat; \
     sll       AT, rix, 2; \
     addu      t8, rREFS, AT; \
     sw        zero, 0(t8); \
@@ -310,7 +423,8 @@
     .set at; \
     s.s       rlo, 0(t8)
 #else
-#define SET_VREG64_F(rlo, rhi, rix) .set noat; \
+#define SET_VREG64_F(rlo, rhi, rix) \
+    .set noat; \
     sll       AT, rix, 2; \
     addu      t8, rFP, AT; \
     s.s       rlo, 0(t8); \
@@ -321,18 +435,21 @@
     sw        zero, 4(t8)
 #endif
 
-#define SET_VREG_OBJECT(rd, rix) .set noat; \
-    sll       AT, rix, 2; \
-    addu      t8, rFP, AT; \
-    sw        rd, 0(t8); \
-    addu      t8, rREFS, AT; \
-    .set at; \
-    sw        rd, 0(t8)
-
 /* Combination of the SET_VREG and GOTO_OPCODE functions to save 1 instruction */
-#define SET_VREG_GOTO(rd, rix, dst) .set noreorder; \
-    sll       dst, dst, ${handler_size_bits}; \
-    addu      dst, rIBASE, dst; \
+#ifdef MIPS32REVGE6
+#define SET_VREG_GOTO(rd, rix, dst) \
+    .set noreorder; \
+    GET_OPCODE_TARGET(dst); \
+    lsa       t8, rix, rFP, 2; \
+    sw        rd, 0(t8); \
+    lsa       t8, rix, rREFS, 2; \
+    jalr      zero, dst; \
+    sw        zero, 0(t8); \
+    .set reorder
+#else
+#define SET_VREG_GOTO(rd, rix, dst) \
+    .set noreorder; \
+    GET_OPCODE_TARGET(dst); \
     .set noat; \
     sll       AT, rix, 2; \
     addu      t8, rFP, AT; \
@@ -342,11 +459,51 @@
     jalr      zero, dst; \
     sw        zero, 0(t8); \
     .set reorder
+#endif
+
+/* Combination of the SET_VREG_OBJECT and GOTO_OPCODE functions to save 1 instruction */
+#ifdef MIPS32REVGE6
+#define SET_VREG_OBJECT_GOTO(rd, rix, dst) \
+    .set noreorder; \
+    GET_OPCODE_TARGET(dst); \
+    lsa       t8, rix, rFP, 2; \
+    sw        rd, 0(t8); \
+    lsa       t8, rix, rREFS, 2; \
+    jalr      zero, dst; \
+    sw        rd, 0(t8); \
+    .set reorder
+#else
+#define SET_VREG_OBJECT_GOTO(rd, rix, dst) \
+    .set noreorder; \
+    GET_OPCODE_TARGET(dst); \
+    .set noat; \
+    sll       AT, rix, 2; \
+    addu      t8, rFP, AT; \
+    sw        rd, 0(t8); \
+    addu      t8, rREFS, AT; \
+    .set at; \
+    jalr      zero, dst; \
+    sw        rd, 0(t8); \
+    .set reorder
+#endif
 
 /* Combination of the SET_VREG64 and GOTO_OPCODE functions to save 1 instruction */
-#define SET_VREG64_GOTO(rlo, rhi, rix, dst) .set noreorder; \
-    sll       dst, dst, ${handler_size_bits}; \
-    addu      dst, rIBASE, dst; \
+#ifdef MIPS32REVGE6
+#define SET_VREG64_GOTO(rlo, rhi, rix, dst) \
+    .set noreorder; \
+    GET_OPCODE_TARGET(dst); \
+    lsa       t8, rix, rFP, 2; \
+    sw        rlo, 0(t8); \
+    sw        rhi, 4(t8); \
+    lsa       t8, rix, rREFS, 2; \
+    sw        zero, 0(t8); \
+    jalr      zero, dst; \
+    sw        zero, 4(t8); \
+    .set reorder
+#else
+#define SET_VREG64_GOTO(rlo, rhi, rix, dst) \
+    .set noreorder; \
+    GET_OPCODE_TARGET(dst); \
     .set noat; \
     sll       AT, rix, 2; \
     addu      t8, rFP, AT; \
@@ -358,14 +515,82 @@
     jalr      zero, dst; \
     sw        zero, 4(t8); \
     .set reorder
+#endif
 
-#define SET_VREG_F(rd, rix) .set noat; \
+/* Combination of the SET_VREG_F and GOTO_OPCODE functions to save 1 instruction */
+#ifdef MIPS32REVGE6
+#define SET_VREG_F_GOTO(rd, rix, dst) \
+    .set noreorder; \
+    GET_OPCODE_TARGET(dst); \
+    lsa       t8, rix, rFP, 2; \
+    s.s       rd, 0(t8); \
+    lsa       t8, rix, rREFS, 2; \
+    jalr      zero, dst; \
+    sw        zero, 0(t8); \
+    .set reorder
+#else
+#define SET_VREG_F_GOTO(rd, rix, dst) \
+    .set noreorder; \
+    GET_OPCODE_TARGET(dst); \
+    .set noat; \
     sll       AT, rix, 2; \
     addu      t8, rFP, AT; \
     s.s       rd, 0(t8); \
     addu      t8, rREFS, AT; \
     .set at; \
-    sw        zero, 0(t8)
+    jalr      zero, dst; \
+    sw        zero, 0(t8); \
+    .set reorder
+#endif
+
+/* Combination of the SET_VREG64_F and GOTO_OPCODE functions to save 1 instruction */
+#ifdef MIPS32REVGE6
+#define SET_VREG64_F_GOTO(rlo, rhi, rix, dst) \
+    .set noreorder; \
+    GET_OPCODE_TARGET(dst); \
+    lsa       t8, rix, rFP, 2; \
+    .set noat; \
+    mfhc1     AT, rlo; \
+    s.s       rlo, 0(t8); \
+    sw        AT, 4(t8); \
+    .set at; \
+    lsa       t8, rix, rREFS, 2; \
+    sw        zero, 0(t8); \
+    jalr      zero, dst; \
+    sw        zero, 4(t8); \
+    .set reorder
+#elif defined(FPU64)
+#define SET_VREG64_F_GOTO(rlo, rhi, rix, dst) \
+    .set noreorder; \
+    GET_OPCODE_TARGET(dst); \
+    .set noat; \
+    sll       AT, rix, 2; \
+    addu      t8, rREFS, AT; \
+    sw        zero, 0(t8); \
+    sw        zero, 4(t8); \
+    addu      t8, rFP, AT; \
+    mfhc1     AT, rlo; \
+    sw        AT, 4(t8); \
+    .set at; \
+    jalr      zero, dst; \
+    s.s       rlo, 0(t8); \
+    .set reorder
+#else
+#define SET_VREG64_F_GOTO(rlo, rhi, rix, dst) \
+    .set noreorder; \
+    GET_OPCODE_TARGET(dst); \
+    .set noat; \
+    sll       AT, rix, 2; \
+    addu      t8, rFP, AT; \
+    s.s       rlo, 0(t8); \
+    s.s       rhi, 4(t8); \
+    addu      t8, rREFS, AT; \
+    .set at; \
+    sw        zero, 0(t8); \
+    jalr      zero, dst; \
+    sw        zero, 4(t8); \
+    .set reorder
+#endif
 
 #define GET_OPA(rd) srl rd, rINST, 8
 #ifdef MIPS32REVGE2
@@ -376,60 +601,60 @@
 #define GET_OPB(rd) srl rd, rINST, 12
 
 /*
- * Form an Effective Address rd = rbase + roff<<n;
- * Uses reg AT
+ * Form an Effective Address rd = rbase + roff<<shift;
+ * Uses reg AT on pre-R6.
  */
-#define EASN(rd, rbase, roff, rshift) .set noat; \
-    sll       AT, roff, rshift; \
-    addu      rd, rbase, AT; \
-    .set at
+#define EASN(rd, rbase, roff, shift) LSA(rd, roff, rbase, shift)
 
 #define EAS1(rd, rbase, roff) EASN(rd, rbase, roff, 1)
 #define EAS2(rd, rbase, roff) EASN(rd, rbase, roff, 2)
 #define EAS3(rd, rbase, roff) EASN(rd, rbase, roff, 3)
 #define EAS4(rd, rbase, roff) EASN(rd, rbase, roff, 4)
 
-/*
- * Form an Effective Shift Right rd = rbase + roff>>n;
- * Uses reg AT
- */
-#define ESRN(rd, rbase, roff, rshift) .set noat; \
-    srl       AT, roff, rshift; \
-    addu      rd, rbase, AT; \
+#define LOAD_eas2(rd, rbase, roff) \
+    .set noat; \
+    EAS2(AT, rbase, roff); \
+    lw        rd, 0(AT); \
     .set at
 
-#define LOAD_eas2(rd, rbase, roff) EAS2(AT, rbase, roff); \
-    .set noat; lw rd, 0(AT); .set at
-
-#define STORE_eas2(rd, rbase, roff) EAS2(AT, rbase, roff); \
-    .set noat; sw rd, 0(AT); .set at
+#define STORE_eas2(rd, rbase, roff) \
+    .set noat; \
+    EAS2(AT, rbase, roff); \
+    sw        rd, 0(AT); \
+    .set at
 
 #define LOAD_RB_OFF(rd, rbase, off) lw rd, off(rbase)
 #define STORE_RB_OFF(rd, rbase, off) sw rd, off(rbase)
 
-#define STORE64_off(rlo, rhi, rbase, off) sw rlo, off(rbase); \
+#define STORE64_off(rlo, rhi, rbase, off) \
+    sw        rlo, off(rbase); \
     sw        rhi, (off+4)(rbase)
-#define LOAD64_off(rlo, rhi, rbase, off) lw rlo, off(rbase); \
+#define LOAD64_off(rlo, rhi, rbase, off) \
+    lw        rlo, off(rbase); \
     lw        rhi, (off+4)(rbase)
 
 #define STORE64(rlo, rhi, rbase) STORE64_off(rlo, rhi, rbase, 0)
 #define LOAD64(rlo, rhi, rbase) LOAD64_off(rlo, rhi, rbase, 0)
 
 #ifdef FPU64
-#define STORE64_off_F(rlo, rhi, rbase, off) s.s rlo, off(rbase); \
+#define STORE64_off_F(rlo, rhi, rbase, off) \
+    s.s       rlo, off(rbase); \
     .set noat; \
     mfhc1     AT, rlo; \
     sw        AT, (off+4)(rbase); \
     .set at
-#define LOAD64_off_F(rlo, rhi, rbase, off) l.s rlo, off(rbase); \
+#define LOAD64_off_F(rlo, rhi, rbase, off) \
+    l.s       rlo, off(rbase); \
     .set noat; \
     lw        AT, (off+4)(rbase); \
     mthc1     AT, rlo; \
     .set at
 #else
-#define STORE64_off_F(rlo, rhi, rbase, off) s.s rlo, off(rbase); \
+#define STORE64_off_F(rlo, rhi, rbase, off) \
+    s.s       rlo, off(rbase); \
     s.s       rhi, (off+4)(rbase)
-#define LOAD64_off_F(rlo, rhi, rbase, off) l.s rlo, off(rbase); \
+#define LOAD64_off_F(rlo, rhi, rbase, off) \
+    l.s       rlo, off(rbase); \
     l.s       rhi, (off+4)(rbase)
 #endif
 
@@ -490,3 +715,11 @@
 
 #define REFRESH_IBASE() \
     lw        rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)
+
+/* Constants for float/double_to_int/long conversions */
+#define INT_MIN                 0x80000000
+#define INT_MIN_AS_FLOAT        0xCF000000
+#define INT_MIN_AS_DOUBLE_HIGH  0xC1E00000
+#define LONG_MIN_HIGH           0x80000000
+#define LONG_MIN_AS_FLOAT       0xDF000000
+#define LONG_MIN_AS_DOUBLE_HIGH 0xC3E00000
diff --git a/runtime/interpreter/mterp/mips/invoke.S b/runtime/interpreter/mterp/mips/invoke.S
index bcd3a57..db3b8af 100644
--- a/runtime/interpreter/mterp/mips/invoke.S
+++ b/runtime/interpreter/mterp/mips/invoke.S
@@ -2,8 +2,8 @@
     /*
      * Generic invoke handler wrapper.
      */
-    # op vB, {vD, vE, vF, vG, vA}, class   /* CCCC */
-    # op {vCCCC..v(CCCC+AA-1)}, meth       /* BBBB */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
     .extern $helper
     EXPORT_PC()
     move    a0, rSELF
diff --git a/runtime/interpreter/mterp/mips/op_aget.S b/runtime/interpreter/mterp/mips/op_aget.S
index 8aa8992..e88402c 100644
--- a/runtime/interpreter/mterp/mips/op_aget.S
+++ b/runtime/interpreter/mterp/mips/op_aget.S
@@ -19,11 +19,7 @@
     # null array object?
     beqz      a0, common_errNullObject     #  yes, bail
     LOAD_base_offMirrorArray_length(a3, a0) #  a3 <- arrayObj->length
-    .if $shift
     EASN(a0, a0, a1, $shift)               #  a0 <- arrayObj + index*width
-    .else
-    addu      a0, a0, a1
-    .endif
     # a1 >= a3; compare unsigned index
     bgeu      a1, a3, common_errArrayIndex #  index >= length, bail
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
diff --git a/runtime/interpreter/mterp/mips/op_aget_object.S b/runtime/interpreter/mterp/mips/op_aget_object.S
index e3ab9d8..9c49dfe 100644
--- a/runtime/interpreter/mterp/mips/op_aget_object.S
+++ b/runtime/interpreter/mterp/mips/op_aget_object.S
@@ -14,7 +14,6 @@
     lw   a1, THREAD_EXCEPTION_OFFSET(rSELF)
     PREFETCH_INST(2)                       #  load rINST
     bnez a1, MterpException
-    SET_VREG_OBJECT(v0, rOBJ)              #  vAA <- v0
     ADVANCE(2)                             #  advance rPC
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG_OBJECT_GOTO(v0, rOBJ, t0)     #  vAA <- v0
diff --git a/runtime/interpreter/mterp/mips/op_aput.S b/runtime/interpreter/mterp/mips/op_aput.S
index 53d6ae0..46dcaee 100644
--- a/runtime/interpreter/mterp/mips/op_aput.S
+++ b/runtime/interpreter/mterp/mips/op_aput.S
@@ -17,14 +17,11 @@
     # null array object?
     beqz      a0, common_errNullObject     #  yes, bail
     LOAD_base_offMirrorArray_length(a3, a0) #  a3 <- arrayObj->length
-    .if $shift
     EASN(a0, a0, a1, $shift)               #  a0 <- arrayObj + index*width
-    .else
-    addu      a0, a0, a1
-    .endif
     bgeu      a1, a3, common_errArrayIndex #  index >= length, bail
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     GET_VREG(a2, rOBJ)                     #  a2 <- vAA
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GET_OPCODE_TARGET(t0)
     $store a2, $data_offset(a0)            #  vBB[vCC] <- a2
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    JR(t0)                                 #  jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_aput_wide.S b/runtime/interpreter/mterp/mips/op_aput_wide.S
index ef99261..c3cff56 100644
--- a/runtime/interpreter/mterp/mips/op_aput_wide.S
+++ b/runtime/interpreter/mterp/mips/op_aput_wide.S
@@ -1,7 +1,5 @@
     /*
      * Array put, 64 bits.  vBB[vCC] <- vAA.
-     *
-     * Arrays of long/double are 64-bit aligned, so it's okay to use STRD.
      */
     /* aput-wide vAA, vBB, vCC */
     FETCH(a0, 1)                           #  a0 <- CCBB
@@ -21,5 +19,6 @@
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     LOAD64(a2, a3, rOBJ)                   #  a2/a3 <- vAA/vAA+1
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GET_OPCODE_TARGET(t0)
     STORE64_off(a2, a3, a0, MIRROR_WIDE_ARRAY_DATA_OFFSET) #  a2/a3 <- vBB[vCC]
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    JR(t0)                                 #  jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_array_length.S b/runtime/interpreter/mterp/mips/op_array_length.S
index 2b4a86f..ae2fe68 100644
--- a/runtime/interpreter/mterp/mips/op_array_length.S
+++ b/runtime/interpreter/mterp/mips/op_array_length.S
@@ -1,6 +1,7 @@
     /*
      * Return the length of an array.
      */
+    /* array-length vA, vB */
     GET_OPB(a1)                            #  a1 <- B
     GET_OPA4(a2)                           #  a2 <- A+
     GET_VREG(a0, a1)                       #  a0 <- vB (object ref)
diff --git a/runtime/interpreter/mterp/mips/op_check_cast.S b/runtime/interpreter/mterp/mips/op_check_cast.S
index 9a6cefa..3875ce6 100644
--- a/runtime/interpreter/mterp/mips/op_check_cast.S
+++ b/runtime/interpreter/mterp/mips/op_check_cast.S
@@ -1,7 +1,7 @@
     /*
      * Check to see if a cast from one class to another is allowed.
      */
-    # check-cast vAA, class                /* BBBB */
+    /* check-cast vAA, class@BBBB */
     EXPORT_PC()
     FETCH(a0, 1)                           #  a0 <- BBBB
     GET_OPA(a1)                            #  a1 <- AA
diff --git a/runtime/interpreter/mterp/mips/op_cmpg_double.S b/runtime/interpreter/mterp/mips/op_cmpg_double.S
index e7965a7..b2e7532 100644
--- a/runtime/interpreter/mterp/mips/op_cmpg_double.S
+++ b/runtime/interpreter/mterp/mips/op_cmpg_double.S
@@ -1 +1 @@
-%include "mips/op_cmpl_double.S" { "naninst":"li rTEMP, 1" }
+%include "mips/op_cmpl_double.S" { "gt_bias":"1" }
diff --git a/runtime/interpreter/mterp/mips/op_cmpg_float.S b/runtime/interpreter/mterp/mips/op_cmpg_float.S
index 53519a6..76550b5 100644
--- a/runtime/interpreter/mterp/mips/op_cmpg_float.S
+++ b/runtime/interpreter/mterp/mips/op_cmpg_float.S
@@ -1 +1 @@
-%include "mips/op_cmpl_float.S" { "naninst":"li rTEMP, 1" }
+%include "mips/op_cmpl_float.S" { "gt_bias":"1" }
diff --git a/runtime/interpreter/mterp/mips/op_cmpl_double.S b/runtime/interpreter/mterp/mips/op_cmpl_double.S
index 5a47fd7..369e5b3 100644
--- a/runtime/interpreter/mterp/mips/op_cmpl_double.S
+++ b/runtime/interpreter/mterp/mips/op_cmpl_double.S
@@ -1,53 +1,51 @@
-%default { "naninst":"li rTEMP, -1" }
+%default { "gt_bias":"0" }
     /*
      * Compare two floating-point values. Puts 0(==), 1(>), or -1(<)
-     * into the destination register (rTEMP) based on the comparison results.
-     *
-     * Provide a "naninst" instruction that puts 1 or -1 into rTEMP depending
-     * on what value we'd like to return when one of the operands is NaN.
-     *
-     * See op_cmpl_float for more details.
+     * into the destination register based on the comparison results.
      *
      * For: cmpl-double, cmpg-double
      */
     /* op vAA, vBB, vCC */
 
     FETCH(a0, 1)                           #  a0 <- CCBB
-    and       rOBJ, a0, 255                #  s5 <- BB
+    and       rOBJ, a0, 255                #  rOBJ <- BB
     srl       t0, a0, 8                    #  t0 <- CC
-    EAS2(rOBJ, rFP, rOBJ)                  #  s5 <- &fp[BB]
+    EAS2(rOBJ, rFP, rOBJ)                  #  rOBJ <- &fp[BB]
     EAS2(t0, rFP, t0)                      #  t0 <- &fp[CC]
     LOAD64_F(ft0, ft0f, rOBJ)
     LOAD64_F(ft1, ft1f, t0)
 #ifdef MIPS32REVGE6
-    cmp.ult.d ft2, ft0, ft1
-    li        rTEMP, -1
-    bc1nez    ft2, .L${opcode}_finish
-    cmp.ult.d ft2, ft1, ft0
-    li        rTEMP, 1
-    bc1nez    ft2, .L${opcode}_finish
     cmp.eq.d  ft2, ft0, ft1
     li        rTEMP, 0
-    bc1nez    ft2, .L${opcode}_finish
-    b         .L${opcode}_nan
-#else
-    c.olt.d   fcc0, ft0, ft1
+    bc1nez    ft2, 1f                      # done if vBB == vCC (ordered)
+    .if $gt_bias
+    cmp.lt.d  ft2, ft0, ft1
     li        rTEMP, -1
-    bc1t      fcc0, .L${opcode}_finish
-    c.olt.d   fcc0, ft1, ft0
+    bc1nez    ft2, 1f                      # done if vBB < vCC (ordered)
+    li        rTEMP, 1                     # vBB > vCC or unordered
+    .else
+    cmp.lt.d  ft2, ft1, ft0
     li        rTEMP, 1
-    bc1t      fcc0, .L${opcode}_finish
+    bc1nez    ft2, 1f                      # done if vBB > vCC (ordered)
+    li        rTEMP, -1                    # vBB < vCC or unordered
+    .endif
+#else
     c.eq.d    fcc0, ft0, ft1
     li        rTEMP, 0
-    bc1t      fcc0, .L${opcode}_finish
-    b         .L${opcode}_nan
+    bc1t      fcc0, 1f                     # done if vBB == vCC (ordered)
+    .if $gt_bias
+    c.olt.d   fcc0, ft0, ft1
+    li        rTEMP, -1
+    bc1t      fcc0, 1f                     # done if vBB < vCC (ordered)
+    li        rTEMP, 1                     # vBB > vCC or unordered
+    .else
+    c.olt.d   fcc0, ft1, ft0
+    li        rTEMP, 1
+    bc1t      fcc0, 1f                     # done if vBB > vCC (ordered)
+    li        rTEMP, -1                    # vBB < vCC or unordered
+    .endif
 #endif
-%break
-
-.L${opcode}_nan:
-    $naninst
-
-.L${opcode}_finish:
+1:
     GET_OPA(rOBJ)
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
diff --git a/runtime/interpreter/mterp/mips/op_cmpl_float.S b/runtime/interpreter/mterp/mips/op_cmpl_float.S
index cfd87ee..1dd5506 100644
--- a/runtime/interpreter/mterp/mips/op_cmpl_float.S
+++ b/runtime/interpreter/mterp/mips/op_cmpl_float.S
@@ -1,60 +1,49 @@
-%default { "naninst":"li rTEMP, -1" }
+%default { "gt_bias":"0" }
     /*
-     * Compare two floating-point values.  Puts 0, 1, or -1 into the
-     * destination register rTEMP based on the results of the comparison.
-     *
-     * Provide a "naninst" instruction that puts 1 or -1 into rTEMP depending
-     * on what value we'd like to return when one of the operands is NaN.
-     *
-     * The operation we're implementing is:
-     *   if (x == y)
-     *     return 0;
-     *   else if (x < y)
-     *     return -1;
-     *   else if (x > y)
-     *     return 1;
-     *   else
-     *     return {-1 or 1};  // one or both operands was NaN
+     * Compare two floating-point values. Puts 0(==), 1(>), or -1(<)
+     * into the destination register based on the comparison results.
      *
      * for: cmpl-float, cmpg-float
      */
     /* op vAA, vBB, vCC */
 
-    /* "clasic" form */
     FETCH(a0, 1)                           #  a0 <- CCBB
     and       a2, a0, 255                  #  a2 <- BB
     srl       a3, a0, 8
     GET_VREG_F(ft0, a2)
     GET_VREG_F(ft1, a3)
 #ifdef MIPS32REVGE6
-    cmp.ult.s ft2, ft0, ft1               # Is ft0 < ft1
-    li        rTEMP, -1
-    bc1nez    ft2, .L${opcode}_finish
-    cmp.ult.s ft2, ft1, ft0
-    li        rTEMP, 1
-    bc1nez    ft2, .L${opcode}_finish
     cmp.eq.s  ft2, ft0, ft1
     li        rTEMP, 0
-    bc1nez    ft2, .L${opcode}_finish
-    b         .L${opcode}_nan
-#else
-    c.olt.s   fcc0, ft0, ft1               # Is ft0 < ft1
+    bc1nez    ft2, 1f                      # done if vBB == vCC (ordered)
+    .if $gt_bias
+    cmp.lt.s  ft2, ft0, ft1
     li        rTEMP, -1
-    bc1t      fcc0, .L${opcode}_finish
-    c.olt.s   fcc0, ft1, ft0
+    bc1nez    ft2, 1f                      # done if vBB < vCC (ordered)
+    li        rTEMP, 1                     # vBB > vCC or unordered
+    .else
+    cmp.lt.s  ft2, ft1, ft0
     li        rTEMP, 1
-    bc1t      fcc0, .L${opcode}_finish
+    bc1nez    ft2, 1f                      # done if vBB > vCC (ordered)
+    li        rTEMP, -1                    # vBB < vCC or unordered
+    .endif
+#else
     c.eq.s    fcc0, ft0, ft1
     li        rTEMP, 0
-    bc1t      fcc0, .L${opcode}_finish
-    b         .L${opcode}_nan
+    bc1t      fcc0, 1f                     # done if vBB == vCC (ordered)
+    .if $gt_bias
+    c.olt.s   fcc0, ft0, ft1
+    li        rTEMP, -1
+    bc1t      fcc0, 1f                     # done if vBB < vCC (ordered)
+    li        rTEMP, 1                     # vBB > vCC or unordered
+    .else
+    c.olt.s   fcc0, ft1, ft0
+    li        rTEMP, 1
+    bc1t      fcc0, 1f                     # done if vBB > vCC (ordered)
+    li        rTEMP, -1                    # vBB < vCC or unordered
+    .endif
 #endif
-%break
-
-.L${opcode}_nan:
-    $naninst
-
-.L${opcode}_finish:
+1:
     GET_OPA(rOBJ)
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
diff --git a/runtime/interpreter/mterp/mips/op_const.S b/runtime/interpreter/mterp/mips/op_const.S
index c505761..bd9f873 100644
--- a/runtime/interpreter/mterp/mips/op_const.S
+++ b/runtime/interpreter/mterp/mips/op_const.S
@@ -1,9 +1,8 @@
-    # const vAA,                           /* +BBBBbbbb */
+    /* const vAA, +BBBBbbbb */
     GET_OPA(a3)                            #  a3 <- AA
     FETCH(a0, 1)                           #  a0 <- bbbb (low)
     FETCH(a1, 2)                           #  a1 <- BBBB (high)
     FETCH_ADVANCE_INST(3)                  #  advance rPC, load rINST
-    sll       a1, a1, 16
-    or        a0, a1, a0                   #  a0 <- BBBBbbbb
+    INSERT_HIGH_HALF(a0, a1)               #  a0 <- BBBBbbbb
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, a3, t0)              #  vAA <- a0
diff --git a/runtime/interpreter/mterp/mips/op_const_16.S b/runtime/interpreter/mterp/mips/op_const_16.S
index 5e47633..2ffb30f 100644
--- a/runtime/interpreter/mterp/mips/op_const_16.S
+++ b/runtime/interpreter/mterp/mips/op_const_16.S
@@ -1,4 +1,4 @@
-    # const/16 vAA,                        /* +BBBB */
+    /* const/16 vAA, +BBBB */
     FETCH_S(a0, 1)                         #  a0 <- ssssBBBB (sign-extended)
     GET_OPA(a3)                            #  a3 <- AA
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
diff --git a/runtime/interpreter/mterp/mips/op_const_4.S b/runtime/interpreter/mterp/mips/op_const_4.S
index 8b662f9..6866c78 100644
--- a/runtime/interpreter/mterp/mips/op_const_4.S
+++ b/runtime/interpreter/mterp/mips/op_const_4.S
@@ -1,4 +1,4 @@
-    # const/4 vA,                          /* +B */
+    /* const/4 vA, +B */
     sll       a1, rINST, 16                #  a1 <- Bxxx0000
     GET_OPA(a0)                            #  a0 <- A+
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
diff --git a/runtime/interpreter/mterp/mips/op_const_class.S b/runtime/interpreter/mterp/mips/op_const_class.S
index 7202b11..9adea44 100644
--- a/runtime/interpreter/mterp/mips/op_const_class.S
+++ b/runtime/interpreter/mterp/mips/op_const_class.S
@@ -1,4 +1,4 @@
-    # const/class vAA, Class               /* BBBB */
+    /* const/class vAA, class@BBBB */
     EXPORT_PC()
     FETCH(a0, 1)                        # a0 <- BBBB
     GET_OPA(a1)                         # a1 <- AA
diff --git a/runtime/interpreter/mterp/mips/op_const_high16.S b/runtime/interpreter/mterp/mips/op_const_high16.S
index 36c1c35..5162402 100644
--- a/runtime/interpreter/mterp/mips/op_const_high16.S
+++ b/runtime/interpreter/mterp/mips/op_const_high16.S
@@ -1,4 +1,4 @@
-    # const/high16 vAA,                    /* +BBBB0000 */
+    /* const/high16 vAA, +BBBB0000 */
     FETCH(a0, 1)                           #  a0 <- 0000BBBB (zero-extended)
     GET_OPA(a3)                            #  a3 <- AA
     sll       a0, a0, 16                   #  a0 <- BBBB0000
diff --git a/runtime/interpreter/mterp/mips/op_const_string.S b/runtime/interpreter/mterp/mips/op_const_string.S
index d8eeb46..006e114 100644
--- a/runtime/interpreter/mterp/mips/op_const_string.S
+++ b/runtime/interpreter/mterp/mips/op_const_string.S
@@ -1,4 +1,4 @@
-    # const/string vAA, String             /* BBBB */
+    /* const/string vAA, string@BBBB */
     EXPORT_PC()
     FETCH(a0, 1)                        # a0 <- BBBB
     GET_OPA(a1)                         # a1 <- AA
diff --git a/runtime/interpreter/mterp/mips/op_const_string_jumbo.S b/runtime/interpreter/mterp/mips/op_const_string_jumbo.S
index d732ca1..54cec97 100644
--- a/runtime/interpreter/mterp/mips/op_const_string_jumbo.S
+++ b/runtime/interpreter/mterp/mips/op_const_string_jumbo.S
@@ -1,10 +1,9 @@
-    # const/string vAA, String          /* BBBBBBBB */
+    /* const/string vAA, string@BBBBBBBB */
     EXPORT_PC()
     FETCH(a0, 1)                        # a0 <- bbbb (low)
     FETCH(a2, 2)                        # a2 <- BBBB (high)
     GET_OPA(a1)                         # a1 <- AA
-    sll    a2, a2, 16
-    or     a0, a0, a2                   # a0 <- BBBBbbbb
+    INSERT_HIGH_HALF(a0, a2)            # a0 <- BBBBbbbb
     addu   a2, rFP, OFF_FP_SHADOWFRAME  # a2 <- shadow frame
     move   a3, rSELF
     JAL(MterpConstString)               # v0 <- Mterp(index, tgt_reg, shadow_frame, self)
diff --git a/runtime/interpreter/mterp/mips/op_const_wide.S b/runtime/interpreter/mterp/mips/op_const_wide.S
index 01d0f87..f8911e3 100644
--- a/runtime/interpreter/mterp/mips/op_const_wide.S
+++ b/runtime/interpreter/mterp/mips/op_const_wide.S
@@ -1,14 +1,11 @@
-    # const-wide vAA,                      /* +HHHHhhhhBBBBbbbb */
+    /* const-wide vAA, +HHHHhhhhBBBBbbbb */
     FETCH(a0, 1)                           #  a0 <- bbbb (low)
     FETCH(a1, 2)                           #  a1 <- BBBB (low middle)
     FETCH(a2, 3)                           #  a2 <- hhhh (high middle)
-    sll       a1, 16 #
-    or        a0, a1                       #  a0 <- BBBBbbbb (low word)
+    INSERT_HIGH_HALF(a0, a1)               #  a0 <- BBBBbbbb (low word)
     FETCH(a3, 4)                           #  a3 <- HHHH (high)
     GET_OPA(t1)                            #  t1 <- AA
-    sll       a3, 16
-    or        a1, a3, a2                   #  a1 <- HHHHhhhh (high word)
+    INSERT_HIGH_HALF(a2, a3)               #  a2 <- HHHHhhhh (high word)
     FETCH_ADVANCE_INST(5)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(a0, a1, t1)                 #  vAA <- a0/a1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_GOTO(a0, a2, t1, t0)        #  vAA/vAA+1 <- a0/a2
diff --git a/runtime/interpreter/mterp/mips/op_const_wide_16.S b/runtime/interpreter/mterp/mips/op_const_wide_16.S
index 583d9ef..2ca5ab9 100644
--- a/runtime/interpreter/mterp/mips/op_const_wide_16.S
+++ b/runtime/interpreter/mterp/mips/op_const_wide_16.S
@@ -1,8 +1,7 @@
-    # const-wide/16 vAA,                   /* +BBBB */
+    /* const-wide/16 vAA, +BBBB */
     FETCH_S(a0, 1)                         #  a0 <- ssssBBBB (sign-extended)
     GET_OPA(a3)                            #  a3 <- AA
     sra       a1, a0, 31                   #  a1 <- ssssssss
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(a0, a1, a3)                 #  vAA <- a0/a1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_GOTO(a0, a1, a3, t0)        #  vAA/vAA+1 <- a0/a1
diff --git a/runtime/interpreter/mterp/mips/op_const_wide_32.S b/runtime/interpreter/mterp/mips/op_const_wide_32.S
index 3eb4574..bf802ca 100644
--- a/runtime/interpreter/mterp/mips/op_const_wide_32.S
+++ b/runtime/interpreter/mterp/mips/op_const_wide_32.S
@@ -1,11 +1,9 @@
-    # const-wide/32 vAA,                   /* +BBBBbbbb */
+    /* const-wide/32 vAA, +BBBBbbbb */
     FETCH(a0, 1)                           #  a0 <- 0000bbbb (low)
     GET_OPA(a3)                            #  a3 <- AA
     FETCH_S(a2, 2)                         #  a2 <- ssssBBBB (high)
     FETCH_ADVANCE_INST(3)                  #  advance rPC, load rINST
-    sll       a2, a2, 16
-    or        a0, a0, a2                   #  a0 <- BBBBbbbb
+    INSERT_HIGH_HALF(a0, a2)               #  a0 <- BBBBbbbb
     sra       a1, a0, 31                   #  a1 <- ssssssss
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(a0, a1, a3)                 #  vAA <- a0/a1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_GOTO(a0, a1, a3, t0)        #  vAA/vAA+1 <- a0/a1
diff --git a/runtime/interpreter/mterp/mips/op_const_wide_high16.S b/runtime/interpreter/mterp/mips/op_const_wide_high16.S
index 88382c6..04b90fa 100644
--- a/runtime/interpreter/mterp/mips/op_const_wide_high16.S
+++ b/runtime/interpreter/mterp/mips/op_const_wide_high16.S
@@ -1,9 +1,8 @@
-    # const-wide/high16 vAA,               /* +BBBB000000000000 */
+    /* const-wide/high16 vAA, +BBBB000000000000 */
     FETCH(a1, 1)                           #  a1 <- 0000BBBB (zero-extended)
     GET_OPA(a3)                            #  a3 <- AA
     li        a0, 0                        #  a0 <- 00000000
     sll       a1, 16                       #  a1 <- BBBB0000
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(a0, a1, a3)                 #  vAA <- a0/a1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_GOTO(a0, a1, a3, t0)        #  vAA/vAA+1 <- a0/a1
diff --git a/runtime/interpreter/mterp/mips/op_double_to_int.S b/runtime/interpreter/mterp/mips/op_double_to_int.S
index 30a0a73..3b44964 100644
--- a/runtime/interpreter/mterp/mips/op_double_to_int.S
+++ b/runtime/interpreter/mterp/mips/op_double_to_int.S
@@ -1,58 +1,39 @@
-%include "mips/unopNarrower.S" {"instr":"b d2i_doconv"}
-/*
- * Convert the double in a0/a1 to an int in a0.
- *
- * We have to clip values to int min/max per the specification.  The
- * expected common case is a "reasonable" value that converts directly
- * to modest integer.  The EABI convert function isn't doing this for us.
- */
-%break
+    /*
+     * double-to-int
+     *
+     * We have to clip values to int min/max per the specification.  The
+     * expected common case is a "reasonable" value that converts directly
+     * to modest integer.  The EABI convert function isn't doing this for us.
+     */
+    /* unop vA, vB */
+    GET_OPB(a3)                            #  a3 <- B
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    EAS2(a3, rFP, a3)                      #  a3 <- &fp[B]
+    LOAD64_F(fa0, fa0f, a3)
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
 
-d2i_doconv:
+    li        t0, INT_MIN_AS_DOUBLE_HIGH
+    mtc1      zero, fa1
+    MOVE_TO_FPU_HIGH(t0, fa1, fa1f)
 #ifdef MIPS32REVGE6
-    la        t0, .LDOUBLE_TO_INT_max
-    LOAD64_F(fa1, fa1f, t0)
-    cmp.ule.d ft2, fa1, fa0
-    l.s       fv0, .LDOUBLE_TO_INT_maxret
-    bc1nez    ft2, .L${opcode}_set_vreg_f
-
-    la        t0, .LDOUBLE_TO_INT_min
-    LOAD64_F(fa1, fa1f, t0)
-    cmp.ule.d ft2, fa0, fa1
-    l.s       fv0, .LDOUBLE_TO_INT_minret
-    bc1nez    ft2, .L${opcode}_set_vreg_f
-
-    mov.d     fa1, fa0
-    cmp.un.d  ft2, fa0, fa1
-    li.s      fv0, 0
-    bc1nez    ft2, .L${opcode}_set_vreg_f
+    /*
+     * TODO: simplify this when the MIPS64R6 emulator
+     * supports NAN2008=1.
+     */
+    cmp.le.d  ft0, fa1, fa0
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    bc1nez    ft0, 1f                      #  if INT_MIN <= vB, proceed to truncation
+    cmp.eq.d  ft0, fa0, fa0
+    selnez.d  fa0, fa1, ft0                #  fa0 = ordered(vB) ? INT_MIN_AS_DOUBLE : 0
 #else
-    la        t0, .LDOUBLE_TO_INT_max
-    LOAD64_F(fa1, fa1f, t0)
     c.ole.d   fcc0, fa1, fa0
-    l.s       fv0, .LDOUBLE_TO_INT_maxret
-    bc1t      .L${opcode}_set_vreg_f
-
-    la        t0, .LDOUBLE_TO_INT_min
-    LOAD64_F(fa1, fa1f, t0)
-    c.ole.d   fcc0, fa0, fa1
-    l.s       fv0, .LDOUBLE_TO_INT_minret
-    bc1t      .L${opcode}_set_vreg_f
-
-    mov.d     fa1, fa0
-    c.un.d    fcc0, fa0, fa1
-    li.s      fv0, 0
-    bc1t      .L${opcode}_set_vreg_f
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    bc1t      fcc0, 1f                     #  if INT_MIN <= vB, proceed to truncation
+    c.eq.d    fcc0, fa0, fa0
+    mtc1      zero, fa0
+    MOVE_TO_FPU_HIGH(zero, fa0, fa0f)
+    movt.d    fa0, fa1, fcc0               #  fa0 = ordered(vB) ? INT_MIN_AS_DOUBLE : 0
 #endif
-
-    trunc.w.d  fv0, fa0
-    b         .L${opcode}_set_vreg_f
-
-.LDOUBLE_TO_INT_max:
-    .dword 0x41dfffffffc00000
-.LDOUBLE_TO_INT_min:
-    .dword 0xc1e0000000000000              #  minint, as a double (high word)
-.LDOUBLE_TO_INT_maxret:
-    .word 0x7fffffff
-.LDOUBLE_TO_INT_minret:
-    .word 0x80000000
+1:
+    trunc.w.d fa0, fa0
+    SET_VREG_F_GOTO(fa0, rOBJ, t1)         #  vA <- result
diff --git a/runtime/interpreter/mterp/mips/op_double_to_long.S b/runtime/interpreter/mterp/mips/op_double_to_long.S
index 4f9e367..78d4a8f 100644
--- a/runtime/interpreter/mterp/mips/op_double_to_long.S
+++ b/runtime/interpreter/mterp/mips/op_double_to_long.S
@@ -1,56 +1,61 @@
-%include "mips/funopWide.S" {"instr":"b d2l_doconv", "st_result":"SET_VREG64(rRESULT0, rRESULT1, rOBJ)"}
+    /*
+     * double-to-long
+     *
+     * We have to clip values to long min/max per the specification.  The
+     * expected common case is a "reasonable" value that converts directly
+     * to modest integer.  The EABI convert function isn't doing this for us.
+     */
+    /* unop vA, vB */
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    GET_OPB(a3)                            #  a3 <- B
+    EAS2(a3, rFP, a3)                      #  a3 <- &fp[B]
+    LOAD64_F(fa0, fa0f, a3)
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+
+#ifdef MIPS32REVGE6
+    /*
+     * TODO: simplify this when the MIPS64R6 emulator
+     * supports NAN2008=1.
+     */
+    li        t0, LONG_MIN_AS_DOUBLE_HIGH
+    mtc1      zero, fa1
+    mthc1     t0, fa1
+    cmp.le.d  ft0, fa1, fa0
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    bc1nez    ft0, 1f                      #  if LONG_MIN <= vB, proceed to truncation
+    cmp.eq.d  ft0, fa0, fa0
+    selnez.d  fa0, fa1, ft0                #  fa0 = ordered(vB) ? LONG_MIN_AS_DOUBLE : 0
+1:
+    trunc.l.d fa0, fa0
+    SET_VREG64_F_GOTO(fa0, fa0f, rOBJ, t1) #  vA <- result
+#else
+    c.eq.d    fcc0, fa0, fa0
+    li        rRESULT0, 0
+    li        rRESULT1, 0
+    bc1f      fcc0, .L${opcode}_get_opcode
+
+    li        t0, LONG_MIN_AS_DOUBLE_HIGH
+    mtc1      zero, fa1
+    MOVE_TO_FPU_HIGH(t0, fa1, fa1f)
+    c.ole.d   fcc0, fa0, fa1
+    li        rRESULT1, LONG_MIN_HIGH
+    bc1t      fcc0, .L${opcode}_get_opcode
+
+    neg.d     fa1, fa1
+    c.ole.d   fcc0, fa1, fa0
+    nor       rRESULT0, rRESULT0, zero
+    nor       rRESULT1, rRESULT1, zero
+    bc1t      fcc0, .L${opcode}_get_opcode
+
+    JAL(__fixdfdi)
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    b         .L${opcode}_set_vreg
+#endif
 %break
 
-d2l_doconv:
-#ifdef MIPS32REVGE6
-    la        t0, .LDOUBLE_TO_LONG_max
-    LOAD64_F(fa1, fa1f, t0)
-    cmp.ule.d ft2, fa1, fa0
-    la        t0, .LDOUBLE_TO_LONG_ret_max
-    LOAD64(rRESULT0, rRESULT1, t0)
-    bc1nez    ft2, .L${opcode}_set_vreg
-
-    la        t0, .LDOUBLE_TO_LONG_min
-    LOAD64_F(fa1, fa1f, t0)
-    cmp.ule.d ft2, fa0, fa1
-    la        t0, .LDOUBLE_TO_LONG_ret_min
-    LOAD64(rRESULT0, rRESULT1, t0)
-    bc1nez    ft2, .L${opcode}_set_vreg
-
-    mov.d     fa1, fa0
-    cmp.un.d  ft2, fa0, fa1
-    li        rRESULT0, 0
-    li        rRESULT1, 0
-    bc1nez    ft2, .L${opcode}_set_vreg
-#else
-    la        t0, .LDOUBLE_TO_LONG_max
-    LOAD64_F(fa1, fa1f, t0)
-    c.ole.d   fcc0, fa1, fa0
-    la        t0, .LDOUBLE_TO_LONG_ret_max
-    LOAD64(rRESULT0, rRESULT1, t0)
-    bc1t      .L${opcode}_set_vreg
-
-    la        t0, .LDOUBLE_TO_LONG_min
-    LOAD64_F(fa1, fa1f, t0)
-    c.ole.d   fcc0, fa0, fa1
-    la        t0, .LDOUBLE_TO_LONG_ret_min
-    LOAD64(rRESULT0, rRESULT1, t0)
-    bc1t      .L${opcode}_set_vreg
-
-    mov.d     fa1, fa0
-    c.un.d    fcc0, fa0, fa1
-    li        rRESULT0, 0
-    li        rRESULT1, 0
-    bc1t      .L${opcode}_set_vreg
+#ifndef MIPS32REVGE6
+.L${opcode}_get_opcode:
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+.L${opcode}_set_vreg:
+    SET_VREG64_GOTO(rRESULT0, rRESULT1, rOBJ, t1)   #  vA/vA+1 <- v0/v1
 #endif
-    JAL(__fixdfdi)
-    b         .L${opcode}_set_vreg
-
-.LDOUBLE_TO_LONG_max:
-    .dword 0x43e0000000000000              #  maxlong, as a double (high word)
-.LDOUBLE_TO_LONG_min:
-    .dword 0xc3e0000000000000              #  minlong, as a double (high word)
-.LDOUBLE_TO_LONG_ret_max:
-    .dword 0x7fffffffffffffff
-.LDOUBLE_TO_LONG_ret_min:
-    .dword 0x8000000000000000
diff --git a/runtime/interpreter/mterp/mips/op_fill_array_data.S b/runtime/interpreter/mterp/mips/op_fill_array_data.S
index 8605746..c3cd371 100644
--- a/runtime/interpreter/mterp/mips/op_fill_array_data.S
+++ b/runtime/interpreter/mterp/mips/op_fill_array_data.S
@@ -1,10 +1,9 @@
     /* fill-array-data vAA, +BBBBBBBB */
     EXPORT_PC()
-    FETCH(a0, 1)                           #  a0 <- bbbb (lo)
-    FETCH(a1, 2)                           #  a1 <- BBBB (hi)
+    FETCH(a1, 1)                           #  a1 <- bbbb (lo)
+    FETCH(a0, 2)                           #  a0 <- BBBB (hi)
     GET_OPA(a3)                            #  a3 <- AA
-    sll       a1, a1, 16                   #  a1 <- BBBBbbbb
-    or        a1, a0, a1                   #  a1 <- BBBBbbbb
+    INSERT_HIGH_HALF(a1, a0)               #  a1 <- BBBBbbbb
     GET_VREG(a0, a3)                       #  a0 <- vAA (array object)
     EAS1(a1, rPC, a1)                      #  a1 <- PC + BBBBbbbb*2 (array data off.)
     JAL(MterpFillArrayData)                #  v0 <- Mterp(obj, payload)
diff --git a/runtime/interpreter/mterp/mips/op_filled_new_array.S b/runtime/interpreter/mterp/mips/op_filled_new_array.S
index 3f62fae..9511578 100644
--- a/runtime/interpreter/mterp/mips/op_filled_new_array.S
+++ b/runtime/interpreter/mterp/mips/op_filled_new_array.S
@@ -4,8 +4,8 @@
      *
      * for: filled-new-array, filled-new-array/range
      */
-    # op vB, {vD, vE, vF, vG, vA}, class   /* CCCC */
-    # op {vCCCC..v(CCCC+AA-1)}, type       /* BBBB */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, type@BBBB */
     .extern $helper
     EXPORT_PC()
     addu   a0, rFP, OFF_FP_SHADOWFRAME     # a0 <- shadow frame
diff --git a/runtime/interpreter/mterp/mips/op_float_to_int.S b/runtime/interpreter/mterp/mips/op_float_to_int.S
index e032869..087e50f 100644
--- a/runtime/interpreter/mterp/mips/op_float_to_int.S
+++ b/runtime/interpreter/mterp/mips/op_float_to_int.S
@@ -1,50 +1,36 @@
-%include "mips/funop.S" {"instr":"b f2i_doconv"}
-%break
+    /*
+     * float-to-int
+     *
+     * We have to clip values to int min/max per the specification.  The
+     * expected common case is a "reasonable" value that converts directly
+     * to modest integer.  The EABI convert function isn't doing this for us.
+     */
+    /* unop vA, vB */
+    GET_OPB(a3)                            #  a3 <- B
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    GET_VREG_F(fa0, a3)
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
 
-/*
- * Not an entry point as it is used only once !!
- */
-f2i_doconv:
+    li        t0, INT_MIN_AS_FLOAT
+    mtc1      t0, fa1
 #ifdef MIPS32REVGE6
-    l.s       fa1, .LFLOAT_TO_INT_max
-    cmp.ule.s ft2, fa1, fa0
-    l.s       fv0, .LFLOAT_TO_INT_ret_max
-    bc1nez    ft2, .L${opcode}_set_vreg_f
-
-    l.s       fa1, .LFLOAT_TO_INT_min
-    cmp.ule.s ft2, fa0, fa1
-    l.s       fv0, .LFLOAT_TO_INT_ret_min
-    bc1nez    ft2, .L${opcode}_set_vreg_f
-
-    mov.s     fa1, fa0
-    cmp.un.s  ft2, fa0, fa1
-    li.s      fv0, 0
-    bc1nez    ft2, .L${opcode}_set_vreg_f
+    /*
+     * TODO: simplify this when the MIPS64R6 emulator
+     * supports NAN2008=1.
+     */
+    cmp.le.s  ft0, fa1, fa0
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    bc1nez    ft0, 1f                      #  if INT_MIN <= vB, proceed to truncation
+    cmp.eq.s  ft0, fa0, fa0
+    selnez.s  fa0, fa1, ft0                #  fa0 = ordered(vB) ? INT_MIN_AS_FLOAT : 0
 #else
-    l.s       fa1, .LFLOAT_TO_INT_max
     c.ole.s   fcc0, fa1, fa0
-    l.s       fv0, .LFLOAT_TO_INT_ret_max
-    bc1t      .L${opcode}_set_vreg_f
-
-    l.s       fa1, .LFLOAT_TO_INT_min
-    c.ole.s   fcc0, fa0, fa1
-    l.s       fv0, .LFLOAT_TO_INT_ret_min
-    bc1t      .L${opcode}_set_vreg_f
-
-    mov.s     fa1, fa0
-    c.un.s    fcc0, fa0, fa1
-    li.s      fv0, 0
-    bc1t      .L${opcode}_set_vreg_f
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    bc1t      fcc0, 1f                     #  if INT_MIN <= vB, proceed to truncation
+    c.eq.s    fcc0, fa0, fa0
+    mtc1      zero, fa0
+    movt.s    fa0, fa1, fcc0               #  fa0 = ordered(vB) ? INT_MIN_AS_FLOAT : 0
 #endif
-
-    trunc.w.s  fv0, fa0
-    b         .L${opcode}_set_vreg_f
-
-.LFLOAT_TO_INT_max:
-    .word 0x4f000000
-.LFLOAT_TO_INT_min:
-    .word 0xcf000000
-.LFLOAT_TO_INT_ret_max:
-    .word 0x7fffffff
-.LFLOAT_TO_INT_ret_min:
-    .word 0x80000000
+1:
+    trunc.w.s fa0, fa0
+    SET_VREG_F_GOTO(fa0, rOBJ, t1)         #  vA <- result
diff --git a/runtime/interpreter/mterp/mips/op_float_to_long.S b/runtime/interpreter/mterp/mips/op_float_to_long.S
index 77b2c46..dc88a78 100644
--- a/runtime/interpreter/mterp/mips/op_float_to_long.S
+++ b/runtime/interpreter/mterp/mips/op_float_to_long.S
@@ -1,51 +1,58 @@
-%include "mips/funopWider.S" {"instr":"b f2l_doconv", "st_result":"SET_VREG64(rRESULT0, rRESULT1, rOBJ)"}
-%break
+    /*
+     * float-to-long
+     *
+     * We have to clip values to long min/max per the specification.  The
+     * expected common case is a "reasonable" value that converts directly
+     * to modest integer.  The EABI convert function isn't doing this for us.
+     */
+    /* unop vA, vB */
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    GET_OPB(a3)                            #  a3 <- B
+    GET_VREG_F(fa0, a3)
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
 
-f2l_doconv:
 #ifdef MIPS32REVGE6
-    l.s       fa1, .LLONG_TO_max
-    cmp.ule.s ft2, fa1, fa0
-    li        rRESULT0, ~0
-    li        rRESULT1, ~0x80000000
-    bc1nez    ft2, .L${opcode}_set_vreg
-
-    l.s       fa1, .LLONG_TO_min
-    cmp.ule.s ft2, fa0, fa1
-    li        rRESULT0, 0
-    li        rRESULT1, 0x80000000
-    bc1nez    ft2, .L${opcode}_set_vreg
-
-    mov.s     fa1, fa0
-    cmp.un.s  ft2, fa0, fa1
-    li        rRESULT0, 0
-    li        rRESULT1, 0
-    bc1nez    ft2, .L${opcode}_set_vreg
+    /*
+     * TODO: simplify this when the MIPS64R6 emulator
+     * supports NAN2008=1.
+     */
+    li        t0, LONG_MIN_AS_FLOAT
+    mtc1      t0, fa1
+    cmp.le.s  ft0, fa1, fa0
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    bc1nez    ft0, 1f                      #  if LONG_MIN <= vB, proceed to truncation
+    cmp.eq.s  ft0, fa0, fa0
+    selnez.s  fa0, fa1, ft0                #  fa0 = ordered(vB) ? LONG_MIN_AS_FLOAT : 0
+1:
+    trunc.l.s fa0, fa0
+    SET_VREG64_F_GOTO(fa0, fa0f, rOBJ, t1) #  vA <- result
 #else
-    l.s       fa1, .LLONG_TO_max
-    c.ole.s   fcc0, fa1, fa0
-    li        rRESULT0, ~0
-    li        rRESULT1, ~0x80000000
-    bc1t      .L${opcode}_set_vreg
-
-    l.s       fa1, .LLONG_TO_min
-    c.ole.s   fcc0, fa0, fa1
-    li        rRESULT0, 0
-    li        rRESULT1, 0x80000000
-    bc1t      .L${opcode}_set_vreg
-
-    mov.s     fa1, fa0
-    c.un.s    fcc0, fa0, fa1
+    c.eq.s    fcc0, fa0, fa0
     li        rRESULT0, 0
     li        rRESULT1, 0
-    bc1t      .L${opcode}_set_vreg
-#endif
+    bc1f      fcc0, .L${opcode}_get_opcode
+
+    li        t0, LONG_MIN_AS_FLOAT
+    mtc1      t0, fa1
+    c.ole.s   fcc0, fa0, fa1
+    li        rRESULT1, LONG_MIN_HIGH
+    bc1t      fcc0, .L${opcode}_get_opcode
+
+    neg.s     fa1, fa1
+    c.ole.s   fcc0, fa1, fa0
+    nor       rRESULT0, rRESULT0, zero
+    nor       rRESULT1, rRESULT1, zero
+    bc1t      fcc0, .L${opcode}_get_opcode
 
     JAL(__fixsfdi)
-
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
     b         .L${opcode}_set_vreg
+#endif
+%break
 
-.LLONG_TO_max:
-    .word 0x5f000000
-
-.LLONG_TO_min:
-    .word 0xdf000000
+#ifndef MIPS32REVGE6
+.L${opcode}_get_opcode:
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+.L${opcode}_set_vreg:
+    SET_VREG64_GOTO(rRESULT0, rRESULT1, rOBJ, t1)   #  vA/vA+1 <- v0/v1
+#endif
diff --git a/runtime/interpreter/mterp/mips/op_goto_32.S b/runtime/interpreter/mterp/mips/op_goto_32.S
index 67f52e9..ef5bf6b 100644
--- a/runtime/interpreter/mterp/mips/op_goto_32.S
+++ b/runtime/interpreter/mterp/mips/op_goto_32.S
@@ -8,8 +8,7 @@
      * our "backward branch" test must be "<=0" instead of "<0".
      */
     /* goto/32 +AAAAAAAA */
-    FETCH(a0, 1)                           #  a0 <- aaaa (lo)
+    FETCH(rINST, 1)                        #  rINST <- aaaa (lo)
     FETCH(a1, 2)                           #  a1 <- AAAA (hi)
-    sll       a1, a1, 16
-    or        rINST, a0, a1                #  rINST <- AAAAaaaa
+    INSERT_HIGH_HALF(rINST, a1)            #  rINST <- AAAAaaaa
     b         MterpCommonTakenBranchNoFlags
diff --git a/runtime/interpreter/mterp/mips/op_iget.S b/runtime/interpreter/mterp/mips/op_iget.S
index 86d44fa..01f42d9 100644
--- a/runtime/interpreter/mterp/mips/op_iget.S
+++ b/runtime/interpreter/mterp/mips/op_iget.S
@@ -4,6 +4,7 @@
      *
      * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
      */
+    /* op vA, vB, field@CCCC */
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref CCCC
     GET_OPB(a1)                            # a1 <- B
@@ -15,11 +16,10 @@
     GET_OPA4(a2)                           # a2<- A+
     PREFETCH_INST(2)                       # load rINST
     bnez  a3, MterpPossibleException        # bail out
-    .if $is_object
-    SET_VREG_OBJECT(v0, a2)                # fp[A] <- v0
-    .else
-    SET_VREG(v0, a2)                       # fp[A] <- v0
-    .endif
     ADVANCE(2)                             #  advance rPC
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    .if $is_object
+    SET_VREG_OBJECT_GOTO(v0, a2, t0)       # fp[A] <- v0
+    .else
+    SET_VREG_GOTO(v0, a2, t0)              # fp[A] <- v0
+    .endif
diff --git a/runtime/interpreter/mterp/mips/op_iget_object_quick.S b/runtime/interpreter/mterp/mips/op_iget_object_quick.S
index 31d94b9..95c34d7 100644
--- a/runtime/interpreter/mterp/mips/op_iget_object_quick.S
+++ b/runtime/interpreter/mterp/mips/op_iget_object_quick.S
@@ -9,7 +9,6 @@
     GET_OPA4(a2)                           #  a2<- A+
     PREFETCH_INST(2)                       #  load rINST
     bnez a3, MterpPossibleException        #  bail out
-    SET_VREG_OBJECT(v0, a2)                #  fp[A] <- v0
     ADVANCE(2)                             #  advance rPC
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG_OBJECT_GOTO(v0, a2, t0)       #  fp[A] <- v0
diff --git a/runtime/interpreter/mterp/mips/op_iget_quick.S b/runtime/interpreter/mterp/mips/op_iget_quick.S
index fbafa5b..46277d3 100644
--- a/runtime/interpreter/mterp/mips/op_iget_quick.S
+++ b/runtime/interpreter/mterp/mips/op_iget_quick.S
@@ -1,6 +1,6 @@
 %default { "load":"lw" }
     /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
-    # op vA, vB, offset                    /* CCCC */
+    /* op vA, vB, offset@CCCC */
     GET_OPB(a2)                            #  a2 <- B
     GET_VREG(a3, a2)                       #  a3 <- object we're operating on
     FETCH(a1, 1)                           #  a1 <- field byte offset
diff --git a/runtime/interpreter/mterp/mips/op_iget_wide.S b/runtime/interpreter/mterp/mips/op_iget_wide.S
index 8fe3089..cf5019e 100644
--- a/runtime/interpreter/mterp/mips/op_iget_wide.S
+++ b/runtime/interpreter/mterp/mips/op_iget_wide.S
@@ -3,6 +3,7 @@
      *
      * for: iget-wide
      */
+    /* op vA, vB, field@CCCC */
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field byte offset
     GET_OPB(a1)                            # a1 <- B
@@ -14,7 +15,6 @@
     GET_OPA4(a2)                           # a2<- A+
     PREFETCH_INST(2)                       # load rINST
     bnez a3, MterpException                # bail out
-    SET_VREG64(v0, v1, a2)                 # fp[A] <- v0/v1
     ADVANCE(2)                             # advance rPC
     GET_INST_OPCODE(t0)                    # extract opcode from rINST
-    GOTO_OPCODE(t0)                        # jump to next instruction
+    SET_VREG64_GOTO(v0, v1, a2, t0)        # fp[A] <- v0/v1
diff --git a/runtime/interpreter/mterp/mips/op_iget_wide_quick.S b/runtime/interpreter/mterp/mips/op_iget_wide_quick.S
index 4d2f291..128be57 100644
--- a/runtime/interpreter/mterp/mips/op_iget_wide_quick.S
+++ b/runtime/interpreter/mterp/mips/op_iget_wide_quick.S
@@ -1,4 +1,4 @@
-    # iget-wide-quick vA, vB, offset       /* CCCC */
+    /* iget-wide-quick vA, vB, offset@CCCC */
     GET_OPB(a2)                            #  a2 <- B
     GET_VREG(a3, a2)                       #  a3 <- object we're operating on
     FETCH(a1, 1)                           #  a1 <- field byte offset
@@ -9,5 +9,4 @@
     LOAD64(a0, a1, t0)                     #  a0 <- obj.field (64 bits, aligned)
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(a0, a1, a2)                 #  fp[A] <- a0/a1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_GOTO(a0, a1, a2, t0)        #  fp[A] <- a0/a1
diff --git a/runtime/interpreter/mterp/mips/op_instance_of.S b/runtime/interpreter/mterp/mips/op_instance_of.S
index d2679bd..706dcf3 100644
--- a/runtime/interpreter/mterp/mips/op_instance_of.S
+++ b/runtime/interpreter/mterp/mips/op_instance_of.S
@@ -4,7 +4,7 @@
      * Most common situation is a non-null object, being compared against
      * an already-resolved class.
      */
-    # instance-of vA, vB, class            /* CCCC */
+    /* instance-of vA, vB, class@CCCC */
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- CCCC
     GET_OPB(a1)                            # a1 <- B
diff --git a/runtime/interpreter/mterp/mips/op_int_to_byte.S b/runtime/interpreter/mterp/mips/op_int_to_byte.S
index 77314c62..9266aab 100644
--- a/runtime/interpreter/mterp/mips/op_int_to_byte.S
+++ b/runtime/interpreter/mterp/mips/op_int_to_byte.S
@@ -1 +1 @@
-%include "mips/unop.S" {"preinstr":"sll a0, a0, 24", "instr":"sra a0, a0, 24"}
+%include "mips/unop.S" {"instr":"SEB(a0, a0)"}
diff --git a/runtime/interpreter/mterp/mips/op_int_to_short.S b/runtime/interpreter/mterp/mips/op_int_to_short.S
index 5649c2a..8749cd8 100644
--- a/runtime/interpreter/mterp/mips/op_int_to_short.S
+++ b/runtime/interpreter/mterp/mips/op_int_to_short.S
@@ -1 +1 @@
-%include "mips/unop.S" {"preinstr":"sll a0, 16", "instr":"sra a0, 16"}
+%include "mips/unop.S" {"instr":"SEH(a0, a0)"}
diff --git a/runtime/interpreter/mterp/mips/op_iput.S b/runtime/interpreter/mterp/mips/op_iput.S
index 732a9a4..9133d60 100644
--- a/runtime/interpreter/mterp/mips/op_iput.S
+++ b/runtime/interpreter/mterp/mips/op_iput.S
@@ -4,7 +4,7 @@
      *
      * for: iput, iput-boolean, iput-byte, iput-char, iput-short
      */
-    # op vA, vB, field                     /* CCCC */
+    /* op vA, vB, field@CCCC */
     .extern $handler
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref CCCC
diff --git a/runtime/interpreter/mterp/mips/op_iput_object.S b/runtime/interpreter/mterp/mips/op_iput_object.S
index 6b856e7..cfa56ec 100644
--- a/runtime/interpreter/mterp/mips/op_iput_object.S
+++ b/runtime/interpreter/mterp/mips/op_iput_object.S
@@ -3,7 +3,7 @@
      *
      * for: iput-object, iput-object-volatile
      */
-    # op vA, vB, field                     /* CCCC */
+    /* op vA, vB, field@CCCC */
     EXPORT_PC()
     addu   a0, rFP, OFF_FP_SHADOWFRAME
     move   a1, rPC
diff --git a/runtime/interpreter/mterp/mips/op_iput_object_quick.S b/runtime/interpreter/mterp/mips/op_iput_object_quick.S
index c3f1526..82044f5 100644
--- a/runtime/interpreter/mterp/mips/op_iput_object_quick.S
+++ b/runtime/interpreter/mterp/mips/op_iput_object_quick.S
@@ -1,5 +1,5 @@
     /* For: iput-object-quick */
-    # op vA, vB, offset                 /* CCCC */
+    /* op vA, vB, offset@CCCC */
     EXPORT_PC()
     addu   a0, rFP, OFF_FP_SHADOWFRAME
     move   a1, rPC
diff --git a/runtime/interpreter/mterp/mips/op_iput_quick.S b/runtime/interpreter/mterp/mips/op_iput_quick.S
index 0829666..d9753b1 100644
--- a/runtime/interpreter/mterp/mips/op_iput_quick.S
+++ b/runtime/interpreter/mterp/mips/op_iput_quick.S
@@ -1,6 +1,6 @@
 %default { "store":"sw" }
     /* For: iput-quick, iput-object-quick */
-    # op vA, vB, offset                    /* CCCC */
+    /* op vA, vB, offset@CCCC */
     GET_OPB(a2)                            #  a2 <- B
     GET_VREG(a3, a2)                       #  a3 <- fp[B], the object pointer
     FETCH(a1, 1)                           #  a1 <- field byte offset
@@ -9,6 +9,7 @@
     GET_VREG(a0, a2)                       #  a0 <- fp[A]
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     addu      t0, a3, a1
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    GET_OPCODE_TARGET(t1)
     $store    a0, 0(t0)                    #  obj.field (8/16/32 bits) <- a0
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    JR(t1)                                 #  jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_iput_wide.S b/runtime/interpreter/mterp/mips/op_iput_wide.S
index 6d23f8c..bc3d758 100644
--- a/runtime/interpreter/mterp/mips/op_iput_wide.S
+++ b/runtime/interpreter/mterp/mips/op_iput_wide.S
@@ -1,4 +1,4 @@
-    # iput-wide vA, vB, field              /* CCCC */
+    /* iput-wide vA, vB, field@CCCC */
     .extern artSet64InstanceFromMterp
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref CCCC
diff --git a/runtime/interpreter/mterp/mips/op_iput_wide_quick.S b/runtime/interpreter/mterp/mips/op_iput_wide_quick.S
index 9fdb847..0eb228d 100644
--- a/runtime/interpreter/mterp/mips/op_iput_wide_quick.S
+++ b/runtime/interpreter/mterp/mips/op_iput_wide_quick.S
@@ -1,4 +1,4 @@
-    # iput-wide-quick vA, vB, offset       /* CCCC */
+    /* iput-wide-quick vA, vB, offset@CCCC */
     GET_OPA4(a0)                           #  a0 <- A(+)
     GET_OPB(a1)                            #  a1 <- B
     GET_VREG(a2, a1)                       #  a2 <- fp[B], the object pointer
@@ -9,6 +9,7 @@
     FETCH(a3, 1)                           #  a3 <- field byte offset
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     addu      a2, a2, a3                   #  obj.field (64 bits, aligned) <- a0/a1
-    STORE64(a0, a1, a2)                    #  obj.field (64 bits, aligned) <- a0/a1
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    GET_OPCODE_TARGET(t0)
+    STORE64(a0, a1, a2)                    #  obj.field (64 bits, aligned) <- a0/a1
+    JR(t0)                                 #  jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_long_to_double.S b/runtime/interpreter/mterp/mips/op_long_to_double.S
index b83aaf4..153f582 100644
--- a/runtime/interpreter/mterp/mips/op_long_to_double.S
+++ b/runtime/interpreter/mterp/mips/op_long_to_double.S
@@ -1 +1,20 @@
-%include "mips/funopWide.S" {"instr":"JAL(__floatdidf)", "ld_arg":"LOAD64(rARG0, rARG1, a3)"}
+    /*
+     * long-to-double
+     */
+    /* unop vA, vB */
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    GET_OPB(a3)                            #  a3 <- B
+    EAS2(a3, rFP, a3)                      #  a3 <- &fp[B]
+
+#ifdef MIPS32REVGE6
+    LOAD64_F(fv0, fv0f, a3)
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    cvt.d.l   fv0, fv0
+#else
+    LOAD64(rARG0, rARG1, a3)
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    JAL(__floatdidf)                       #  a0/a1 <- op, a2-a3 changed
+#endif
+
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG64_F_GOTO(fv0, fv0f, rOBJ, t0) #  vA/vA+1 <- result
diff --git a/runtime/interpreter/mterp/mips/op_long_to_float.S b/runtime/interpreter/mterp/mips/op_long_to_float.S
index 27faba5..dd1ab81 100644
--- a/runtime/interpreter/mterp/mips/op_long_to_float.S
+++ b/runtime/interpreter/mterp/mips/op_long_to_float.S
@@ -1 +1,20 @@
-%include "mips/unopNarrower.S" {"instr":"JAL(__floatdisf)", "load":"LOAD64(rARG0, rARG1, a3)"}
+    /*
+     * long-to-float
+     */
+    /* unop vA, vB */
+    GET_OPB(a3)                            #  a3 <- B
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
+    EAS2(a3, rFP, a3)                      #  a3 <- &fp[B]
+
+#ifdef MIPS32REVGE6
+    LOAD64_F(fv0, fv0f, a3)
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    cvt.s.l   fv0, fv0
+#else
+    LOAD64(rARG0, rARG1, a3)
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    JAL(__floatdisf)
+#endif
+
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_F_GOTO(fv0, rOBJ, t0)         #  vA <- fv0
diff --git a/runtime/interpreter/mterp/mips/op_move.S b/runtime/interpreter/mterp/mips/op_move.S
index 76588ba..547ea3a 100644
--- a/runtime/interpreter/mterp/mips/op_move.S
+++ b/runtime/interpreter/mterp/mips/op_move.S
@@ -7,8 +7,7 @@
     GET_VREG(a2, a1)                       #  a2 <- fp[B]
     GET_INST_OPCODE(t0)                    #  t0 <- opcode from rINST
     .if $is_object
-    SET_VREG_OBJECT(a2, a0)                #  fp[A] <- a2
+    SET_VREG_OBJECT_GOTO(a2, a0, t0)       #  fp[A] <- a2
     .else
-    SET_VREG(a2, a0)                       #  fp[A] <- a2
+    SET_VREG_GOTO(a2, a0, t0)              #  fp[A] <- a2
     .endif
-    GOTO_OPCODE(t0)                        #  jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_move_16.S b/runtime/interpreter/mterp/mips/op_move_16.S
index f7de6c2..91b7399 100644
--- a/runtime/interpreter/mterp/mips/op_move_16.S
+++ b/runtime/interpreter/mterp/mips/op_move_16.S
@@ -7,8 +7,7 @@
     GET_VREG(a2, a1)                       #  a2 <- fp[BBBB]
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     .if $is_object
-    SET_VREG_OBJECT(a2, a0)                #  fp[AAAA] <- a2
+    SET_VREG_OBJECT_GOTO(a2, a0, t0)       #  fp[AAAA] <- a2
     .else
-    SET_VREG(a2, a0)                       #  fp[AAAA] <- a2
+    SET_VREG_GOTO(a2, a0, t0)              #  fp[AAAA] <- a2
     .endif
-    GOTO_OPCODE(t0)                        #  jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_move_exception.S b/runtime/interpreter/mterp/mips/op_move_exception.S
index f04a035..f1bece7 100644
--- a/runtime/interpreter/mterp/mips/op_move_exception.S
+++ b/runtime/interpreter/mterp/mips/op_move_exception.S
@@ -2,7 +2,8 @@
     GET_OPA(a2)                                 #  a2 <- AA
     lw    a3, THREAD_EXCEPTION_OFFSET(rSELF)    #  get exception obj
     FETCH_ADVANCE_INST(1)                       #  advance rPC, load rINST
-    SET_VREG_OBJECT(a3, a2)                     #  fp[AA] <- exception obj
     GET_INST_OPCODE(t0)                         #  extract opcode from rINST
+    GET_OPCODE_TARGET(t0)
+    SET_VREG_OBJECT(a3, a2)                     #  fp[AA] <- exception obj
     sw    zero, THREAD_EXCEPTION_OFFSET(rSELF)  #  clear exception
-    GOTO_OPCODE(t0)                             #  jump to next instruction
+    JR(t0)                                      #  jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_move_from16.S b/runtime/interpreter/mterp/mips/op_move_from16.S
index b8be741..90c25c9 100644
--- a/runtime/interpreter/mterp/mips/op_move_from16.S
+++ b/runtime/interpreter/mterp/mips/op_move_from16.S
@@ -7,8 +7,7 @@
     GET_VREG(a2, a1)                       #  a2 <- fp[BBBB]
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     .if $is_object
-    SET_VREG_OBJECT(a2, a0)                #  fp[AA] <- a2
+    SET_VREG_OBJECT_GOTO(a2, a0, t0)       #  fp[AA] <- a2
     .else
-    SET_VREG(a2, a0)                       #  fp[AA] <- a2
+    SET_VREG_GOTO(a2, a0, t0)              #  fp[AA] <- a2
     .endif
-    GOTO_OPCODE(t0)                        #  jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_move_result.S b/runtime/interpreter/mterp/mips/op_move_result.S
index 315c68e..a4d5bfe 100644
--- a/runtime/interpreter/mterp/mips/op_move_result.S
+++ b/runtime/interpreter/mterp/mips/op_move_result.S
@@ -7,8 +7,7 @@
     lw    a0, 0(a0)                        #  a0 <- result.i
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     .if $is_object
-    SET_VREG_OBJECT(a0, a2)                #  fp[AA] <- a0
+    SET_VREG_OBJECT_GOTO(a0, a2, t0)       #  fp[AA] <- a0
     .else
-    SET_VREG(a0, a2)                       #  fp[AA] <- a0
+    SET_VREG_GOTO(a0, a2, t0)              #  fp[AA] <- a0
     .endif
-    GOTO_OPCODE(t0)                        #  jump to next instruction
diff --git a/runtime/interpreter/mterp/mips/op_move_result_wide.S b/runtime/interpreter/mterp/mips/op_move_result_wide.S
index 940c1ff..1259218 100644
--- a/runtime/interpreter/mterp/mips/op_move_result_wide.S
+++ b/runtime/interpreter/mterp/mips/op_move_result_wide.S
@@ -3,6 +3,5 @@
     lw    a3, OFF_FP_RESULT_REGISTER(rFP)  #  get pointer to result JType
     LOAD64(a0, a1, a3)                     #  a0/a1 <- retval.j
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
-    SET_VREG64(a0, a1, a2)                 #  fp[AA] <- a0/a1
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_GOTO(a0, a1, a2, t0)        #  fp[AA] <- a0/a1
diff --git a/runtime/interpreter/mterp/mips/op_move_wide.S b/runtime/interpreter/mterp/mips/op_move_wide.S
index dd224c3..01d0949 100644
--- a/runtime/interpreter/mterp/mips/op_move_wide.S
+++ b/runtime/interpreter/mterp/mips/op_move_wide.S
@@ -5,6 +5,5 @@
     EAS2(a3, rFP, a3)                      #  a3 <- &fp[B]
     LOAD64(a0, a1, a3)                     #  a0/a1 <- fp[B]
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
-    SET_VREG64(a0, a1, a2)                 #  fp[A] <- a0/a1
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_GOTO(a0, a1, a2, t0)        #  fp[A] <- a0/a1
diff --git a/runtime/interpreter/mterp/mips/op_move_wide_16.S b/runtime/interpreter/mterp/mips/op_move_wide_16.S
index d8761eb..587ba04 100644
--- a/runtime/interpreter/mterp/mips/op_move_wide_16.S
+++ b/runtime/interpreter/mterp/mips/op_move_wide_16.S
@@ -5,6 +5,5 @@
     EAS2(a3, rFP, a3)                      #  a3 <- &fp[BBBB]
     LOAD64(a0, a1, a3)                     #  a0/a1 <- fp[BBBB]
     FETCH_ADVANCE_INST(3)                  #  advance rPC, load rINST
-    SET_VREG64(a0, a1, a2)                 #  fp[AAAA] <- a0/a1
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_GOTO(a0, a1, a2, t0)        #  fp[AAAA] <- a0/a1
diff --git a/runtime/interpreter/mterp/mips/op_move_wide_from16.S b/runtime/interpreter/mterp/mips/op_move_wide_from16.S
index 2103fa1..5003fbd 100644
--- a/runtime/interpreter/mterp/mips/op_move_wide_from16.S
+++ b/runtime/interpreter/mterp/mips/op_move_wide_from16.S
@@ -5,6 +5,5 @@
     EAS2(a3, rFP, a3)                      #  a3 <- &fp[BBBB]
     LOAD64(a0, a1, a3)                     #  a0/a1 <- fp[BBBB]
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
-    SET_VREG64(a0, a1, a2)                 #  fp[AA] <- a0/a1
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_GOTO(a0, a1, a2, t0)        #  fp[AA] <- a0/a1
diff --git a/runtime/interpreter/mterp/mips/op_mul_long.S b/runtime/interpreter/mterp/mips/op_mul_long.S
index 803bbec..74b049a 100644
--- a/runtime/interpreter/mterp/mips/op_mul_long.S
+++ b/runtime/interpreter/mterp/mips/op_mul_long.S
@@ -39,5 +39,4 @@
 
 .L${opcode}_finish:
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(v0, v1, a0)                 #  vAA::vAA+1 <- v0(low) :: v1(high)
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_GOTO(v0, v1, a0, t0)        #  vAA/vAA+1 <- v0(low)/v1(high)
diff --git a/runtime/interpreter/mterp/mips/op_mul_long_2addr.S b/runtime/interpreter/mterp/mips/op_mul_long_2addr.S
index 6950b71..683b055 100644
--- a/runtime/interpreter/mterp/mips/op_mul_long_2addr.S
+++ b/runtime/interpreter/mterp/mips/op_mul_long_2addr.S
@@ -26,6 +26,4 @@
 
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t1)                    #  extract opcode from rINST
-    # vAA <- v0 (low)
-    SET_VREG64(v0, v1, rOBJ)               #  vAA+1 <- v1 (high)
-    GOTO_OPCODE(t1)                        #  jump to next instruction
+    SET_VREG64_GOTO(v0, v1, rOBJ, t1)      #  vA/vA+1 <- v0(low)/v1(high)
diff --git a/runtime/interpreter/mterp/mips/op_new_instance.S b/runtime/interpreter/mterp/mips/op_new_instance.S
index 51a09b2..3c9e83f 100644
--- a/runtime/interpreter/mterp/mips/op_new_instance.S
+++ b/runtime/interpreter/mterp/mips/op_new_instance.S
@@ -1,7 +1,7 @@
     /*
      * Create a new instance of a class.
      */
-    # new-instance vAA, class              /* BBBB */
+    /* new-instance vAA, class@BBBB */
     EXPORT_PC()
     addu   a0, rFP, OFF_FP_SHADOWFRAME
     move   a1, rSELF
diff --git a/runtime/interpreter/mterp/mips/op_packed_switch.S b/runtime/interpreter/mterp/mips/op_packed_switch.S
index ffa4f47..0a1ff98 100644
--- a/runtime/interpreter/mterp/mips/op_packed_switch.S
+++ b/runtime/interpreter/mterp/mips/op_packed_switch.S
@@ -12,8 +12,7 @@
     FETCH(a0, 1)                           #  a0 <- bbbb (lo)
     FETCH(a1, 2)                           #  a1 <- BBBB (hi)
     GET_OPA(a3)                            #  a3 <- AA
-    sll       t0, a1, 16
-    or        a0, a0, t0                   #  a0 <- BBBBbbbb
+    INSERT_HIGH_HALF(a0, a1)               #  a0 <- BBBBbbbb
     GET_VREG(a1, a3)                       #  a1 <- vAA
     EAS1(a0, rPC, a0)                      #  a0 <- PC + BBBBbbbb*2
     JAL($func)                             #  a0 <- code-unit branch offset
diff --git a/runtime/interpreter/mterp/mips/op_return.S b/runtime/interpreter/mterp/mips/op_return.S
index 894ae18..44b9395 100644
--- a/runtime/interpreter/mterp/mips/op_return.S
+++ b/runtime/interpreter/mterp/mips/op_return.S
@@ -8,7 +8,7 @@
     JAL(MterpThreadFenceForConstructor)
     lw        ra, THREAD_FLAGS_OFFSET(rSELF)
     move      a0, rSELF
-    and       ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    and       ra, THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     beqz      ra, 1f
     JAL(MterpSuspendCheck)                 # (self)
 1:
diff --git a/runtime/interpreter/mterp/mips/op_return_void.S b/runtime/interpreter/mterp/mips/op_return_void.S
index 35c1326..1f616ea 100644
--- a/runtime/interpreter/mterp/mips/op_return_void.S
+++ b/runtime/interpreter/mterp/mips/op_return_void.S
@@ -2,7 +2,7 @@
     JAL(MterpThreadFenceForConstructor)
     lw        ra, THREAD_FLAGS_OFFSET(rSELF)
     move      a0, rSELF
-    and       ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    and       ra, THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     beqz      ra, 1f
     JAL(MterpSuspendCheck)                 # (self)
 1:
diff --git a/runtime/interpreter/mterp/mips/op_return_void_no_barrier.S b/runtime/interpreter/mterp/mips/op_return_void_no_barrier.S
index 56968b5..e670c28 100644
--- a/runtime/interpreter/mterp/mips/op_return_void_no_barrier.S
+++ b/runtime/interpreter/mterp/mips/op_return_void_no_barrier.S
@@ -1,6 +1,6 @@
     lw     ra, THREAD_FLAGS_OFFSET(rSELF)
     move   a0, rSELF
-    and    ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    and    ra, THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     beqz   ra, 1f
     JAL(MterpSuspendCheck)                 # (self)
 1:
diff --git a/runtime/interpreter/mterp/mips/op_return_wide.S b/runtime/interpreter/mterp/mips/op_return_wide.S
index 91d62bf..f0f679d 100644
--- a/runtime/interpreter/mterp/mips/op_return_wide.S
+++ b/runtime/interpreter/mterp/mips/op_return_wide.S
@@ -6,7 +6,7 @@
     JAL(MterpThreadFenceForConstructor)
     lw        ra, THREAD_FLAGS_OFFSET(rSELF)
     move      a0, rSELF
-    and       ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    and       ra, THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     beqz      ra, 1f
     JAL(MterpSuspendCheck)                 # (self)
 1:
diff --git a/runtime/interpreter/mterp/mips/op_sget.S b/runtime/interpreter/mterp/mips/op_sget.S
index 3efcfbb..64ece1e 100644
--- a/runtime/interpreter/mterp/mips/op_sget.S
+++ b/runtime/interpreter/mterp/mips/op_sget.S
@@ -4,7 +4,7 @@
      *
      * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
      */
-    # op vAA, field                        /* BBBB */
+    /* op vAA, field@BBBB */
     .extern $helper
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref BBBB
@@ -15,11 +15,10 @@
     GET_OPA(a2)                            # a2 <- AA
     PREFETCH_INST(2)
     bnez  a3, MterpException               # bail out
-.if $is_object
-    SET_VREG_OBJECT(v0, a2)                # fp[AA] <- v0
-.else
-    SET_VREG(v0, a2)                       # fp[AA] <- v0
-.endif
     ADVANCE(2)
     GET_INST_OPCODE(t0)                    # extract opcode from rINST
-    GOTO_OPCODE(t0)                        # jump to next instruction
+.if $is_object
+    SET_VREG_OBJECT_GOTO(v0, a2, t0)       # fp[AA] <- v0
+.else
+    SET_VREG_GOTO(v0, a2, t0)              # fp[AA] <- v0
+.endif
diff --git a/runtime/interpreter/mterp/mips/op_sget_wide.S b/runtime/interpreter/mterp/mips/op_sget_wide.S
index 7aee386..c729250 100644
--- a/runtime/interpreter/mterp/mips/op_sget_wide.S
+++ b/runtime/interpreter/mterp/mips/op_sget_wide.S
@@ -1,7 +1,7 @@
     /*
      * 64-bit SGET handler.
      */
-    # sget-wide vAA, field                 /* BBBB */
+    /* sget-wide vAA, field@BBBB */
     .extern artGet64StaticFromCode
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref BBBB
@@ -12,6 +12,5 @@
     bnez  a3, MterpException
     GET_OPA(a1)                            # a1 <- AA
     FETCH_ADVANCE_INST(2)                  # advance rPC, load rINST
-    SET_VREG64(v0, v1, a1)                 # vAA/vAA+1 <- v0/v1
     GET_INST_OPCODE(t0)                    # extract opcode from rINST
-    GOTO_OPCODE(t0)                        # jump to next instruction
+    SET_VREG64_GOTO(v0, v1, a1, t0)        # vAA/vAA+1 <- v0/v1
diff --git a/runtime/interpreter/mterp/mips/op_shl_long.S b/runtime/interpreter/mterp/mips/op_shl_long.S
index 0121669..cc08112 100644
--- a/runtime/interpreter/mterp/mips/op_shl_long.S
+++ b/runtime/interpreter/mterp/mips/op_shl_long.S
@@ -24,7 +24,7 @@
     srl     a0, v1                         #  alo<- alo >> (32-(shift&31))
     sll     v1, a1, a2                     #  rhi<- ahi << (shift&31)
     or      v1, a0                         #  rhi<- rhi | alo
-    SET_VREG64_GOTO(v0, v1, t2, t0)        #  vAA/vAA+1 <- a0/a1
+    SET_VREG64_GOTO(v0, v1, t2, t0)        #  vAA/vAA+1 <- v0/v1
 %break
 
 .L${opcode}_finish:
diff --git a/runtime/interpreter/mterp/mips/op_shl_long_2addr.S b/runtime/interpreter/mterp/mips/op_shl_long_2addr.S
index 8ce6058..93c5783 100644
--- a/runtime/interpreter/mterp/mips/op_shl_long_2addr.S
+++ b/runtime/interpreter/mterp/mips/op_shl_long_2addr.S
@@ -7,7 +7,7 @@
     GET_OPB(a3)                            #  a3 <- B
     GET_VREG(a2, a3)                       #  a2 <- vB
     EAS2(t2, rFP, rOBJ)                    #  t2 <- &fp[A]
-    LOAD64(a0, a1, t2)                     #  a0/a1 <- vAA/vAA+1
+    LOAD64(a0, a1, t2)                     #  a0/a1 <- vA/vA+1
 
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
@@ -20,8 +20,8 @@
     srl     a0, v1                         #  alo<- alo >> (32-(shift&31))
     sll     v1, a1, a2                     #  rhi<- ahi << (shift&31)
     or      v1, a0                         #  rhi<- rhi | alo
-    SET_VREG64_GOTO(v0, v1, rOBJ, t0)      #  vAA/vAA+1 <- a0/a1
+    SET_VREG64_GOTO(v0, v1, rOBJ, t0)      #  vA/vA+1 <- v0/v1
 %break
 
 .L${opcode}_finish:
-    SET_VREG64_GOTO(zero, v0, rOBJ, t0)    #  vAA/vAA+1 <- rlo/rhi
+    SET_VREG64_GOTO(zero, v0, rOBJ, t0)    #  vA/vA+1 <- rlo/rhi
diff --git a/runtime/interpreter/mterp/mips/op_shr_long.S b/runtime/interpreter/mterp/mips/op_shr_long.S
index 4c42758..ea032fe 100644
--- a/runtime/interpreter/mterp/mips/op_shr_long.S
+++ b/runtime/interpreter/mterp/mips/op_shr_long.S
@@ -23,7 +23,7 @@
     sll     a1, 1
     sll     a1, a0                         #  ahi<- ahi << (32-(shift&31))
     or      v0, a1                         #  rlo<- rlo | ahi
-    SET_VREG64_GOTO(v0, v1, t3, t0)        #  vAA/VAA+1 <- v0/v0
+    SET_VREG64_GOTO(v0, v1, t3, t0)        #  vAA/VAA+1 <- v0/v1
 %break
 
 .L${opcode}_finish:
diff --git a/runtime/interpreter/mterp/mips/op_shr_long_2addr.S b/runtime/interpreter/mterp/mips/op_shr_long_2addr.S
index 3adc085..c805ea4 100644
--- a/runtime/interpreter/mterp/mips/op_shr_long_2addr.S
+++ b/runtime/interpreter/mterp/mips/op_shr_long_2addr.S
@@ -7,7 +7,7 @@
     GET_OPB(a3)                            #  a3 <- B
     GET_VREG(a2, a3)                       #  a2 <- vB
     EAS2(t0, rFP, t2)                      #  t0 <- &fp[A]
-    LOAD64(a0, a1, t0)                     #  a0/a1 <- vAA/vAA+1
+    LOAD64(a0, a1, t0)                     #  a0/a1 <- vA/vA+1
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
 
@@ -19,9 +19,9 @@
     sll     a1, 1
     sll     a1, a0                         #  ahi<- ahi << (32-(shift&31))
     or      v0, a1                         #  rlo<- rlo | ahi
-    SET_VREG64_GOTO(v0, v1, t2, t0)        #  vAA/vAA+1 <- a0/a1
+    SET_VREG64_GOTO(v0, v1, t2, t0)        #  vA/vA+1 <- v0/v1
 %break
 
 .L${opcode}_finish:
     sra     a3, a1, 31                     #  a3<- sign(ah)
-    SET_VREG64_GOTO(v1, a3, t2, t0)        #  vAA/vAA+1 <- rlo/rhi
+    SET_VREG64_GOTO(v1, a3, t2, t0)        #  vA/vA+1 <- rlo/rhi
diff --git a/runtime/interpreter/mterp/mips/op_sput.S b/runtime/interpreter/mterp/mips/op_sput.S
index ee313b9..7034a0e 100644
--- a/runtime/interpreter/mterp/mips/op_sput.S
+++ b/runtime/interpreter/mterp/mips/op_sput.S
@@ -4,7 +4,7 @@
      *
      * for: sput, sput-boolean, sput-byte, sput-char, sput-short
      */
-    # op vAA, field                        /* BBBB */
+    /* op vAA, field@BBBB */
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref BBBB
     GET_OPA(a3)                            # a3 <- AA
diff --git a/runtime/interpreter/mterp/mips/op_sput_wide.S b/runtime/interpreter/mterp/mips/op_sput_wide.S
index 1e11466..3b347fc 100644
--- a/runtime/interpreter/mterp/mips/op_sput_wide.S
+++ b/runtime/interpreter/mterp/mips/op_sput_wide.S
@@ -1,7 +1,7 @@
     /*
      * 64-bit SPUT handler.
      */
-    # sput-wide vAA, field                 /* BBBB */
+    /* sput-wide vAA, field@BBBB */
     .extern artSet64IndirectStaticFromMterp
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref CCCC
diff --git a/runtime/interpreter/mterp/mips/op_unused_fa.S b/runtime/interpreter/mterp/mips/op_unused_fa.S
deleted file mode 100644
index 99ef3cf..0000000
--- a/runtime/interpreter/mterp/mips/op_unused_fa.S
+++ /dev/null
@@ -1 +0,0 @@
-%include "mips/unused.S"
diff --git a/runtime/interpreter/mterp/mips/op_unused_fb.S b/runtime/interpreter/mterp/mips/op_unused_fb.S
deleted file mode 100644
index 99ef3cf..0000000
--- a/runtime/interpreter/mterp/mips/op_unused_fb.S
+++ /dev/null
@@ -1 +0,0 @@
-%include "mips/unused.S"
diff --git a/runtime/interpreter/mterp/mips/op_ushr_long_2addr.S b/runtime/interpreter/mterp/mips/op_ushr_long_2addr.S
index ccf1f7e..9e93f34 100644
--- a/runtime/interpreter/mterp/mips/op_ushr_long_2addr.S
+++ b/runtime/interpreter/mterp/mips/op_ushr_long_2addr.S
@@ -7,7 +7,7 @@
     GET_OPB(a3)                            #  a3 <- B
     GET_VREG(a2, a3)                       #  a2 <- vB
     EAS2(t0, rFP, t3)                      #  t0 <- &fp[A]
-    LOAD64(a0, a1, t0)                     #  a0/a1 <- vAA/vAA+1
+    LOAD64(a0, a1, t0)                     #  a0/a1 <- vA/vA+1
 
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
@@ -20,8 +20,8 @@
     sll       a1, 1
     sll       a1, a0                       #  ahi<- ahi << (32-(shift&31))
     or        v0, a1                       #  rlo<- rlo | ahi
-    SET_VREG64_GOTO(v0, v1, t3, t0)        #  vAA/vAA+1 <- a0/a1
+    SET_VREG64_GOTO(v0, v1, t3, t0)        #  vA/vA+1 <- v0/v1
 %break
 
 .L${opcode}_finish:
-    SET_VREG64_GOTO(v1, zero, t3, t0)      #  vAA/vAA+1 <- rlo/rhi
+    SET_VREG64_GOTO(v1, zero, t3, t0)      #  vA/vA+1 <- rlo/rhi
diff --git a/runtime/interpreter/mterp/mips/unop.S b/runtime/interpreter/mterp/mips/unop.S
index 52a8f0a..bc99263 100644
--- a/runtime/interpreter/mterp/mips/unop.S
+++ b/runtime/interpreter/mterp/mips/unop.S
@@ -1,11 +1,11 @@
 %default {"preinstr":"", "result0":"a0"}
     /*
      * Generic 32-bit unary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = op a0".
+     * specifies an instruction that performs "result0 = op a0".
      * This could be a MIPS instruction or a function call.
      *
-     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
-     *      int-to-byte, int-to-char, int-to-short
+     * for: int-to-byte, int-to-char, int-to-short,
+     *      neg-int, not-int, neg-float
      */
     /* unop vA, vB */
     GET_OPB(a3)                            #  a3 <- B
@@ -15,5 +15,4 @@
     $preinstr                              #  optional op
     $instr                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t1)                    #  extract opcode from rINST
-    SET_VREG_GOTO($result0, t0, t1)        #  vAA <- result0
-    /* 9-10 instructions */
+    SET_VREG_GOTO($result0, t0, t1)        #  vA <- result0
diff --git a/runtime/interpreter/mterp/mips/unopNarrower.S b/runtime/interpreter/mterp/mips/unopNarrower.S
index 9c38bad..0196e27 100644
--- a/runtime/interpreter/mterp/mips/unopNarrower.S
+++ b/runtime/interpreter/mterp/mips/unopNarrower.S
@@ -1,24 +1,16 @@
 %default {"load":"LOAD64_F(fa0, fa0f, a3)"}
     /*
-     * Generic 64bit-to-32bit unary operation.  Provide an "instr" line
-     * that specifies an instruction that performs "result = op a0/a1", where
-     * "result" is a 32-bit quantity in a0.
+     * Generic 64bit-to-32bit floating-point unary operation.  Provide an "instr"
+     * line that specifies an instruction that performs "fv0 = op fa0".
      *
-     * For: long-to-float, double-to-int, double-to-float
-     * If hard floating point support is available, use fa0 as the parameter,
-     * except for long-to-float opcode.
-     * (This would work for long-to-int, but that instruction is actually
-     * an exact match for OP_MOVE.)
+     * For: double-to-float
      */
     /* unop vA, vB */
     GET_OPB(a3)                            #  a3 <- B
-    GET_OPA4(rOBJ)                         #  t1 <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     EAS2(a3, rFP, a3)                      #  a3 <- &fp[B]
     $load
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     $instr
-
-.L${opcode}_set_vreg_f:
-    SET_VREG_F(fv0, rOBJ)                  #  vA <- result0
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG_F_GOTO(fv0, rOBJ, t0)         #  vA <- fv0
diff --git a/runtime/interpreter/mterp/mips/unopWide.S b/runtime/interpreter/mterp/mips/unopWide.S
index fd25dff..135d9fa 100644
--- a/runtime/interpreter/mterp/mips/unopWide.S
+++ b/runtime/interpreter/mterp/mips/unopWide.S
@@ -1,7 +1,7 @@
 %default {"preinstr":"", "result0":"a0", "result1":"a1"}
     /*
      * Generic 64-bit unary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = op a0/a1".
+     * specifies an instruction that performs "result0/result1 = op a0/a1".
      * This could be MIPS instruction or a function call.
      *
      * For: neg-long, not-long, neg-double,
@@ -10,11 +10,9 @@
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_OPB(a3)                            #  a3 <- B
     EAS2(a3, rFP, a3)                      #  a3 <- &fp[B]
-    LOAD64(a0, a1, a3)                     #  a0/a1 <- vAA
+    LOAD64(a0, a1, a3)                     #  a0/a1 <- vA
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     $preinstr                              #  optional op
     $instr                                 #  a0/a1 <- op, a2-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64($result0, $result1, rOBJ)   #  vAA <- a0/a1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-    /* 12-13 instructions */
+    SET_VREG64_GOTO($result0, $result1, rOBJ, t0)   #  vA/vA+1 <- a0/a1
diff --git a/runtime/interpreter/mterp/mips/unopWider.S b/runtime/interpreter/mterp/mips/unopWider.S
index 1c18837..ca888ad 100644
--- a/runtime/interpreter/mterp/mips/unopWider.S
+++ b/runtime/interpreter/mterp/mips/unopWider.S
@@ -1,8 +1,7 @@
 %default {"preinstr":"", "result0":"a0", "result1":"a1"}
     /*
      * Generic 32bit-to-64bit unary operation.  Provide an "instr" line
-     * that specifies an instruction that performs "result = op a0", where
-     * "result" is a 64-bit quantity in a0/a1.
+     * that specifies an instruction that performs "result0/result1 = op a0".
      *
      * For: int-to-long
      */
@@ -14,6 +13,4 @@
     $preinstr                              #  optional op
     $instr                                 #  result <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64($result0, $result1, rOBJ)   #  vA/vA+1 <- a0/a1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-    /* 10-11 instructions */
+    SET_VREG64_GOTO($result0, $result1, rOBJ, t0)   #  vA/vA+1 <- a0/a1
diff --git a/runtime/interpreter/mterp/mips64/footer.S b/runtime/interpreter/mterp/mips64/footer.S
index 4063162..64772c8 100644
--- a/runtime/interpreter/mterp/mips64/footer.S
+++ b/runtime/interpreter/mterp/mips64/footer.S
@@ -108,7 +108,7 @@
     REFRESH_IBASE
     daddu   a2, rINST, rINST            # a2<- byte offset
     FETCH_ADVANCE_INST_RB a2            # update rPC, load rINST
-    and     ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    and     ra, THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     bnezc   ra, .L_suspend_request_pending
     GET_INST_OPCODE v0                  # extract opcode from rINST
     GOTO_OPCODE v0                      # jump to next instruction
@@ -225,7 +225,7 @@
     lw      ra, THREAD_FLAGS_OFFSET(rSELF)
     sd      a0, 0(a2)
     move    a0, rSELF
-    and     ra, ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    and     ra, ra, THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     beqzc   ra, check2
     jal     MterpSuspendCheck                       # (self)
 check2:
diff --git a/runtime/interpreter/mterp/mips64/op_return.S b/runtime/interpreter/mterp/mips64/op_return.S
index b10c03f..edd795f 100644
--- a/runtime/interpreter/mterp/mips64/op_return.S
+++ b/runtime/interpreter/mterp/mips64/op_return.S
@@ -10,7 +10,7 @@
     jal     MterpThreadFenceForConstructor
     lw      ra, THREAD_FLAGS_OFFSET(rSELF)
     move    a0, rSELF
-    and     ra, ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    and     ra, ra, THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     beqzc   ra, 1f
     jal     MterpSuspendCheck           # (self)
 1:
diff --git a/runtime/interpreter/mterp/mips64/op_return_void.S b/runtime/interpreter/mterp/mips64/op_return_void.S
index 05253ae..f6eee91 100644
--- a/runtime/interpreter/mterp/mips64/op_return_void.S
+++ b/runtime/interpreter/mterp/mips64/op_return_void.S
@@ -3,7 +3,7 @@
     jal     MterpThreadFenceForConstructor
     lw      ra, THREAD_FLAGS_OFFSET(rSELF)
     move    a0, rSELF
-    and     ra, ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    and     ra, ra, THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     beqzc   ra, 1f
     jal     MterpSuspendCheck           # (self)
 1:
diff --git a/runtime/interpreter/mterp/mips64/op_return_void_no_barrier.S b/runtime/interpreter/mterp/mips64/op_return_void_no_barrier.S
index f67e811..4e9b640 100644
--- a/runtime/interpreter/mterp/mips64/op_return_void_no_barrier.S
+++ b/runtime/interpreter/mterp/mips64/op_return_void_no_barrier.S
@@ -1,7 +1,7 @@
     .extern MterpSuspendCheck
     lw      ra, THREAD_FLAGS_OFFSET(rSELF)
     move    a0, rSELF
-    and     ra, ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    and     ra, ra, THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     beqzc   ra, 1f
     jal     MterpSuspendCheck           # (self)
 1:
diff --git a/runtime/interpreter/mterp/mips64/op_return_wide.S b/runtime/interpreter/mterp/mips64/op_return_wide.S
index 544e027..91ca1fa 100644
--- a/runtime/interpreter/mterp/mips64/op_return_wide.S
+++ b/runtime/interpreter/mterp/mips64/op_return_wide.S
@@ -8,7 +8,7 @@
     jal     MterpThreadFenceForConstructor
     lw      ra, THREAD_FLAGS_OFFSET(rSELF)
     move    a0, rSELF
-    and     ra, ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    and     ra, ra, THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     beqzc   ra, 1f
     jal     MterpSuspendCheck           # (self)
 1:
diff --git a/runtime/interpreter/mterp/mips64/op_unused_fa.S b/runtime/interpreter/mterp/mips64/op_unused_fa.S
deleted file mode 100644
index 29463d7..0000000
--- a/runtime/interpreter/mterp/mips64/op_unused_fa.S
+++ /dev/null
@@ -1 +0,0 @@
-%include "mips64/unused.S"
diff --git a/runtime/interpreter/mterp/mips64/op_unused_fb.S b/runtime/interpreter/mterp/mips64/op_unused_fb.S
deleted file mode 100644
index 29463d7..0000000
--- a/runtime/interpreter/mterp/mips64/op_unused_fb.S
+++ /dev/null
@@ -1 +0,0 @@
-%include "mips64/unused.S"
diff --git a/runtime/interpreter/mterp/mterp.cc b/runtime/interpreter/mterp/mterp.cc
index cf8d4bd..2bd47bb 100644
--- a/runtime/interpreter/mterp/mterp.cc
+++ b/runtime/interpreter/mterp/mterp.cc
@@ -291,11 +291,11 @@
                                    ShadowFrame* shadow_frame,
                                    Thread* self)
     REQUIRES_SHARED(Locks::mutator_lock_) {
-  String* s = ResolveString(self, *shadow_frame,  index);
+  ObjPtr<mirror::String> s = ResolveString(self, *shadow_frame, index);
   if (UNLIKELY(s == nullptr)) {
     return true;
   }
-  shadow_frame->SetVRegReference(tgt_vreg, s);
+  shadow_frame->SetVRegReference(tgt_vreg, s.Ptr());
   return false;
 }
 
@@ -304,7 +304,7 @@
                                   ShadowFrame* shadow_frame,
                                   Thread* self)
     REQUIRES_SHARED(Locks::mutator_lock_) {
-  Class* c = ResolveVerifyAndClinit(index, shadow_frame->GetMethod(), self, false, false);
+  mirror::Class* c = ResolveVerifyAndClinit(index, shadow_frame->GetMethod(), self, false, false);
   if (UNLIKELY(c == nullptr)) {
     return true;
   }
@@ -317,12 +317,12 @@
                                  art::ArtMethod* method,
                                  Thread* self)
     REQUIRES_SHARED(Locks::mutator_lock_) {
-  Class* c = ResolveVerifyAndClinit(index, method, self, false, false);
+  ObjPtr<mirror::Class> c = ResolveVerifyAndClinit(index, method, self, false, false);
   if (UNLIKELY(c == nullptr)) {
     return true;
   }
   // Must load obj from vreg following ResolveVerifyAndClinit due to moving gc.
-  Object* obj = vreg_addr->AsMirrorPtr();
+  mirror::Object* obj = vreg_addr->AsMirrorPtr();
   if (UNLIKELY(obj != nullptr && !obj->InstanceOf(c))) {
     ThrowClassCastException(c, obj->GetClass());
     return true;
@@ -335,16 +335,16 @@
                                   art::ArtMethod* method,
                                   Thread* self)
     REQUIRES_SHARED(Locks::mutator_lock_) {
-  Class* c = ResolveVerifyAndClinit(index, method, self, false, false);
+  ObjPtr<mirror::Class> c = ResolveVerifyAndClinit(index, method, self, false, false);
   if (UNLIKELY(c == nullptr)) {
     return false;  // Caller will check for pending exception.  Return value unimportant.
   }
   // Must load obj from vreg following ResolveVerifyAndClinit due to moving gc.
-  Object* obj = vreg_addr->AsMirrorPtr();
+  mirror::Object* obj = vreg_addr->AsMirrorPtr();
   return (obj != nullptr) && obj->InstanceOf(c);
 }
 
-extern "C" size_t MterpFillArrayData(Object* obj, const Instruction::ArrayDataPayload* payload)
+extern "C" size_t MterpFillArrayData(mirror::Object* obj, const Instruction::ArrayDataPayload* payload)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   return FillArrayData(obj, payload);
 }
@@ -352,9 +352,12 @@
 extern "C" size_t MterpNewInstance(ShadowFrame* shadow_frame, Thread* self, uint32_t inst_data)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   const Instruction* inst = Instruction::At(shadow_frame->GetDexPCPtr());
-  Object* obj = nullptr;
-  Class* c = ResolveVerifyAndClinit(inst->VRegB_21c(), shadow_frame->GetMethod(),
-                                    self, false, false);
+  mirror::Object* obj = nullptr;
+  mirror::Class* c = ResolveVerifyAndClinit(inst->VRegB_21c(),
+                                            shadow_frame->GetMethod(),
+                                            self,
+                                            false,
+                                            false);
   if (LIKELY(c != nullptr)) {
     if (UNLIKELY(c->IsStringClass())) {
       gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
@@ -404,13 +407,13 @@
                                   uint32_t inst_data)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   const Instruction* inst = Instruction::At(dex_pc_ptr);
-  Object* a = shadow_frame->GetVRegReference(inst->VRegB_23x());
+  mirror::Object* a = shadow_frame->GetVRegReference(inst->VRegB_23x());
   if (UNLIKELY(a == nullptr)) {
     return false;
   }
   int32_t index = shadow_frame->GetVReg(inst->VRegC_23x());
-  Object* val = shadow_frame->GetVRegReference(inst->VRegA_23x(inst_data));
-  ObjectArray<Object>* array = a->AsObjectArray<Object>();
+  mirror::Object* val = shadow_frame->GetVRegReference(inst->VRegA_23x(inst_data));
+  mirror::ObjectArray<mirror::Object>* array = a->AsObjectArray<mirror::Object>();
   if (array->CheckIsValidIndex(index) && array->CheckAssignable(val)) {
     array->SetWithoutChecks<false>(index, val);
     return true;
@@ -442,7 +445,7 @@
     REQUIRES_SHARED(Locks::mutator_lock_) {
   const Instruction* inst = Instruction::At(dex_pc_ptr);
   int32_t length = shadow_frame->GetVReg(inst->VRegB_22c(inst_data));
-  Object* obj = AllocArrayFromCode<false, true>(
+  mirror::Object* obj = AllocArrayFromCode<false, true>(
       inst->VRegC_22c(), length, shadow_frame->GetMethod(), self,
       Runtime::Current()->GetHeap()->GetCurrentAllocator());
   if (UNLIKELY(obj == nullptr)) {
@@ -561,6 +564,8 @@
     LOG(INFO) << "Checkpoint fallback: " << inst->Opcode(inst_data);
   } else if (flags & kSuspendRequest) {
     LOG(INFO) << "Suspend fallback: " << inst->Opcode(inst_data);
+  } else if (flags & kEmptyCheckpointRequest) {
+    LOG(INFO) << "Empty checkpoint fallback: " << inst->Opcode(inst_data);
   }
 }
 
@@ -678,7 +683,7 @@
     ThrowNullPointerExceptionFromInterpreter();
     return nullptr;
   }
-  ObjectArray<Object>* array = arr->AsObjectArray<Object>();
+  mirror::ObjectArray<mirror::Object>* array = arr->AsObjectArray<mirror::Object>();
   if (LIKELY(array->CheckIsValidIndex(index))) {
     return array->GetWithoutChecks(index);
   } else {
diff --git a/runtime/interpreter/mterp/out/mterp_arm.S b/runtime/interpreter/mterp/out/mterp_arm.S
index c33df6d..4d540d7 100644
--- a/runtime/interpreter/mterp/out/mterp_arm.S
+++ b/runtime/interpreter/mterp/out/mterp_arm.S
@@ -619,7 +619,7 @@
     bl      MterpThreadFenceForConstructor
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     mov     r0, rSELF
-    ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    ands    lr, #THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     blne    MterpSuspendCheck                       @ (self)
     mov    r0, #0
     mov    r1, #0
@@ -639,7 +639,7 @@
     bl      MterpThreadFenceForConstructor
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     mov     r0, rSELF
-    ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    ands    lr, #THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     blne    MterpSuspendCheck                       @ (self)
     mov     r2, rINST, lsr #8           @ r2<- AA
     GET_VREG r0, r2                     @ r0<- vAA
@@ -658,7 +658,7 @@
     bl      MterpThreadFenceForConstructor
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     mov     r0, rSELF
-    ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    ands    lr, #THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     blne    MterpSuspendCheck                       @ (self)
     mov     r2, rINST, lsr #8           @ r2<- AA
     VREG_INDEX_TO_ADDR r2, r2           @ r2<- &fp[AA]
@@ -680,7 +680,7 @@
     bl      MterpThreadFenceForConstructor
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     mov     r0, rSELF
-    ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    ands    lr, #THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     blne    MterpSuspendCheck                       @ (self)
     mov     r2, rINST, lsr #8           @ r2<- AA
     GET_VREG r0, r2                     @ r0<- vAA
@@ -3149,7 +3149,7 @@
 /* File: arm/op_return_void_no_barrier.S */
     ldr     lr, [rSELF, #THREAD_FLAGS_OFFSET]
     mov     r0, rSELF
-    ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    ands    lr, #THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     blne    MterpSuspendCheck                       @ (self)
     mov    r0, #0
     mov    r1, #0
@@ -7305,24 +7305,16 @@
 
 /* ------------------------------ */
     .balign 128
-.L_op_unused_fa: /* 0xfa */
-/* File: arm/op_unused_fa.S */
-/* File: arm/unused.S */
-/*
- * Bail to reference interpreter to throw.
- */
-  b MterpFallback
+.L_op_invoke_polymorphic: /* 0xfa */
+/* Transfer stub to alternate interpreter */
+    b    MterpFallback
 
 
 /* ------------------------------ */
     .balign 128
-.L_op_unused_fb: /* 0xfb */
-/* File: arm/op_unused_fb.S */
-/* File: arm/unused.S */
-/*
- * Bail to reference interpreter to throw.
- */
-  b MterpFallback
+.L_op_invoke_polymorphic_range: /* 0xfb */
+/* Transfer stub to alternate interpreter */
+    b    MterpFallback
 
 
 /* ------------------------------ */
@@ -11734,7 +11726,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_unused_fa: /* 0xfa */
+.L_ALT_op_invoke_polymorphic: /* 0xfa */
 /* File: arm/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -11751,7 +11743,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_unused_fb: /* 0xfb */
+.L_ALT_op_invoke_polymorphic_range: /* 0xfb */
 /* File: arm/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -11997,7 +11989,7 @@
     REFRESH_IBASE
     add     r2, rINST, rINST            @ r2<- byte offset
     FETCH_ADVANCE_INST_RB r2            @ update rPC, load rINST
-    ands    lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    ands    lr, #THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     bne     .L_suspend_request_pending
     GET_INST_OPCODE ip                  @ extract opcode from rINST
     GOTO_OPCODE ip                      @ jump to next instruction
diff --git a/runtime/interpreter/mterp/out/mterp_arm64.S b/runtime/interpreter/mterp/out/mterp_arm64.S
index c7303b9..42f8c1b 100644
--- a/runtime/interpreter/mterp/out/mterp_arm64.S
+++ b/runtime/interpreter/mterp/out/mterp_arm64.S
@@ -616,7 +616,7 @@
     bl      MterpThreadFenceForConstructor
     ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     x0, xSELF
-    ands    w7, w7, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    ands    w7, w7, #THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     b.ne    .Lop_return_void_check
 .Lop_return_void_return:
     mov     x0, #0
@@ -639,7 +639,7 @@
     bl      MterpThreadFenceForConstructor
     ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     x0, xSELF
-    ands    w7, w7, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    ands    w7, w7, #THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     b.ne    .Lop_return_check
 .Lop_return_return:
     lsr     w2, wINST, #8               // r2<- AA
@@ -662,7 +662,7 @@
     bl      MterpThreadFenceForConstructor
     ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     x0, xSELF
-    ands    w7, w7, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    ands    w7, w7, #THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     b.ne    .Lop_return_wide_check
 .Lop_return_wide_return:
     lsr     w2, wINST, #8               // w2<- AA
@@ -687,7 +687,7 @@
     bl      MterpThreadFenceForConstructor
     ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     x0, xSELF
-    ands    w7, w7, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    ands    w7, w7, #THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     b.ne    .Lop_return_object_check
 .Lop_return_object_return:
     lsr     w2, wINST, #8               // r2<- AA
@@ -3033,7 +3033,7 @@
 /* File: arm64/op_return_void_no_barrier.S */
     ldr     w7, [xSELF, #THREAD_FLAGS_OFFSET]
     mov     x0, xSELF
-    ands    w7, w7, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    ands    w7, w7, #THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     b.ne    .Lop_return_void_no_barrier_check
 .Lop_return_void_no_barrier_return:
     mov     x0, #0
@@ -6862,24 +6862,16 @@
 
 /* ------------------------------ */
     .balign 128
-.L_op_unused_fa: /* 0xfa */
-/* File: arm64/op_unused_fa.S */
-/* File: arm64/unused.S */
-/*
- * Bail to reference interpreter to throw.
- */
-  b MterpFallback
+.L_op_invoke_polymorphic: /* 0xfa */
+/* Transfer stub to alternate interpreter */
+    b    MterpFallback
 
 
 /* ------------------------------ */
     .balign 128
-.L_op_unused_fb: /* 0xfb */
-/* File: arm64/op_unused_fb.S */
-/* File: arm64/unused.S */
-/*
- * Bail to reference interpreter to throw.
- */
-  b MterpFallback
+.L_op_invoke_polymorphic_range: /* 0xfb */
+/* Transfer stub to alternate interpreter */
+    b    MterpFallback
 
 
 /* ------------------------------ */
@@ -7090,7 +7082,7 @@
     add     w2, wINST, wINST            // w2<- byte offset
     FETCH_ADVANCE_INST_RB w2            // update rPC, load wINST
     REFRESH_IBASE
-    ands    lr, lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    ands    lr, lr, #THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     b.ne    .L_suspend_request_pending
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
@@ -7164,7 +7156,7 @@
  */
 MterpCheckSuspendAndContinue:
     ldr     xIBASE, [xSELF, #THREAD_CURRENT_IBASE_OFFSET]  // refresh xIBASE
-    ands    w7, w7, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    ands    w7, w7, #THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     b.ne    check1
     GET_INST_OPCODE ip                  // extract opcode from wINST
     GOTO_OPCODE ip                      // jump to next instruction
@@ -7219,7 +7211,7 @@
     ldr     lr, [xSELF, #THREAD_FLAGS_OFFSET]
     str     x0, [x2]
     mov     x0, xSELF
-    ands    lr, lr, #(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    ands    lr, lr, #THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     b.eq    check2
     bl      MterpSuspendCheck                       // (self)
 check2:
@@ -11519,7 +11511,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_unused_fa: /* 0xfa */
+.L_ALT_op_invoke_polymorphic: /* 0xfa */
 /* File: arm64/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -11536,7 +11528,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_unused_fb: /* 0xfb */
+.L_ALT_op_invoke_polymorphic_range: /* 0xfb */
 /* File: arm64/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
diff --git a/runtime/interpreter/mterp/out/mterp_mips.S b/runtime/interpreter/mterp/out/mterp_mips.S
index fef7dc6..e154e6c 100644
--- a/runtime/interpreter/mterp/out/mterp_mips.S
+++ b/runtime/interpreter/mterp/out/mterp_mips.S
@@ -160,6 +160,58 @@
 #define fcc1   $fcc1
 #endif
 
+#ifdef MIPS32REVGE2
+#define SEB(rd, rt) \
+    seb       rd, rt
+#define SEH(rd, rt) \
+    seh       rd, rt
+#define INSERT_HIGH_HALF(rd_lo, rt_hi) \
+    ins       rd_lo, rt_hi, 16, 16
+#else
+#define SEB(rd, rt) \
+    sll       rd, rt, 24; \
+    sra       rd, rd, 24
+#define SEH(rd, rt) \
+    sll       rd, rt, 16; \
+    sra       rd, rd, 16
+/* Clobbers rt_hi on pre-R2. */
+#define INSERT_HIGH_HALF(rd_lo, rt_hi) \
+    sll       rt_hi, rt_hi, 16; \
+    or        rd_lo, rt_hi
+#endif
+
+#ifdef FPU64
+#define MOVE_TO_FPU_HIGH(r, flo, fhi) \
+    mthc1     r, flo
+#else
+#define MOVE_TO_FPU_HIGH(r, flo, fhi) \
+    mtc1      r, fhi
+#endif
+
+#ifdef MIPS32REVGE6
+#define JR(rt) \
+    jic       rt, 0
+#define LSA(rd, rs, rt, sa) \
+    .if sa; \
+    lsa       rd, rs, rt, sa; \
+    .else; \
+    addu      rd, rs, rt; \
+    .endif
+#else
+#define JR(rt) \
+    jalr      zero, rt
+#define LSA(rd, rs, rt, sa) \
+    .if sa; \
+    .set      push; \
+    .set      noat; \
+    sll       AT, rs, sa; \
+    addu      rd, AT, rt; \
+    .set      pop; \
+    .else; \
+    addu      rd, rs, rt; \
+    .endif
+#endif
+
 /*
  * Instead of holding a pointer to the shadow frame, we keep rFP at the base of the vregs.  So,
  * to access other shadow frame fields, we need to use a backwards offset.  Define those here.
@@ -193,12 +245,12 @@
     sw        rPC, OFF_FP_DEX_PC_PTR(rFP)
 
 #define EXPORT_DEX_PC(tmp) \
-    lw   tmp, OFF_FP_CODE_ITEM(rFP) \
-    sw   rPC, OFF_FP_DEX_PC_PTR(rFP) \
-    addu tmp, CODEITEM_INSNS_OFFSET \
-    subu tmp, rPC, tmp \
-    sra  tmp, tmp, 1 \
-    sw   tmp, OFF_FP_DEX_PC(rFP)
+    lw        tmp, OFF_FP_CODE_ITEM(rFP); \
+    sw        rPC, OFF_FP_DEX_PC_PTR(rFP); \
+    addu      tmp, CODEITEM_INSNS_OFFSET; \
+    subu      tmp, rPC, tmp; \
+    sra       tmp, tmp, 1; \
+    sw        tmp, OFF_FP_DEX_PC(rFP)
 
 /*
  * Fetch the next instruction from rPC into rINST.  Does not advance rPC.
@@ -213,18 +265,11 @@
  * exception catch may miss.  (This also implies that it must come after
  * EXPORT_PC().)
  */
-#define FETCH_ADVANCE_INST(_count) lhu rINST, ((_count)*2)(rPC); \
+#define FETCH_ADVANCE_INST(_count) \
+    lhu       rINST, ((_count)*2)(rPC); \
     addu      rPC, rPC, ((_count) * 2)
 
 /*
- * The operation performed here is similar to FETCH_ADVANCE_INST, except the
- * src and dest registers are parameterized (not hard-wired to rPC and rINST).
- */
-#define PREFETCH_ADVANCE_INST(_dreg, _sreg, _count) \
-    lhu       _dreg, ((_count)*2)(_sreg) ;            \
-    addu      _sreg, _sreg, (_count)*2
-
-/*
  * Similar to FETCH_ADVANCE_INST, but does not update rPC.  Used to load
  * rINST ahead of possible exception point.  Be sure to manually advance rPC
  * later.
@@ -239,7 +284,8 @@
  * rPC to point to the next instruction.  "rd" must specify the distance
  * in bytes, *not* 16-bit code units, and may be a signed value.
  */
-#define FETCH_ADVANCE_INST_RB(rd) addu rPC, rPC, rd; \
+#define FETCH_ADVANCE_INST_RB(rd) \
+    addu      rPC, rPC, rd; \
     lhu       rINST, (rPC)
 
 /*
@@ -264,38 +310,75 @@
 #define GET_INST_OPCODE(rd) and rd, rINST, 0xFF
 
 /*
- * Put the prefetched instruction's opcode field into the specified register.
+ * Transform opcode into branch target address.
  */
-#define GET_PREFETCHED_OPCODE(dreg, sreg)   andi     dreg, sreg, 255
+#define GET_OPCODE_TARGET(rd) \
+    sll       rd, rd, 7; \
+    addu      rd, rIBASE, rd
 
 /*
  * Begin executing the opcode in rd.
  */
-#define GOTO_OPCODE(rd) sll rd, rd, 7; \
-    addu      rd, rIBASE, rd; \
-    jalr      zero, rd
-
-#define GOTO_OPCODE_BASE(_base, rd)  sll rd, rd, 7; \
-    addu      rd, _base, rd; \
-    jalr      zero, rd
+#define GOTO_OPCODE(rd) \
+    GET_OPCODE_TARGET(rd); \
+    JR(rd)
 
 /*
  * Get/set the 32-bit value from a Dalvik register.
  */
 #define GET_VREG(rd, rix) LOAD_eas2(rd, rFP, rix)
 
-#define GET_VREG_F(rd, rix) EAS2(AT, rFP, rix); \
-    .set noat; l.s rd, (AT); .set at
+#define GET_VREG_F(rd, rix) \
+    .set noat; \
+    EAS2(AT, rFP, rix); \
+    l.s       rd, (AT); \
+    .set at
 
-#define SET_VREG(rd, rix) .set noat; \
+#ifdef MIPS32REVGE6
+#define SET_VREG(rd, rix) \
+    lsa       t8, rix, rFP, 2; \
+    sw        rd, 0(t8); \
+    lsa       t8, rix, rREFS, 2; \
+    sw        zero, 0(t8)
+#else
+#define SET_VREG(rd, rix) \
+    .set noat; \
     sll       AT, rix, 2; \
     addu      t8, rFP, AT; \
     sw        rd, 0(t8); \
     addu      t8, rREFS, AT; \
     .set at; \
     sw        zero, 0(t8)
+#endif
 
-#define SET_VREG64(rlo, rhi, rix) .set noat; \
+#ifdef MIPS32REVGE6
+#define SET_VREG_OBJECT(rd, rix) \
+    lsa       t8, rix, rFP, 2; \
+    sw        rd, 0(t8); \
+    lsa       t8, rix, rREFS, 2; \
+    sw        rd, 0(t8)
+#else
+#define SET_VREG_OBJECT(rd, rix) \
+    .set noat; \
+    sll       AT, rix, 2; \
+    addu      t8, rFP, AT; \
+    sw        rd, 0(t8); \
+    addu      t8, rREFS, AT; \
+    .set at; \
+    sw        rd, 0(t8)
+#endif
+
+#ifdef MIPS32REVGE6
+#define SET_VREG64(rlo, rhi, rix) \
+    lsa       t8, rix, rFP, 2; \
+    sw        rlo, 0(t8); \
+    sw        rhi, 4(t8); \
+    lsa       t8, rix, rREFS, 2; \
+    sw        zero, 0(t8); \
+    sw        zero, 4(t8)
+#else
+#define SET_VREG64(rlo, rhi, rix) \
+    .set noat; \
     sll       AT, rix, 2; \
     addu      t8, rFP, AT; \
     sw        rlo, 0(t8); \
@@ -304,9 +387,39 @@
     .set at; \
     sw        zero, 0(t8); \
     sw        zero, 4(t8)
+#endif
 
-#ifdef FPU64
-#define SET_VREG64_F(rlo, rhi, rix) .set noat; \
+#ifdef MIPS32REVGE6
+#define SET_VREG_F(rd, rix) \
+    lsa       t8, rix, rFP, 2; \
+    s.s       rd, 0(t8); \
+    lsa       t8, rix, rREFS, 2; \
+    sw        zero, 0(t8)
+#else
+#define SET_VREG_F(rd, rix) \
+    .set noat; \
+    sll       AT, rix, 2; \
+    addu      t8, rFP, AT; \
+    s.s       rd, 0(t8); \
+    addu      t8, rREFS, AT; \
+    .set at; \
+    sw        zero, 0(t8)
+#endif
+
+#ifdef MIPS32REVGE6
+#define SET_VREG64_F(rlo, rhi, rix) \
+    lsa       t8, rix, rFP, 2; \
+    .set noat; \
+    mfhc1     AT, rlo; \
+    s.s       rlo, 0(t8); \
+    sw        AT, 4(t8); \
+    .set at; \
+    lsa       t8, rix, rREFS, 2; \
+    sw        zero, 0(t8); \
+    sw        zero, 4(t8)
+#elif defined(FPU64)
+#define SET_VREG64_F(rlo, rhi, rix) \
+    .set noat; \
     sll       AT, rix, 2; \
     addu      t8, rREFS, AT; \
     sw        zero, 0(t8); \
@@ -317,7 +430,8 @@
     .set at; \
     s.s       rlo, 0(t8)
 #else
-#define SET_VREG64_F(rlo, rhi, rix) .set noat; \
+#define SET_VREG64_F(rlo, rhi, rix) \
+    .set noat; \
     sll       AT, rix, 2; \
     addu      t8, rFP, AT; \
     s.s       rlo, 0(t8); \
@@ -328,18 +442,21 @@
     sw        zero, 4(t8)
 #endif
 
-#define SET_VREG_OBJECT(rd, rix) .set noat; \
-    sll       AT, rix, 2; \
-    addu      t8, rFP, AT; \
-    sw        rd, 0(t8); \
-    addu      t8, rREFS, AT; \
-    .set at; \
-    sw        rd, 0(t8)
-
 /* Combination of the SET_VREG and GOTO_OPCODE functions to save 1 instruction */
-#define SET_VREG_GOTO(rd, rix, dst) .set noreorder; \
-    sll       dst, dst, 7; \
-    addu      dst, rIBASE, dst; \
+#ifdef MIPS32REVGE6
+#define SET_VREG_GOTO(rd, rix, dst) \
+    .set noreorder; \
+    GET_OPCODE_TARGET(dst); \
+    lsa       t8, rix, rFP, 2; \
+    sw        rd, 0(t8); \
+    lsa       t8, rix, rREFS, 2; \
+    jalr      zero, dst; \
+    sw        zero, 0(t8); \
+    .set reorder
+#else
+#define SET_VREG_GOTO(rd, rix, dst) \
+    .set noreorder; \
+    GET_OPCODE_TARGET(dst); \
     .set noat; \
     sll       AT, rix, 2; \
     addu      t8, rFP, AT; \
@@ -349,11 +466,51 @@
     jalr      zero, dst; \
     sw        zero, 0(t8); \
     .set reorder
+#endif
+
+/* Combination of the SET_VREG_OBJECT and GOTO_OPCODE functions to save 1 instruction */
+#ifdef MIPS32REVGE6
+#define SET_VREG_OBJECT_GOTO(rd, rix, dst) \
+    .set noreorder; \
+    GET_OPCODE_TARGET(dst); \
+    lsa       t8, rix, rFP, 2; \
+    sw        rd, 0(t8); \
+    lsa       t8, rix, rREFS, 2; \
+    jalr      zero, dst; \
+    sw        rd, 0(t8); \
+    .set reorder
+#else
+#define SET_VREG_OBJECT_GOTO(rd, rix, dst) \
+    .set noreorder; \
+    GET_OPCODE_TARGET(dst); \
+    .set noat; \
+    sll       AT, rix, 2; \
+    addu      t8, rFP, AT; \
+    sw        rd, 0(t8); \
+    addu      t8, rREFS, AT; \
+    .set at; \
+    jalr      zero, dst; \
+    sw        rd, 0(t8); \
+    .set reorder
+#endif
 
 /* Combination of the SET_VREG64 and GOTO_OPCODE functions to save 1 instruction */
-#define SET_VREG64_GOTO(rlo, rhi, rix, dst) .set noreorder; \
-    sll       dst, dst, 7; \
-    addu      dst, rIBASE, dst; \
+#ifdef MIPS32REVGE6
+#define SET_VREG64_GOTO(rlo, rhi, rix, dst) \
+    .set noreorder; \
+    GET_OPCODE_TARGET(dst); \
+    lsa       t8, rix, rFP, 2; \
+    sw        rlo, 0(t8); \
+    sw        rhi, 4(t8); \
+    lsa       t8, rix, rREFS, 2; \
+    sw        zero, 0(t8); \
+    jalr      zero, dst; \
+    sw        zero, 4(t8); \
+    .set reorder
+#else
+#define SET_VREG64_GOTO(rlo, rhi, rix, dst) \
+    .set noreorder; \
+    GET_OPCODE_TARGET(dst); \
     .set noat; \
     sll       AT, rix, 2; \
     addu      t8, rFP, AT; \
@@ -365,14 +522,82 @@
     jalr      zero, dst; \
     sw        zero, 4(t8); \
     .set reorder
+#endif
 
-#define SET_VREG_F(rd, rix) .set noat; \
+/* Combination of the SET_VREG_F and GOTO_OPCODE functions to save 1 instruction */
+#ifdef MIPS32REVGE6
+#define SET_VREG_F_GOTO(rd, rix, dst) \
+    .set noreorder; \
+    GET_OPCODE_TARGET(dst); \
+    lsa       t8, rix, rFP, 2; \
+    s.s       rd, 0(t8); \
+    lsa       t8, rix, rREFS, 2; \
+    jalr      zero, dst; \
+    sw        zero, 0(t8); \
+    .set reorder
+#else
+#define SET_VREG_F_GOTO(rd, rix, dst) \
+    .set noreorder; \
+    GET_OPCODE_TARGET(dst); \
+    .set noat; \
     sll       AT, rix, 2; \
     addu      t8, rFP, AT; \
     s.s       rd, 0(t8); \
     addu      t8, rREFS, AT; \
     .set at; \
-    sw        zero, 0(t8)
+    jalr      zero, dst; \
+    sw        zero, 0(t8); \
+    .set reorder
+#endif
+
+/* Combination of the SET_VREG64_F and GOTO_OPCODE functions to save 1 instruction */
+#ifdef MIPS32REVGE6
+#define SET_VREG64_F_GOTO(rlo, rhi, rix, dst) \
+    .set noreorder; \
+    GET_OPCODE_TARGET(dst); \
+    lsa       t8, rix, rFP, 2; \
+    .set noat; \
+    mfhc1     AT, rlo; \
+    s.s       rlo, 0(t8); \
+    sw        AT, 4(t8); \
+    .set at; \
+    lsa       t8, rix, rREFS, 2; \
+    sw        zero, 0(t8); \
+    jalr      zero, dst; \
+    sw        zero, 4(t8); \
+    .set reorder
+#elif defined(FPU64)
+#define SET_VREG64_F_GOTO(rlo, rhi, rix, dst) \
+    .set noreorder; \
+    GET_OPCODE_TARGET(dst); \
+    .set noat; \
+    sll       AT, rix, 2; \
+    addu      t8, rREFS, AT; \
+    sw        zero, 0(t8); \
+    sw        zero, 4(t8); \
+    addu      t8, rFP, AT; \
+    mfhc1     AT, rlo; \
+    sw        AT, 4(t8); \
+    .set at; \
+    jalr      zero, dst; \
+    s.s       rlo, 0(t8); \
+    .set reorder
+#else
+#define SET_VREG64_F_GOTO(rlo, rhi, rix, dst) \
+    .set noreorder; \
+    GET_OPCODE_TARGET(dst); \
+    .set noat; \
+    sll       AT, rix, 2; \
+    addu      t8, rFP, AT; \
+    s.s       rlo, 0(t8); \
+    s.s       rhi, 4(t8); \
+    addu      t8, rREFS, AT; \
+    .set at; \
+    sw        zero, 0(t8); \
+    jalr      zero, dst; \
+    sw        zero, 4(t8); \
+    .set reorder
+#endif
 
 #define GET_OPA(rd) srl rd, rINST, 8
 #ifdef MIPS32REVGE2
@@ -383,60 +608,60 @@
 #define GET_OPB(rd) srl rd, rINST, 12
 
 /*
- * Form an Effective Address rd = rbase + roff<<n;
- * Uses reg AT
+ * Form an Effective Address rd = rbase + roff<<shift;
+ * Uses reg AT on pre-R6.
  */
-#define EASN(rd, rbase, roff, rshift) .set noat; \
-    sll       AT, roff, rshift; \
-    addu      rd, rbase, AT; \
-    .set at
+#define EASN(rd, rbase, roff, shift) LSA(rd, roff, rbase, shift)
 
 #define EAS1(rd, rbase, roff) EASN(rd, rbase, roff, 1)
 #define EAS2(rd, rbase, roff) EASN(rd, rbase, roff, 2)
 #define EAS3(rd, rbase, roff) EASN(rd, rbase, roff, 3)
 #define EAS4(rd, rbase, roff) EASN(rd, rbase, roff, 4)
 
-/*
- * Form an Effective Shift Right rd = rbase + roff>>n;
- * Uses reg AT
- */
-#define ESRN(rd, rbase, roff, rshift) .set noat; \
-    srl       AT, roff, rshift; \
-    addu      rd, rbase, AT; \
+#define LOAD_eas2(rd, rbase, roff) \
+    .set noat; \
+    EAS2(AT, rbase, roff); \
+    lw        rd, 0(AT); \
     .set at
 
-#define LOAD_eas2(rd, rbase, roff) EAS2(AT, rbase, roff); \
-    .set noat; lw rd, 0(AT); .set at
-
-#define STORE_eas2(rd, rbase, roff) EAS2(AT, rbase, roff); \
-    .set noat; sw rd, 0(AT); .set at
+#define STORE_eas2(rd, rbase, roff) \
+    .set noat; \
+    EAS2(AT, rbase, roff); \
+    sw        rd, 0(AT); \
+    .set at
 
 #define LOAD_RB_OFF(rd, rbase, off) lw rd, off(rbase)
 #define STORE_RB_OFF(rd, rbase, off) sw rd, off(rbase)
 
-#define STORE64_off(rlo, rhi, rbase, off) sw rlo, off(rbase); \
+#define STORE64_off(rlo, rhi, rbase, off) \
+    sw        rlo, off(rbase); \
     sw        rhi, (off+4)(rbase)
-#define LOAD64_off(rlo, rhi, rbase, off) lw rlo, off(rbase); \
+#define LOAD64_off(rlo, rhi, rbase, off) \
+    lw        rlo, off(rbase); \
     lw        rhi, (off+4)(rbase)
 
 #define STORE64(rlo, rhi, rbase) STORE64_off(rlo, rhi, rbase, 0)
 #define LOAD64(rlo, rhi, rbase) LOAD64_off(rlo, rhi, rbase, 0)
 
 #ifdef FPU64
-#define STORE64_off_F(rlo, rhi, rbase, off) s.s rlo, off(rbase); \
+#define STORE64_off_F(rlo, rhi, rbase, off) \
+    s.s       rlo, off(rbase); \
     .set noat; \
     mfhc1     AT, rlo; \
     sw        AT, (off+4)(rbase); \
     .set at
-#define LOAD64_off_F(rlo, rhi, rbase, off) l.s rlo, off(rbase); \
+#define LOAD64_off_F(rlo, rhi, rbase, off) \
+    l.s       rlo, off(rbase); \
     .set noat; \
     lw        AT, (off+4)(rbase); \
     mthc1     AT, rlo; \
     .set at
 #else
-#define STORE64_off_F(rlo, rhi, rbase, off) s.s rlo, off(rbase); \
+#define STORE64_off_F(rlo, rhi, rbase, off) \
+    s.s       rlo, off(rbase); \
     s.s       rhi, (off+4)(rbase)
-#define LOAD64_off_F(rlo, rhi, rbase, off) l.s rlo, off(rbase); \
+#define LOAD64_off_F(rlo, rhi, rbase, off) \
+    l.s       rlo, off(rbase); \
     l.s       rhi, (off+4)(rbase)
 #endif
 
@@ -498,6 +723,14 @@
 #define REFRESH_IBASE() \
     lw        rIBASE, THREAD_CURRENT_IBASE_OFFSET(rSELF)
 
+/* Constants for float/double_to_int/long conversions */
+#define INT_MIN                 0x80000000
+#define INT_MIN_AS_FLOAT        0xCF000000
+#define INT_MIN_AS_DOUBLE_HIGH  0xC1E00000
+#define LONG_MIN_HIGH           0x80000000
+#define LONG_MIN_AS_FLOAT       0xDF000000
+#define LONG_MIN_AS_DOUBLE_HIGH 0xC3E00000
+
 /* File: mips/entry.S */
 /*
  * Copyright (C) 2016 The Android Open Source Project
@@ -599,11 +832,10 @@
     GET_VREG(a2, a1)                       #  a2 <- fp[B]
     GET_INST_OPCODE(t0)                    #  t0 <- opcode from rINST
     .if 0
-    SET_VREG_OBJECT(a2, a0)                #  fp[A] <- a2
+    SET_VREG_OBJECT_GOTO(a2, a0, t0)       #  fp[A] <- a2
     .else
-    SET_VREG(a2, a0)                       #  fp[A] <- a2
+    SET_VREG_GOTO(a2, a0, t0)              #  fp[A] <- a2
     .endif
-    GOTO_OPCODE(t0)                        #  jump to next instruction
 
 /* ------------------------------ */
     .balign 128
@@ -617,11 +849,10 @@
     GET_VREG(a2, a1)                       #  a2 <- fp[BBBB]
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     .if 0
-    SET_VREG_OBJECT(a2, a0)                #  fp[AA] <- a2
+    SET_VREG_OBJECT_GOTO(a2, a0, t0)       #  fp[AA] <- a2
     .else
-    SET_VREG(a2, a0)                       #  fp[AA] <- a2
+    SET_VREG_GOTO(a2, a0, t0)              #  fp[AA] <- a2
     .endif
-    GOTO_OPCODE(t0)                        #  jump to next instruction
 
 /* ------------------------------ */
     .balign 128
@@ -635,11 +866,10 @@
     GET_VREG(a2, a1)                       #  a2 <- fp[BBBB]
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     .if 0
-    SET_VREG_OBJECT(a2, a0)                #  fp[AAAA] <- a2
+    SET_VREG_OBJECT_GOTO(a2, a0, t0)       #  fp[AAAA] <- a2
     .else
-    SET_VREG(a2, a0)                       #  fp[AAAA] <- a2
+    SET_VREG_GOTO(a2, a0, t0)              #  fp[AAAA] <- a2
     .endif
-    GOTO_OPCODE(t0)                        #  jump to next instruction
 
 /* ------------------------------ */
     .balign 128
@@ -652,9 +882,8 @@
     EAS2(a3, rFP, a3)                      #  a3 <- &fp[B]
     LOAD64(a0, a1, a3)                     #  a0/a1 <- fp[B]
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
-    SET_VREG64(a0, a1, a2)                 #  fp[A] <- a0/a1
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_GOTO(a0, a1, a2, t0)        #  fp[A] <- a0/a1
 
 /* ------------------------------ */
     .balign 128
@@ -667,9 +896,8 @@
     EAS2(a3, rFP, a3)                      #  a3 <- &fp[BBBB]
     LOAD64(a0, a1, a3)                     #  a0/a1 <- fp[BBBB]
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
-    SET_VREG64(a0, a1, a2)                 #  fp[AA] <- a0/a1
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_GOTO(a0, a1, a2, t0)        #  fp[AA] <- a0/a1
 
 /* ------------------------------ */
     .balign 128
@@ -682,9 +910,8 @@
     EAS2(a3, rFP, a3)                      #  a3 <- &fp[BBBB]
     LOAD64(a0, a1, a3)                     #  a0/a1 <- fp[BBBB]
     FETCH_ADVANCE_INST(3)                  #  advance rPC, load rINST
-    SET_VREG64(a0, a1, a2)                 #  fp[AAAA] <- a0/a1
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_GOTO(a0, a1, a2, t0)        #  fp[AAAA] <- a0/a1
 
 /* ------------------------------ */
     .balign 128
@@ -699,11 +926,10 @@
     GET_VREG(a2, a1)                       #  a2 <- fp[B]
     GET_INST_OPCODE(t0)                    #  t0 <- opcode from rINST
     .if 1
-    SET_VREG_OBJECT(a2, a0)                #  fp[A] <- a2
+    SET_VREG_OBJECT_GOTO(a2, a0, t0)       #  fp[A] <- a2
     .else
-    SET_VREG(a2, a0)                       #  fp[A] <- a2
+    SET_VREG_GOTO(a2, a0, t0)              #  fp[A] <- a2
     .endif
-    GOTO_OPCODE(t0)                        #  jump to next instruction
 
 
 /* ------------------------------ */
@@ -719,11 +945,10 @@
     GET_VREG(a2, a1)                       #  a2 <- fp[BBBB]
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     .if 1
-    SET_VREG_OBJECT(a2, a0)                #  fp[AA] <- a2
+    SET_VREG_OBJECT_GOTO(a2, a0, t0)       #  fp[AA] <- a2
     .else
-    SET_VREG(a2, a0)                       #  fp[AA] <- a2
+    SET_VREG_GOTO(a2, a0, t0)              #  fp[AA] <- a2
     .endif
-    GOTO_OPCODE(t0)                        #  jump to next instruction
 
 
 /* ------------------------------ */
@@ -739,11 +964,10 @@
     GET_VREG(a2, a1)                       #  a2 <- fp[BBBB]
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     .if 1
-    SET_VREG_OBJECT(a2, a0)                #  fp[AAAA] <- a2
+    SET_VREG_OBJECT_GOTO(a2, a0, t0)       #  fp[AAAA] <- a2
     .else
-    SET_VREG(a2, a0)                       #  fp[AAAA] <- a2
+    SET_VREG_GOTO(a2, a0, t0)              #  fp[AAAA] <- a2
     .endif
-    GOTO_OPCODE(t0)                        #  jump to next instruction
 
 
 /* ------------------------------ */
@@ -758,11 +982,10 @@
     lw    a0, 0(a0)                        #  a0 <- result.i
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     .if 0
-    SET_VREG_OBJECT(a0, a2)                #  fp[AA] <- a0
+    SET_VREG_OBJECT_GOTO(a0, a2, t0)       #  fp[AA] <- a0
     .else
-    SET_VREG(a0, a2)                       #  fp[AA] <- a0
+    SET_VREG_GOTO(a0, a2, t0)              #  fp[AA] <- a0
     .endif
-    GOTO_OPCODE(t0)                        #  jump to next instruction
 
 /* ------------------------------ */
     .balign 128
@@ -773,9 +996,8 @@
     lw    a3, OFF_FP_RESULT_REGISTER(rFP)  #  get pointer to result JType
     LOAD64(a0, a1, a3)                     #  a0/a1 <- retval.j
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
-    SET_VREG64(a0, a1, a2)                 #  fp[AA] <- a0/a1
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_GOTO(a0, a1, a2, t0)        #  fp[AA] <- a0/a1
 
 /* ------------------------------ */
     .balign 128
@@ -790,11 +1012,10 @@
     lw    a0, 0(a0)                        #  a0 <- result.i
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     .if 1
-    SET_VREG_OBJECT(a0, a2)                #  fp[AA] <- a0
+    SET_VREG_OBJECT_GOTO(a0, a2, t0)       #  fp[AA] <- a0
     .else
-    SET_VREG(a0, a2)                       #  fp[AA] <- a0
+    SET_VREG_GOTO(a0, a2, t0)              #  fp[AA] <- a0
     .endif
-    GOTO_OPCODE(t0)                        #  jump to next instruction
 
 
 /* ------------------------------ */
@@ -805,10 +1026,11 @@
     GET_OPA(a2)                                 #  a2 <- AA
     lw    a3, THREAD_EXCEPTION_OFFSET(rSELF)    #  get exception obj
     FETCH_ADVANCE_INST(1)                       #  advance rPC, load rINST
-    SET_VREG_OBJECT(a3, a2)                     #  fp[AA] <- exception obj
     GET_INST_OPCODE(t0)                         #  extract opcode from rINST
+    GET_OPCODE_TARGET(t0)
+    SET_VREG_OBJECT(a3, a2)                     #  fp[AA] <- exception obj
     sw    zero, THREAD_EXCEPTION_OFFSET(rSELF)  #  clear exception
-    GOTO_OPCODE(t0)                             #  jump to next instruction
+    JR(t0)                                      #  jump to next instruction
 
 /* ------------------------------ */
     .balign 128
@@ -818,7 +1040,7 @@
     JAL(MterpThreadFenceForConstructor)
     lw        ra, THREAD_FLAGS_OFFSET(rSELF)
     move      a0, rSELF
-    and       ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    and       ra, THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     beqz      ra, 1f
     JAL(MterpSuspendCheck)                 # (self)
 1:
@@ -840,7 +1062,7 @@
     JAL(MterpThreadFenceForConstructor)
     lw        ra, THREAD_FLAGS_OFFSET(rSELF)
     move      a0, rSELF
-    and       ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    and       ra, THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     beqz      ra, 1f
     JAL(MterpSuspendCheck)                 # (self)
 1:
@@ -861,7 +1083,7 @@
     JAL(MterpThreadFenceForConstructor)
     lw        ra, THREAD_FLAGS_OFFSET(rSELF)
     move      a0, rSELF
-    and       ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    and       ra, THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     beqz      ra, 1f
     JAL(MterpSuspendCheck)                 # (self)
 1:
@@ -885,7 +1107,7 @@
     JAL(MterpThreadFenceForConstructor)
     lw        ra, THREAD_FLAGS_OFFSET(rSELF)
     move      a0, rSELF
-    and       ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    and       ra, THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     beqz      ra, 1f
     JAL(MterpSuspendCheck)                 # (self)
 1:
@@ -899,7 +1121,7 @@
     .balign 128
 .L_op_const_4: /* 0x12 */
 /* File: mips/op_const_4.S */
-    # const/4 vA,                          /* +B */
+    /* const/4 vA, +B */
     sll       a1, rINST, 16                #  a1 <- Bxxx0000
     GET_OPA(a0)                            #  a0 <- A+
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
@@ -912,7 +1134,7 @@
     .balign 128
 .L_op_const_16: /* 0x13 */
 /* File: mips/op_const_16.S */
-    # const/16 vAA,                        /* +BBBB */
+    /* const/16 vAA, +BBBB */
     FETCH_S(a0, 1)                         #  a0 <- ssssBBBB (sign-extended)
     GET_OPA(a3)                            #  a3 <- AA
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
@@ -923,13 +1145,12 @@
     .balign 128
 .L_op_const: /* 0x14 */
 /* File: mips/op_const.S */
-    # const vAA,                           /* +BBBBbbbb */
+    /* const vAA, +BBBBbbbb */
     GET_OPA(a3)                            #  a3 <- AA
     FETCH(a0, 1)                           #  a0 <- bbbb (low)
     FETCH(a1, 2)                           #  a1 <- BBBB (high)
     FETCH_ADVANCE_INST(3)                  #  advance rPC, load rINST
-    sll       a1, a1, 16
-    or        a0, a1, a0                   #  a0 <- BBBBbbbb
+    INSERT_HIGH_HALF(a0, a1)               #  a0 <- BBBBbbbb
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, a3, t0)              #  vAA <- a0
 
@@ -937,7 +1158,7 @@
     .balign 128
 .L_op_const_high16: /* 0x15 */
 /* File: mips/op_const_high16.S */
-    # const/high16 vAA,                    /* +BBBB0000 */
+    /* const/high16 vAA, +BBBB0000 */
     FETCH(a0, 1)                           #  a0 <- 0000BBBB (zero-extended)
     GET_OPA(a3)                            #  a3 <- AA
     sll       a0, a0, 16                   #  a0 <- BBBB0000
@@ -949,69 +1170,62 @@
     .balign 128
 .L_op_const_wide_16: /* 0x16 */
 /* File: mips/op_const_wide_16.S */
-    # const-wide/16 vAA,                   /* +BBBB */
+    /* const-wide/16 vAA, +BBBB */
     FETCH_S(a0, 1)                         #  a0 <- ssssBBBB (sign-extended)
     GET_OPA(a3)                            #  a3 <- AA
     sra       a1, a0, 31                   #  a1 <- ssssssss
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(a0, a1, a3)                 #  vAA <- a0/a1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_GOTO(a0, a1, a3, t0)        #  vAA/vAA+1 <- a0/a1
 
 /* ------------------------------ */
     .balign 128
 .L_op_const_wide_32: /* 0x17 */
 /* File: mips/op_const_wide_32.S */
-    # const-wide/32 vAA,                   /* +BBBBbbbb */
+    /* const-wide/32 vAA, +BBBBbbbb */
     FETCH(a0, 1)                           #  a0 <- 0000bbbb (low)
     GET_OPA(a3)                            #  a3 <- AA
     FETCH_S(a2, 2)                         #  a2 <- ssssBBBB (high)
     FETCH_ADVANCE_INST(3)                  #  advance rPC, load rINST
-    sll       a2, a2, 16
-    or        a0, a0, a2                   #  a0 <- BBBBbbbb
+    INSERT_HIGH_HALF(a0, a2)               #  a0 <- BBBBbbbb
     sra       a1, a0, 31                   #  a1 <- ssssssss
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(a0, a1, a3)                 #  vAA <- a0/a1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_GOTO(a0, a1, a3, t0)        #  vAA/vAA+1 <- a0/a1
 
 /* ------------------------------ */
     .balign 128
 .L_op_const_wide: /* 0x18 */
 /* File: mips/op_const_wide.S */
-    # const-wide vAA,                      /* +HHHHhhhhBBBBbbbb */
+    /* const-wide vAA, +HHHHhhhhBBBBbbbb */
     FETCH(a0, 1)                           #  a0 <- bbbb (low)
     FETCH(a1, 2)                           #  a1 <- BBBB (low middle)
     FETCH(a2, 3)                           #  a2 <- hhhh (high middle)
-    sll       a1, 16 #
-    or        a0, a1                       #  a0 <- BBBBbbbb (low word)
+    INSERT_HIGH_HALF(a0, a1)               #  a0 <- BBBBbbbb (low word)
     FETCH(a3, 4)                           #  a3 <- HHHH (high)
     GET_OPA(t1)                            #  t1 <- AA
-    sll       a3, 16
-    or        a1, a3, a2                   #  a1 <- HHHHhhhh (high word)
+    INSERT_HIGH_HALF(a2, a3)               #  a2 <- HHHHhhhh (high word)
     FETCH_ADVANCE_INST(5)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(a0, a1, t1)                 #  vAA <- a0/a1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_GOTO(a0, a2, t1, t0)        #  vAA/vAA+1 <- a0/a2
 
 /* ------------------------------ */
     .balign 128
 .L_op_const_wide_high16: /* 0x19 */
 /* File: mips/op_const_wide_high16.S */
-    # const-wide/high16 vAA,               /* +BBBB000000000000 */
+    /* const-wide/high16 vAA, +BBBB000000000000 */
     FETCH(a1, 1)                           #  a1 <- 0000BBBB (zero-extended)
     GET_OPA(a3)                            #  a3 <- AA
     li        a0, 0                        #  a0 <- 00000000
     sll       a1, 16                       #  a1 <- BBBB0000
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(a0, a1, a3)                 #  vAA <- a0/a1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_GOTO(a0, a1, a3, t0)        #  vAA/vAA+1 <- a0/a1
 
 /* ------------------------------ */
     .balign 128
 .L_op_const_string: /* 0x1a */
 /* File: mips/op_const_string.S */
-    # const/string vAA, String             /* BBBB */
+    /* const/string vAA, string@BBBB */
     EXPORT_PC()
     FETCH(a0, 1)                        # a0 <- BBBB
     GET_OPA(a1)                         # a1 <- AA
@@ -1028,13 +1242,12 @@
     .balign 128
 .L_op_const_string_jumbo: /* 0x1b */
 /* File: mips/op_const_string_jumbo.S */
-    # const/string vAA, String          /* BBBBBBBB */
+    /* const/string vAA, string@BBBBBBBB */
     EXPORT_PC()
     FETCH(a0, 1)                        # a0 <- bbbb (low)
     FETCH(a2, 2)                        # a2 <- BBBB (high)
     GET_OPA(a1)                         # a1 <- AA
-    sll    a2, a2, 16
-    or     a0, a0, a2                   # a0 <- BBBBbbbb
+    INSERT_HIGH_HALF(a0, a2)            # a0 <- BBBBbbbb
     addu   a2, rFP, OFF_FP_SHADOWFRAME  # a2 <- shadow frame
     move   a3, rSELF
     JAL(MterpConstString)               # v0 <- Mterp(index, tgt_reg, shadow_frame, self)
@@ -1048,7 +1261,7 @@
     .balign 128
 .L_op_const_class: /* 0x1c */
 /* File: mips/op_const_class.S */
-    # const/class vAA, Class               /* BBBB */
+    /* const/class vAA, class@BBBB */
     EXPORT_PC()
     FETCH(a0, 1)                        # a0 <- BBBB
     GET_OPA(a1)                         # a1 <- AA
@@ -1108,7 +1321,7 @@
     /*
      * Check to see if a cast from one class to another is allowed.
      */
-    # check-cast vAA, class                /* BBBB */
+    /* check-cast vAA, class@BBBB */
     EXPORT_PC()
     FETCH(a0, 1)                           #  a0 <- BBBB
     GET_OPA(a1)                            #  a1 <- AA
@@ -1132,7 +1345,7 @@
      * Most common situation is a non-null object, being compared against
      * an already-resolved class.
      */
-    # instance-of vA, vB, class            /* CCCC */
+    /* instance-of vA, vB, class@CCCC */
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- CCCC
     GET_OPB(a1)                            # a1 <- B
@@ -1155,6 +1368,7 @@
     /*
      * Return the length of an array.
      */
+    /* array-length vA, vB */
     GET_OPB(a1)                            #  a1 <- B
     GET_OPA4(a2)                           #  a2 <- A+
     GET_VREG(a0, a1)                       #  a0 <- vB (object ref)
@@ -1172,7 +1386,7 @@
     /*
      * Create a new instance of a class.
      */
-    # new-instance vAA, class              /* BBBB */
+    /* new-instance vAA, class@BBBB */
     EXPORT_PC()
     addu   a0, rFP, OFF_FP_SHADOWFRAME
     move   a1, rSELF
@@ -1215,8 +1429,8 @@
      *
      * for: filled-new-array, filled-new-array/range
      */
-    # op vB, {vD, vE, vF, vG, vA}, class   /* CCCC */
-    # op {vCCCC..v(CCCC+AA-1)}, type       /* BBBB */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, type@BBBB */
     .extern MterpFilledNewArray
     EXPORT_PC()
     addu   a0, rFP, OFF_FP_SHADOWFRAME     # a0 <- shadow frame
@@ -1238,8 +1452,8 @@
      *
      * for: filled-new-array, filled-new-array/range
      */
-    # op vB, {vD, vE, vF, vG, vA}, class   /* CCCC */
-    # op {vCCCC..v(CCCC+AA-1)}, type       /* BBBB */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, type@BBBB */
     .extern MterpFilledNewArrayRange
     EXPORT_PC()
     addu   a0, rFP, OFF_FP_SHADOWFRAME     # a0 <- shadow frame
@@ -1258,11 +1472,10 @@
 /* File: mips/op_fill_array_data.S */
     /* fill-array-data vAA, +BBBBBBBB */
     EXPORT_PC()
-    FETCH(a0, 1)                           #  a0 <- bbbb (lo)
-    FETCH(a1, 2)                           #  a1 <- BBBB (hi)
+    FETCH(a1, 1)                           #  a1 <- bbbb (lo)
+    FETCH(a0, 2)                           #  a0 <- BBBB (hi)
     GET_OPA(a3)                            #  a3 <- AA
-    sll       a1, a1, 16                   #  a1 <- BBBBbbbb
-    or        a1, a0, a1                   #  a1 <- BBBBbbbb
+    INSERT_HIGH_HALF(a1, a0)               #  a1 <- BBBBbbbb
     GET_VREG(a0, a3)                       #  a0 <- vAA (array object)
     EAS1(a1, rPC, a1)                      #  a1 <- PC + BBBBbbbb*2 (array data off.)
     JAL(MterpFillArrayData)                #  v0 <- Mterp(obj, payload)
@@ -1330,10 +1543,9 @@
      * our "backward branch" test must be "<=0" instead of "<0".
      */
     /* goto/32 +AAAAAAAA */
-    FETCH(a0, 1)                           #  a0 <- aaaa (lo)
+    FETCH(rINST, 1)                        #  rINST <- aaaa (lo)
     FETCH(a1, 2)                           #  a1 <- AAAA (hi)
-    sll       a1, a1, 16
-    or        rINST, a0, a1                #  rINST <- AAAAaaaa
+    INSERT_HIGH_HALF(rINST, a1)            #  rINST <- AAAAaaaa
     b         MterpCommonTakenBranchNoFlags
 
 /* ------------------------------ */
@@ -1353,8 +1565,7 @@
     FETCH(a0, 1)                           #  a0 <- bbbb (lo)
     FETCH(a1, 2)                           #  a1 <- BBBB (hi)
     GET_OPA(a3)                            #  a3 <- AA
-    sll       t0, a1, 16
-    or        a0, a0, t0                   #  a0 <- BBBBbbbb
+    INSERT_HIGH_HALF(a0, a1)               #  a0 <- BBBBbbbb
     GET_VREG(a1, a3)                       #  a1 <- vAA
     EAS1(a0, rPC, a0)                      #  a0 <- PC + BBBBbbbb*2
     JAL(MterpDoPackedSwitch)                             #  a0 <- code-unit branch offset
@@ -1379,8 +1590,7 @@
     FETCH(a0, 1)                           #  a0 <- bbbb (lo)
     FETCH(a1, 2)                           #  a1 <- BBBB (hi)
     GET_OPA(a3)                            #  a3 <- AA
-    sll       t0, a1, 16
-    or        a0, a0, t0                   #  a0 <- BBBBbbbb
+    INSERT_HIGH_HALF(a0, a1)               #  a0 <- BBBBbbbb
     GET_VREG(a1, a3)                       #  a1 <- vAA
     EAS1(a0, rPC, a0)                      #  a0 <- PC + BBBBbbbb*2
     JAL(MterpDoSparseSwitch)                             #  a0 <- code-unit branch offset
@@ -1393,55 +1603,54 @@
 .L_op_cmpl_float: /* 0x2d */
 /* File: mips/op_cmpl_float.S */
     /*
-     * Compare two floating-point values.  Puts 0, 1, or -1 into the
-     * destination register rTEMP based on the results of the comparison.
-     *
-     * Provide a "naninst" instruction that puts 1 or -1 into rTEMP depending
-     * on what value we'd like to return when one of the operands is NaN.
-     *
-     * The operation we're implementing is:
-     *   if (x == y)
-     *     return 0;
-     *   else if (x < y)
-     *     return -1;
-     *   else if (x > y)
-     *     return 1;
-     *   else
-     *     return {-1 or 1};  // one or both operands was NaN
+     * Compare two floating-point values. Puts 0(==), 1(>), or -1(<)
+     * into the destination register based on the comparison results.
      *
      * for: cmpl-float, cmpg-float
      */
     /* op vAA, vBB, vCC */
 
-    /* "clasic" form */
     FETCH(a0, 1)                           #  a0 <- CCBB
     and       a2, a0, 255                  #  a2 <- BB
     srl       a3, a0, 8
     GET_VREG_F(ft0, a2)
     GET_VREG_F(ft1, a3)
 #ifdef MIPS32REVGE6
-    cmp.ult.s ft2, ft0, ft1               # Is ft0 < ft1
-    li        rTEMP, -1
-    bc1nez    ft2, .Lop_cmpl_float_finish
-    cmp.ult.s ft2, ft1, ft0
-    li        rTEMP, 1
-    bc1nez    ft2, .Lop_cmpl_float_finish
     cmp.eq.s  ft2, ft0, ft1
     li        rTEMP, 0
-    bc1nez    ft2, .Lop_cmpl_float_finish
-    b         .Lop_cmpl_float_nan
-#else
-    c.olt.s   fcc0, ft0, ft1               # Is ft0 < ft1
+    bc1nez    ft2, 1f                      # done if vBB == vCC (ordered)
+    .if 0
+    cmp.lt.s  ft2, ft0, ft1
     li        rTEMP, -1
-    bc1t      fcc0, .Lop_cmpl_float_finish
-    c.olt.s   fcc0, ft1, ft0
+    bc1nez    ft2, 1f                      # done if vBB < vCC (ordered)
+    li        rTEMP, 1                     # vBB > vCC or unordered
+    .else
+    cmp.lt.s  ft2, ft1, ft0
     li        rTEMP, 1
-    bc1t      fcc0, .Lop_cmpl_float_finish
+    bc1nez    ft2, 1f                      # done if vBB > vCC (ordered)
+    li        rTEMP, -1                    # vBB < vCC or unordered
+    .endif
+#else
     c.eq.s    fcc0, ft0, ft1
     li        rTEMP, 0
-    bc1t      fcc0, .Lop_cmpl_float_finish
-    b         .Lop_cmpl_float_nan
+    bc1t      fcc0, 1f                     # done if vBB == vCC (ordered)
+    .if 0
+    c.olt.s   fcc0, ft0, ft1
+    li        rTEMP, -1
+    bc1t      fcc0, 1f                     # done if vBB < vCC (ordered)
+    li        rTEMP, 1                     # vBB > vCC or unordered
+    .else
+    c.olt.s   fcc0, ft1, ft0
+    li        rTEMP, 1
+    bc1t      fcc0, 1f                     # done if vBB > vCC (ordered)
+    li        rTEMP, -1                    # vBB < vCC or unordered
+    .endif
 #endif
+1:
+    GET_OPA(rOBJ)
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(rTEMP, rOBJ, t0)         #  vAA <- rTEMP
 
 /* ------------------------------ */
     .balign 128
@@ -1449,55 +1658,54 @@
 /* File: mips/op_cmpg_float.S */
 /* File: mips/op_cmpl_float.S */
     /*
-     * Compare two floating-point values.  Puts 0, 1, or -1 into the
-     * destination register rTEMP based on the results of the comparison.
-     *
-     * Provide a "naninst" instruction that puts 1 or -1 into rTEMP depending
-     * on what value we'd like to return when one of the operands is NaN.
-     *
-     * The operation we're implementing is:
-     *   if (x == y)
-     *     return 0;
-     *   else if (x < y)
-     *     return -1;
-     *   else if (x > y)
-     *     return 1;
-     *   else
-     *     return {-1 or 1};  // one or both operands was NaN
+     * Compare two floating-point values. Puts 0(==), 1(>), or -1(<)
+     * into the destination register based on the comparison results.
      *
      * for: cmpl-float, cmpg-float
      */
     /* op vAA, vBB, vCC */
 
-    /* "clasic" form */
     FETCH(a0, 1)                           #  a0 <- CCBB
     and       a2, a0, 255                  #  a2 <- BB
     srl       a3, a0, 8
     GET_VREG_F(ft0, a2)
     GET_VREG_F(ft1, a3)
 #ifdef MIPS32REVGE6
-    cmp.ult.s ft2, ft0, ft1               # Is ft0 < ft1
-    li        rTEMP, -1
-    bc1nez    ft2, .Lop_cmpg_float_finish
-    cmp.ult.s ft2, ft1, ft0
-    li        rTEMP, 1
-    bc1nez    ft2, .Lop_cmpg_float_finish
     cmp.eq.s  ft2, ft0, ft1
     li        rTEMP, 0
-    bc1nez    ft2, .Lop_cmpg_float_finish
-    b         .Lop_cmpg_float_nan
-#else
-    c.olt.s   fcc0, ft0, ft1               # Is ft0 < ft1
+    bc1nez    ft2, 1f                      # done if vBB == vCC (ordered)
+    .if 1
+    cmp.lt.s  ft2, ft0, ft1
     li        rTEMP, -1
-    bc1t      fcc0, .Lop_cmpg_float_finish
-    c.olt.s   fcc0, ft1, ft0
+    bc1nez    ft2, 1f                      # done if vBB < vCC (ordered)
+    li        rTEMP, 1                     # vBB > vCC or unordered
+    .else
+    cmp.lt.s  ft2, ft1, ft0
     li        rTEMP, 1
-    bc1t      fcc0, .Lop_cmpg_float_finish
+    bc1nez    ft2, 1f                      # done if vBB > vCC (ordered)
+    li        rTEMP, -1                    # vBB < vCC or unordered
+    .endif
+#else
     c.eq.s    fcc0, ft0, ft1
     li        rTEMP, 0
-    bc1t      fcc0, .Lop_cmpg_float_finish
-    b         .Lop_cmpg_float_nan
+    bc1t      fcc0, 1f                     # done if vBB == vCC (ordered)
+    .if 1
+    c.olt.s   fcc0, ft0, ft1
+    li        rTEMP, -1
+    bc1t      fcc0, 1f                     # done if vBB < vCC (ordered)
+    li        rTEMP, 1                     # vBB > vCC or unordered
+    .else
+    c.olt.s   fcc0, ft1, ft0
+    li        rTEMP, 1
+    bc1t      fcc0, 1f                     # done if vBB > vCC (ordered)
+    li        rTEMP, -1                    # vBB < vCC or unordered
+    .endif
 #endif
+1:
+    GET_OPA(rOBJ)
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(rTEMP, rOBJ, t0)         #  vAA <- rTEMP
 
 
 /* ------------------------------ */
@@ -1506,47 +1714,55 @@
 /* File: mips/op_cmpl_double.S */
     /*
      * Compare two floating-point values. Puts 0(==), 1(>), or -1(<)
-     * into the destination register (rTEMP) based on the comparison results.
-     *
-     * Provide a "naninst" instruction that puts 1 or -1 into rTEMP depending
-     * on what value we'd like to return when one of the operands is NaN.
-     *
-     * See op_cmpl_float for more details.
+     * into the destination register based on the comparison results.
      *
      * For: cmpl-double, cmpg-double
      */
     /* op vAA, vBB, vCC */
 
     FETCH(a0, 1)                           #  a0 <- CCBB
-    and       rOBJ, a0, 255                #  s5 <- BB
+    and       rOBJ, a0, 255                #  rOBJ <- BB
     srl       t0, a0, 8                    #  t0 <- CC
-    EAS2(rOBJ, rFP, rOBJ)                  #  s5 <- &fp[BB]
+    EAS2(rOBJ, rFP, rOBJ)                  #  rOBJ <- &fp[BB]
     EAS2(t0, rFP, t0)                      #  t0 <- &fp[CC]
     LOAD64_F(ft0, ft0f, rOBJ)
     LOAD64_F(ft1, ft1f, t0)
 #ifdef MIPS32REVGE6
-    cmp.ult.d ft2, ft0, ft1
-    li        rTEMP, -1
-    bc1nez    ft2, .Lop_cmpl_double_finish
-    cmp.ult.d ft2, ft1, ft0
-    li        rTEMP, 1
-    bc1nez    ft2, .Lop_cmpl_double_finish
     cmp.eq.d  ft2, ft0, ft1
     li        rTEMP, 0
-    bc1nez    ft2, .Lop_cmpl_double_finish
-    b         .Lop_cmpl_double_nan
-#else
-    c.olt.d   fcc0, ft0, ft1
+    bc1nez    ft2, 1f                      # done if vBB == vCC (ordered)
+    .if 0
+    cmp.lt.d  ft2, ft0, ft1
     li        rTEMP, -1
-    bc1t      fcc0, .Lop_cmpl_double_finish
-    c.olt.d   fcc0, ft1, ft0
+    bc1nez    ft2, 1f                      # done if vBB < vCC (ordered)
+    li        rTEMP, 1                     # vBB > vCC or unordered
+    .else
+    cmp.lt.d  ft2, ft1, ft0
     li        rTEMP, 1
-    bc1t      fcc0, .Lop_cmpl_double_finish
+    bc1nez    ft2, 1f                      # done if vBB > vCC (ordered)
+    li        rTEMP, -1                    # vBB < vCC or unordered
+    .endif
+#else
     c.eq.d    fcc0, ft0, ft1
     li        rTEMP, 0
-    bc1t      fcc0, .Lop_cmpl_double_finish
-    b         .Lop_cmpl_double_nan
+    bc1t      fcc0, 1f                     # done if vBB == vCC (ordered)
+    .if 0
+    c.olt.d   fcc0, ft0, ft1
+    li        rTEMP, -1
+    bc1t      fcc0, 1f                     # done if vBB < vCC (ordered)
+    li        rTEMP, 1                     # vBB > vCC or unordered
+    .else
+    c.olt.d   fcc0, ft1, ft0
+    li        rTEMP, 1
+    bc1t      fcc0, 1f                     # done if vBB > vCC (ordered)
+    li        rTEMP, -1                    # vBB < vCC or unordered
+    .endif
 #endif
+1:
+    GET_OPA(rOBJ)
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(rTEMP, rOBJ, t0)         #  vAA <- rTEMP
 
 /* ------------------------------ */
     .balign 128
@@ -1555,47 +1771,55 @@
 /* File: mips/op_cmpl_double.S */
     /*
      * Compare two floating-point values. Puts 0(==), 1(>), or -1(<)
-     * into the destination register (rTEMP) based on the comparison results.
-     *
-     * Provide a "naninst" instruction that puts 1 or -1 into rTEMP depending
-     * on what value we'd like to return when one of the operands is NaN.
-     *
-     * See op_cmpl_float for more details.
+     * into the destination register based on the comparison results.
      *
      * For: cmpl-double, cmpg-double
      */
     /* op vAA, vBB, vCC */
 
     FETCH(a0, 1)                           #  a0 <- CCBB
-    and       rOBJ, a0, 255                #  s5 <- BB
+    and       rOBJ, a0, 255                #  rOBJ <- BB
     srl       t0, a0, 8                    #  t0 <- CC
-    EAS2(rOBJ, rFP, rOBJ)                  #  s5 <- &fp[BB]
+    EAS2(rOBJ, rFP, rOBJ)                  #  rOBJ <- &fp[BB]
     EAS2(t0, rFP, t0)                      #  t0 <- &fp[CC]
     LOAD64_F(ft0, ft0f, rOBJ)
     LOAD64_F(ft1, ft1f, t0)
 #ifdef MIPS32REVGE6
-    cmp.ult.d ft2, ft0, ft1
-    li        rTEMP, -1
-    bc1nez    ft2, .Lop_cmpg_double_finish
-    cmp.ult.d ft2, ft1, ft0
-    li        rTEMP, 1
-    bc1nez    ft2, .Lop_cmpg_double_finish
     cmp.eq.d  ft2, ft0, ft1
     li        rTEMP, 0
-    bc1nez    ft2, .Lop_cmpg_double_finish
-    b         .Lop_cmpg_double_nan
-#else
-    c.olt.d   fcc0, ft0, ft1
+    bc1nez    ft2, 1f                      # done if vBB == vCC (ordered)
+    .if 1
+    cmp.lt.d  ft2, ft0, ft1
     li        rTEMP, -1
-    bc1t      fcc0, .Lop_cmpg_double_finish
-    c.olt.d   fcc0, ft1, ft0
+    bc1nez    ft2, 1f                      # done if vBB < vCC (ordered)
+    li        rTEMP, 1                     # vBB > vCC or unordered
+    .else
+    cmp.lt.d  ft2, ft1, ft0
     li        rTEMP, 1
-    bc1t      fcc0, .Lop_cmpg_double_finish
+    bc1nez    ft2, 1f                      # done if vBB > vCC (ordered)
+    li        rTEMP, -1                    # vBB < vCC or unordered
+    .endif
+#else
     c.eq.d    fcc0, ft0, ft1
     li        rTEMP, 0
-    bc1t      fcc0, .Lop_cmpg_double_finish
-    b         .Lop_cmpg_double_nan
+    bc1t      fcc0, 1f                     # done if vBB == vCC (ordered)
+    .if 1
+    c.olt.d   fcc0, ft0, ft1
+    li        rTEMP, -1
+    bc1t      fcc0, 1f                     # done if vBB < vCC (ordered)
+    li        rTEMP, 1                     # vBB > vCC or unordered
+    .else
+    c.olt.d   fcc0, ft1, ft0
+    li        rTEMP, 1
+    bc1t      fcc0, 1f                     # done if vBB > vCC (ordered)
+    li        rTEMP, -1                    # vBB < vCC or unordered
+    .endif
 #endif
+1:
+    GET_OPA(rOBJ)
+    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG_GOTO(rTEMP, rOBJ, t0)         #  vAA <- rTEMP
 
 
 /* ------------------------------ */
@@ -2015,11 +2239,7 @@
     # null array object?
     beqz      a0, common_errNullObject     #  yes, bail
     LOAD_base_offMirrorArray_length(a3, a0) #  a3 <- arrayObj->length
-    .if 2
     EASN(a0, a0, a1, 2)               #  a0 <- arrayObj + index*width
-    .else
-    addu      a0, a0, a1
-    .endif
     # a1 >= a3; compare unsigned index
     bgeu      a1, a3, common_errArrayIndex #  index >= length, bail
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
@@ -2074,10 +2294,9 @@
     lw   a1, THREAD_EXCEPTION_OFFSET(rSELF)
     PREFETCH_INST(2)                       #  load rINST
     bnez a1, MterpException
-    SET_VREG_OBJECT(v0, rOBJ)              #  vAA <- v0
     ADVANCE(2)                             #  advance rPC
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG_OBJECT_GOTO(v0, rOBJ, t0)     #  vAA <- v0
 
 /* ------------------------------ */
     .balign 128
@@ -2104,11 +2323,7 @@
     # null array object?
     beqz      a0, common_errNullObject     #  yes, bail
     LOAD_base_offMirrorArray_length(a3, a0) #  a3 <- arrayObj->length
-    .if 0
     EASN(a0, a0, a1, 0)               #  a0 <- arrayObj + index*width
-    .else
-    addu      a0, a0, a1
-    .endif
     # a1 >= a3; compare unsigned index
     bgeu      a1, a3, common_errArrayIndex #  index >= length, bail
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
@@ -2142,11 +2357,7 @@
     # null array object?
     beqz      a0, common_errNullObject     #  yes, bail
     LOAD_base_offMirrorArray_length(a3, a0) #  a3 <- arrayObj->length
-    .if 0
     EASN(a0, a0, a1, 0)               #  a0 <- arrayObj + index*width
-    .else
-    addu      a0, a0, a1
-    .endif
     # a1 >= a3; compare unsigned index
     bgeu      a1, a3, common_errArrayIndex #  index >= length, bail
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
@@ -2180,11 +2391,7 @@
     # null array object?
     beqz      a0, common_errNullObject     #  yes, bail
     LOAD_base_offMirrorArray_length(a3, a0) #  a3 <- arrayObj->length
-    .if 1
     EASN(a0, a0, a1, 1)               #  a0 <- arrayObj + index*width
-    .else
-    addu      a0, a0, a1
-    .endif
     # a1 >= a3; compare unsigned index
     bgeu      a1, a3, common_errArrayIndex #  index >= length, bail
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
@@ -2218,11 +2425,7 @@
     # null array object?
     beqz      a0, common_errNullObject     #  yes, bail
     LOAD_base_offMirrorArray_length(a3, a0) #  a3 <- arrayObj->length
-    .if 1
     EASN(a0, a0, a1, 1)               #  a0 <- arrayObj + index*width
-    .else
-    addu      a0, a0, a1
-    .endif
     # a1 >= a3; compare unsigned index
     bgeu      a1, a3, common_errArrayIndex #  index >= length, bail
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
@@ -2253,17 +2456,14 @@
     # null array object?
     beqz      a0, common_errNullObject     #  yes, bail
     LOAD_base_offMirrorArray_length(a3, a0) #  a3 <- arrayObj->length
-    .if 2
     EASN(a0, a0, a1, 2)               #  a0 <- arrayObj + index*width
-    .else
-    addu      a0, a0, a1
-    .endif
     bgeu      a1, a3, common_errArrayIndex #  index >= length, bail
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     GET_VREG(a2, rOBJ)                     #  a2 <- vAA
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GET_OPCODE_TARGET(t0)
     sw a2, MIRROR_INT_ARRAY_DATA_OFFSET(a0)            #  vBB[vCC] <- a2
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    JR(t0)                                 #  jump to next instruction
 
 /* ------------------------------ */
     .balign 128
@@ -2271,8 +2471,6 @@
 /* File: mips/op_aput_wide.S */
     /*
      * Array put, 64 bits.  vBB[vCC] <- vAA.
-     *
-     * Arrays of long/double are 64-bit aligned, so it's okay to use STRD.
      */
     /* aput-wide vAA, vBB, vCC */
     FETCH(a0, 1)                           #  a0 <- CCBB
@@ -2292,8 +2490,9 @@
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     LOAD64(a2, a3, rOBJ)                   #  a2/a3 <- vAA/vAA+1
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GET_OPCODE_TARGET(t0)
     STORE64_off(a2, a3, a0, MIRROR_WIDE_ARRAY_DATA_OFFSET) #  a2/a3 <- vBB[vCC]
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    JR(t0)                                 #  jump to next instruction
 
 /* ------------------------------ */
     .balign 128
@@ -2337,17 +2536,14 @@
     # null array object?
     beqz      a0, common_errNullObject     #  yes, bail
     LOAD_base_offMirrorArray_length(a3, a0) #  a3 <- arrayObj->length
-    .if 0
     EASN(a0, a0, a1, 0)               #  a0 <- arrayObj + index*width
-    .else
-    addu      a0, a0, a1
-    .endif
     bgeu      a1, a3, common_errArrayIndex #  index >= length, bail
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     GET_VREG(a2, rOBJ)                     #  a2 <- vAA
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GET_OPCODE_TARGET(t0)
     sb a2, MIRROR_BOOLEAN_ARRAY_DATA_OFFSET(a0)            #  vBB[vCC] <- a2
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    JR(t0)                                 #  jump to next instruction
 
 
 /* ------------------------------ */
@@ -2373,17 +2569,14 @@
     # null array object?
     beqz      a0, common_errNullObject     #  yes, bail
     LOAD_base_offMirrorArray_length(a3, a0) #  a3 <- arrayObj->length
-    .if 0
     EASN(a0, a0, a1, 0)               #  a0 <- arrayObj + index*width
-    .else
-    addu      a0, a0, a1
-    .endif
     bgeu      a1, a3, common_errArrayIndex #  index >= length, bail
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     GET_VREG(a2, rOBJ)                     #  a2 <- vAA
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GET_OPCODE_TARGET(t0)
     sb a2, MIRROR_BYTE_ARRAY_DATA_OFFSET(a0)            #  vBB[vCC] <- a2
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    JR(t0)                                 #  jump to next instruction
 
 
 /* ------------------------------ */
@@ -2409,17 +2602,14 @@
     # null array object?
     beqz      a0, common_errNullObject     #  yes, bail
     LOAD_base_offMirrorArray_length(a3, a0) #  a3 <- arrayObj->length
-    .if 1
     EASN(a0, a0, a1, 1)               #  a0 <- arrayObj + index*width
-    .else
-    addu      a0, a0, a1
-    .endif
     bgeu      a1, a3, common_errArrayIndex #  index >= length, bail
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     GET_VREG(a2, rOBJ)                     #  a2 <- vAA
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GET_OPCODE_TARGET(t0)
     sh a2, MIRROR_CHAR_ARRAY_DATA_OFFSET(a0)            #  vBB[vCC] <- a2
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    JR(t0)                                 #  jump to next instruction
 
 
 /* ------------------------------ */
@@ -2445,17 +2635,14 @@
     # null array object?
     beqz      a0, common_errNullObject     #  yes, bail
     LOAD_base_offMirrorArray_length(a3, a0) #  a3 <- arrayObj->length
-    .if 1
     EASN(a0, a0, a1, 1)               #  a0 <- arrayObj + index*width
-    .else
-    addu      a0, a0, a1
-    .endif
     bgeu      a1, a3, common_errArrayIndex #  index >= length, bail
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     GET_VREG(a2, rOBJ)                     #  a2 <- vAA
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    GET_OPCODE_TARGET(t0)
     sh a2, MIRROR_SHORT_ARRAY_DATA_OFFSET(a0)            #  vBB[vCC] <- a2
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    JR(t0)                                 #  jump to next instruction
 
 
 /* ------------------------------ */
@@ -2467,6 +2654,7 @@
      *
      * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
      */
+    /* op vA, vB, field@CCCC */
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref CCCC
     GET_OPB(a1)                            # a1 <- B
@@ -2478,14 +2666,13 @@
     GET_OPA4(a2)                           # a2<- A+
     PREFETCH_INST(2)                       # load rINST
     bnez  a3, MterpPossibleException        # bail out
-    .if 0
-    SET_VREG_OBJECT(v0, a2)                # fp[A] <- v0
-    .else
-    SET_VREG(v0, a2)                       # fp[A] <- v0
-    .endif
     ADVANCE(2)                             #  advance rPC
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    .if 0
+    SET_VREG_OBJECT_GOTO(v0, a2, t0)       # fp[A] <- v0
+    .else
+    SET_VREG_GOTO(v0, a2, t0)              # fp[A] <- v0
+    .endif
 
 /* ------------------------------ */
     .balign 128
@@ -2496,6 +2683,7 @@
      *
      * for: iget-wide
      */
+    /* op vA, vB, field@CCCC */
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field byte offset
     GET_OPB(a1)                            # a1 <- B
@@ -2507,10 +2695,9 @@
     GET_OPA4(a2)                           # a2<- A+
     PREFETCH_INST(2)                       # load rINST
     bnez a3, MterpException                # bail out
-    SET_VREG64(v0, v1, a2)                 # fp[A] <- v0/v1
     ADVANCE(2)                             # advance rPC
     GET_INST_OPCODE(t0)                    # extract opcode from rINST
-    GOTO_OPCODE(t0)                        # jump to next instruction
+    SET_VREG64_GOTO(v0, v1, a2, t0)        # fp[A] <- v0/v1
 
 /* ------------------------------ */
     .balign 128
@@ -2522,6 +2709,7 @@
      *
      * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
      */
+    /* op vA, vB, field@CCCC */
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref CCCC
     GET_OPB(a1)                            # a1 <- B
@@ -2533,14 +2721,13 @@
     GET_OPA4(a2)                           # a2<- A+
     PREFETCH_INST(2)                       # load rINST
     bnez  a3, MterpPossibleException        # bail out
-    .if 1
-    SET_VREG_OBJECT(v0, a2)                # fp[A] <- v0
-    .else
-    SET_VREG(v0, a2)                       # fp[A] <- v0
-    .endif
     ADVANCE(2)                             #  advance rPC
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    .if 1
+    SET_VREG_OBJECT_GOTO(v0, a2, t0)       # fp[A] <- v0
+    .else
+    SET_VREG_GOTO(v0, a2, t0)              # fp[A] <- v0
+    .endif
 
 
 /* ------------------------------ */
@@ -2553,6 +2740,7 @@
      *
      * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
      */
+    /* op vA, vB, field@CCCC */
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref CCCC
     GET_OPB(a1)                            # a1 <- B
@@ -2564,14 +2752,13 @@
     GET_OPA4(a2)                           # a2<- A+
     PREFETCH_INST(2)                       # load rINST
     bnez  a3, MterpPossibleException        # bail out
-    .if 0
-    SET_VREG_OBJECT(v0, a2)                # fp[A] <- v0
-    .else
-    SET_VREG(v0, a2)                       # fp[A] <- v0
-    .endif
     ADVANCE(2)                             #  advance rPC
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    .if 0
+    SET_VREG_OBJECT_GOTO(v0, a2, t0)       # fp[A] <- v0
+    .else
+    SET_VREG_GOTO(v0, a2, t0)              # fp[A] <- v0
+    .endif
 
 
 /* ------------------------------ */
@@ -2584,6 +2771,7 @@
      *
      * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
      */
+    /* op vA, vB, field@CCCC */
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref CCCC
     GET_OPB(a1)                            # a1 <- B
@@ -2595,14 +2783,13 @@
     GET_OPA4(a2)                           # a2<- A+
     PREFETCH_INST(2)                       # load rINST
     bnez  a3, MterpPossibleException        # bail out
-    .if 0
-    SET_VREG_OBJECT(v0, a2)                # fp[A] <- v0
-    .else
-    SET_VREG(v0, a2)                       # fp[A] <- v0
-    .endif
     ADVANCE(2)                             #  advance rPC
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    .if 0
+    SET_VREG_OBJECT_GOTO(v0, a2, t0)       # fp[A] <- v0
+    .else
+    SET_VREG_GOTO(v0, a2, t0)              # fp[A] <- v0
+    .endif
 
 
 /* ------------------------------ */
@@ -2615,6 +2802,7 @@
      *
      * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
      */
+    /* op vA, vB, field@CCCC */
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref CCCC
     GET_OPB(a1)                            # a1 <- B
@@ -2626,14 +2814,13 @@
     GET_OPA4(a2)                           # a2<- A+
     PREFETCH_INST(2)                       # load rINST
     bnez  a3, MterpPossibleException        # bail out
-    .if 0
-    SET_VREG_OBJECT(v0, a2)                # fp[A] <- v0
-    .else
-    SET_VREG(v0, a2)                       # fp[A] <- v0
-    .endif
     ADVANCE(2)                             #  advance rPC
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    .if 0
+    SET_VREG_OBJECT_GOTO(v0, a2, t0)       # fp[A] <- v0
+    .else
+    SET_VREG_GOTO(v0, a2, t0)              # fp[A] <- v0
+    .endif
 
 
 /* ------------------------------ */
@@ -2646,6 +2833,7 @@
      *
      * for: iget, iget-object, iget-boolean, iget-byte, iget-char, iget-short
      */
+    /* op vA, vB, field@CCCC */
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref CCCC
     GET_OPB(a1)                            # a1 <- B
@@ -2657,14 +2845,13 @@
     GET_OPA4(a2)                           # a2<- A+
     PREFETCH_INST(2)                       # load rINST
     bnez  a3, MterpPossibleException        # bail out
-    .if 0
-    SET_VREG_OBJECT(v0, a2)                # fp[A] <- v0
-    .else
-    SET_VREG(v0, a2)                       # fp[A] <- v0
-    .endif
     ADVANCE(2)                             #  advance rPC
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    .if 0
+    SET_VREG_OBJECT_GOTO(v0, a2, t0)       # fp[A] <- v0
+    .else
+    SET_VREG_GOTO(v0, a2, t0)              # fp[A] <- v0
+    .endif
 
 
 /* ------------------------------ */
@@ -2676,7 +2863,7 @@
      *
      * for: iput, iput-boolean, iput-byte, iput-char, iput-short
      */
-    # op vA, vB, field                     /* CCCC */
+    /* op vA, vB, field@CCCC */
     .extern artSet32InstanceFromMterp
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref CCCC
@@ -2696,7 +2883,7 @@
     .balign 128
 .L_op_iput_wide: /* 0x5a */
 /* File: mips/op_iput_wide.S */
-    # iput-wide vA, vB, field              /* CCCC */
+    /* iput-wide vA, vB, field@CCCC */
     .extern artSet64InstanceFromMterp
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref CCCC
@@ -2721,7 +2908,7 @@
      *
      * for: iput-object, iput-object-volatile
      */
-    # op vA, vB, field                     /* CCCC */
+    /* op vA, vB, field@CCCC */
     EXPORT_PC()
     addu   a0, rFP, OFF_FP_SHADOWFRAME
     move   a1, rPC
@@ -2743,7 +2930,7 @@
      *
      * for: iput, iput-boolean, iput-byte, iput-char, iput-short
      */
-    # op vA, vB, field                     /* CCCC */
+    /* op vA, vB, field@CCCC */
     .extern artSet8InstanceFromMterp
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref CCCC
@@ -2770,7 +2957,7 @@
      *
      * for: iput, iput-boolean, iput-byte, iput-char, iput-short
      */
-    # op vA, vB, field                     /* CCCC */
+    /* op vA, vB, field@CCCC */
     .extern artSet8InstanceFromMterp
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref CCCC
@@ -2797,7 +2984,7 @@
      *
      * for: iput, iput-boolean, iput-byte, iput-char, iput-short
      */
-    # op vA, vB, field                     /* CCCC */
+    /* op vA, vB, field@CCCC */
     .extern artSet16InstanceFromMterp
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref CCCC
@@ -2824,7 +3011,7 @@
      *
      * for: iput, iput-boolean, iput-byte, iput-char, iput-short
      */
-    # op vA, vB, field                     /* CCCC */
+    /* op vA, vB, field@CCCC */
     .extern artSet16InstanceFromMterp
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref CCCC
@@ -2850,7 +3037,7 @@
      *
      * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
      */
-    # op vAA, field                        /* BBBB */
+    /* op vAA, field@BBBB */
     .extern artGet32StaticFromCode
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref BBBB
@@ -2861,14 +3048,13 @@
     GET_OPA(a2)                            # a2 <- AA
     PREFETCH_INST(2)
     bnez  a3, MterpException               # bail out
-.if 0
-    SET_VREG_OBJECT(v0, a2)                # fp[AA] <- v0
-.else
-    SET_VREG(v0, a2)                       # fp[AA] <- v0
-.endif
     ADVANCE(2)
     GET_INST_OPCODE(t0)                    # extract opcode from rINST
-    GOTO_OPCODE(t0)                        # jump to next instruction
+.if 0
+    SET_VREG_OBJECT_GOTO(v0, a2, t0)       # fp[AA] <- v0
+.else
+    SET_VREG_GOTO(v0, a2, t0)              # fp[AA] <- v0
+.endif
 
 /* ------------------------------ */
     .balign 128
@@ -2877,7 +3063,7 @@
     /*
      * 64-bit SGET handler.
      */
-    # sget-wide vAA, field                 /* BBBB */
+    /* sget-wide vAA, field@BBBB */
     .extern artGet64StaticFromCode
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref BBBB
@@ -2888,9 +3074,8 @@
     bnez  a3, MterpException
     GET_OPA(a1)                            # a1 <- AA
     FETCH_ADVANCE_INST(2)                  # advance rPC, load rINST
-    SET_VREG64(v0, v1, a1)                 # vAA/vAA+1 <- v0/v1
     GET_INST_OPCODE(t0)                    # extract opcode from rINST
-    GOTO_OPCODE(t0)                        # jump to next instruction
+    SET_VREG64_GOTO(v0, v1, a1, t0)        # vAA/vAA+1 <- v0/v1
 
 /* ------------------------------ */
     .balign 128
@@ -2902,7 +3087,7 @@
      *
      * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
      */
-    # op vAA, field                        /* BBBB */
+    /* op vAA, field@BBBB */
     .extern artGetObjStaticFromCode
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref BBBB
@@ -2913,14 +3098,13 @@
     GET_OPA(a2)                            # a2 <- AA
     PREFETCH_INST(2)
     bnez  a3, MterpException               # bail out
-.if 1
-    SET_VREG_OBJECT(v0, a2)                # fp[AA] <- v0
-.else
-    SET_VREG(v0, a2)                       # fp[AA] <- v0
-.endif
     ADVANCE(2)
     GET_INST_OPCODE(t0)                    # extract opcode from rINST
-    GOTO_OPCODE(t0)                        # jump to next instruction
+.if 1
+    SET_VREG_OBJECT_GOTO(v0, a2, t0)       # fp[AA] <- v0
+.else
+    SET_VREG_GOTO(v0, a2, t0)              # fp[AA] <- v0
+.endif
 
 
 /* ------------------------------ */
@@ -2933,7 +3117,7 @@
      *
      * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
      */
-    # op vAA, field                        /* BBBB */
+    /* op vAA, field@BBBB */
     .extern artGetBooleanStaticFromCode
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref BBBB
@@ -2944,14 +3128,13 @@
     GET_OPA(a2)                            # a2 <- AA
     PREFETCH_INST(2)
     bnez  a3, MterpException               # bail out
-.if 0
-    SET_VREG_OBJECT(v0, a2)                # fp[AA] <- v0
-.else
-    SET_VREG(v0, a2)                       # fp[AA] <- v0
-.endif
     ADVANCE(2)
     GET_INST_OPCODE(t0)                    # extract opcode from rINST
-    GOTO_OPCODE(t0)                        # jump to next instruction
+.if 0
+    SET_VREG_OBJECT_GOTO(v0, a2, t0)       # fp[AA] <- v0
+.else
+    SET_VREG_GOTO(v0, a2, t0)              # fp[AA] <- v0
+.endif
 
 
 /* ------------------------------ */
@@ -2964,7 +3147,7 @@
      *
      * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
      */
-    # op vAA, field                        /* BBBB */
+    /* op vAA, field@BBBB */
     .extern artGetByteStaticFromCode
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref BBBB
@@ -2975,14 +3158,13 @@
     GET_OPA(a2)                            # a2 <- AA
     PREFETCH_INST(2)
     bnez  a3, MterpException               # bail out
-.if 0
-    SET_VREG_OBJECT(v0, a2)                # fp[AA] <- v0
-.else
-    SET_VREG(v0, a2)                       # fp[AA] <- v0
-.endif
     ADVANCE(2)
     GET_INST_OPCODE(t0)                    # extract opcode from rINST
-    GOTO_OPCODE(t0)                        # jump to next instruction
+.if 0
+    SET_VREG_OBJECT_GOTO(v0, a2, t0)       # fp[AA] <- v0
+.else
+    SET_VREG_GOTO(v0, a2, t0)              # fp[AA] <- v0
+.endif
 
 
 /* ------------------------------ */
@@ -2995,7 +3177,7 @@
      *
      * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
      */
-    # op vAA, field                        /* BBBB */
+    /* op vAA, field@BBBB */
     .extern artGetCharStaticFromCode
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref BBBB
@@ -3006,14 +3188,13 @@
     GET_OPA(a2)                            # a2 <- AA
     PREFETCH_INST(2)
     bnez  a3, MterpException               # bail out
-.if 0
-    SET_VREG_OBJECT(v0, a2)                # fp[AA] <- v0
-.else
-    SET_VREG(v0, a2)                       # fp[AA] <- v0
-.endif
     ADVANCE(2)
     GET_INST_OPCODE(t0)                    # extract opcode from rINST
-    GOTO_OPCODE(t0)                        # jump to next instruction
+.if 0
+    SET_VREG_OBJECT_GOTO(v0, a2, t0)       # fp[AA] <- v0
+.else
+    SET_VREG_GOTO(v0, a2, t0)              # fp[AA] <- v0
+.endif
 
 
 /* ------------------------------ */
@@ -3026,7 +3207,7 @@
      *
      * for: sget, sget-object, sget-boolean, sget-byte, sget-char, sget-short
      */
-    # op vAA, field                        /* BBBB */
+    /* op vAA, field@BBBB */
     .extern artGetShortStaticFromCode
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref BBBB
@@ -3037,14 +3218,13 @@
     GET_OPA(a2)                            # a2 <- AA
     PREFETCH_INST(2)
     bnez  a3, MterpException               # bail out
-.if 0
-    SET_VREG_OBJECT(v0, a2)                # fp[AA] <- v0
-.else
-    SET_VREG(v0, a2)                       # fp[AA] <- v0
-.endif
     ADVANCE(2)
     GET_INST_OPCODE(t0)                    # extract opcode from rINST
-    GOTO_OPCODE(t0)                        # jump to next instruction
+.if 0
+    SET_VREG_OBJECT_GOTO(v0, a2, t0)       # fp[AA] <- v0
+.else
+    SET_VREG_GOTO(v0, a2, t0)              # fp[AA] <- v0
+.endif
 
 
 /* ------------------------------ */
@@ -3056,7 +3236,7 @@
      *
      * for: sput, sput-boolean, sput-byte, sput-char, sput-short
      */
-    # op vAA, field                        /* BBBB */
+    /* op vAA, field@BBBB */
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref BBBB
     GET_OPA(a3)                            # a3 <- AA
@@ -3077,7 +3257,7 @@
     /*
      * 64-bit SPUT handler.
      */
-    # sput-wide vAA, field                 /* BBBB */
+    /* sput-wide vAA, field@BBBB */
     .extern artSet64IndirectStaticFromMterp
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref CCCC
@@ -3123,7 +3303,7 @@
      *
      * for: sput, sput-boolean, sput-byte, sput-char, sput-short
      */
-    # op vAA, field                        /* BBBB */
+    /* op vAA, field@BBBB */
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref BBBB
     GET_OPA(a3)                            # a3 <- AA
@@ -3148,7 +3328,7 @@
      *
      * for: sput, sput-boolean, sput-byte, sput-char, sput-short
      */
-    # op vAA, field                        /* BBBB */
+    /* op vAA, field@BBBB */
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref BBBB
     GET_OPA(a3)                            # a3 <- AA
@@ -3173,7 +3353,7 @@
      *
      * for: sput, sput-boolean, sput-byte, sput-char, sput-short
      */
-    # op vAA, field                        /* BBBB */
+    /* op vAA, field@BBBB */
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref BBBB
     GET_OPA(a3)                            # a3 <- AA
@@ -3198,7 +3378,7 @@
      *
      * for: sput, sput-boolean, sput-byte, sput-char, sput-short
      */
-    # op vAA, field                        /* BBBB */
+    /* op vAA, field@BBBB */
     EXPORT_PC()
     FETCH(a0, 1)                           # a0 <- field ref BBBB
     GET_OPA(a3)                            # a3 <- AA
@@ -3221,8 +3401,8 @@
     /*
      * Generic invoke handler wrapper.
      */
-    # op vB, {vD, vE, vF, vG, vA}, class   /* CCCC */
-    # op {vCCCC..v(CCCC+AA-1)}, meth       /* BBBB */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
     .extern MterpInvokeVirtual
     EXPORT_PC()
     move    a0, rSELF
@@ -3246,8 +3426,8 @@
     /*
      * Generic invoke handler wrapper.
      */
-    # op vB, {vD, vE, vF, vG, vA}, class   /* CCCC */
-    # op {vCCCC..v(CCCC+AA-1)}, meth       /* BBBB */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
     .extern MterpInvokeSuper
     EXPORT_PC()
     move    a0, rSELF
@@ -3271,8 +3451,8 @@
     /*
      * Generic invoke handler wrapper.
      */
-    # op vB, {vD, vE, vF, vG, vA}, class   /* CCCC */
-    # op {vCCCC..v(CCCC+AA-1)}, meth       /* BBBB */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
     .extern MterpInvokeDirect
     EXPORT_PC()
     move    a0, rSELF
@@ -3296,8 +3476,8 @@
     /*
      * Generic invoke handler wrapper.
      */
-    # op vB, {vD, vE, vF, vG, vA}, class   /* CCCC */
-    # op {vCCCC..v(CCCC+AA-1)}, meth       /* BBBB */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
     .extern MterpInvokeStatic
     EXPORT_PC()
     move    a0, rSELF
@@ -3321,8 +3501,8 @@
     /*
      * Generic invoke handler wrapper.
      */
-    # op vB, {vD, vE, vF, vG, vA}, class   /* CCCC */
-    # op {vCCCC..v(CCCC+AA-1)}, meth       /* BBBB */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
     .extern MterpInvokeInterface
     EXPORT_PC()
     move    a0, rSELF
@@ -3344,7 +3524,7 @@
 /* File: mips/op_return_void_no_barrier.S */
     lw     ra, THREAD_FLAGS_OFFSET(rSELF)
     move   a0, rSELF
-    and    ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    and    ra, THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     beqz   ra, 1f
     JAL(MterpSuspendCheck)                 # (self)
 1:
@@ -3360,8 +3540,8 @@
     /*
      * Generic invoke handler wrapper.
      */
-    # op vB, {vD, vE, vF, vG, vA}, class   /* CCCC */
-    # op {vCCCC..v(CCCC+AA-1)}, meth       /* BBBB */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
     .extern MterpInvokeVirtualRange
     EXPORT_PC()
     move    a0, rSELF
@@ -3385,8 +3565,8 @@
     /*
      * Generic invoke handler wrapper.
      */
-    # op vB, {vD, vE, vF, vG, vA}, class   /* CCCC */
-    # op {vCCCC..v(CCCC+AA-1)}, meth       /* BBBB */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
     .extern MterpInvokeSuperRange
     EXPORT_PC()
     move    a0, rSELF
@@ -3410,8 +3590,8 @@
     /*
      * Generic invoke handler wrapper.
      */
-    # op vB, {vD, vE, vF, vG, vA}, class   /* CCCC */
-    # op {vCCCC..v(CCCC+AA-1)}, meth       /* BBBB */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
     .extern MterpInvokeDirectRange
     EXPORT_PC()
     move    a0, rSELF
@@ -3435,8 +3615,8 @@
     /*
      * Generic invoke handler wrapper.
      */
-    # op vB, {vD, vE, vF, vG, vA}, class   /* CCCC */
-    # op {vCCCC..v(CCCC+AA-1)}, meth       /* BBBB */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
     .extern MterpInvokeStaticRange
     EXPORT_PC()
     move    a0, rSELF
@@ -3460,8 +3640,8 @@
     /*
      * Generic invoke handler wrapper.
      */
-    # op vB, {vD, vE, vF, vG, vA}, class   /* CCCC */
-    # op {vCCCC..v(CCCC+AA-1)}, meth       /* BBBB */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
     .extern MterpInvokeInterfaceRange
     EXPORT_PC()
     move    a0, rSELF
@@ -3506,11 +3686,11 @@
 /* File: mips/unop.S */
     /*
      * Generic 32-bit unary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = op a0".
+     * specifies an instruction that performs "result0 = op a0".
      * This could be a MIPS instruction or a function call.
      *
-     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
-     *      int-to-byte, int-to-char, int-to-short
+     * for: int-to-byte, int-to-char, int-to-short,
+     *      neg-int, not-int, neg-float
      */
     /* unop vA, vB */
     GET_OPB(a3)                            #  a3 <- B
@@ -3520,8 +3700,7 @@
                                   #  optional op
     negu a0, a0                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t1)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, t0, t1)        #  vAA <- result0
-    /* 9-10 instructions */
+    SET_VREG_GOTO(a0, t0, t1)        #  vA <- result0
 
 
 /* ------------------------------ */
@@ -3531,11 +3710,11 @@
 /* File: mips/unop.S */
     /*
      * Generic 32-bit unary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = op a0".
+     * specifies an instruction that performs "result0 = op a0".
      * This could be a MIPS instruction or a function call.
      *
-     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
-     *      int-to-byte, int-to-char, int-to-short
+     * for: int-to-byte, int-to-char, int-to-short,
+     *      neg-int, not-int, neg-float
      */
     /* unop vA, vB */
     GET_OPB(a3)                            #  a3 <- B
@@ -3545,8 +3724,7 @@
                                   #  optional op
     not a0, a0                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t1)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, t0, t1)        #  vAA <- result0
-    /* 9-10 instructions */
+    SET_VREG_GOTO(a0, t0, t1)        #  vA <- result0
 
 
 /* ------------------------------ */
@@ -3556,7 +3734,7 @@
 /* File: mips/unopWide.S */
     /*
      * Generic 64-bit unary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = op a0/a1".
+     * specifies an instruction that performs "result0/result1 = op a0/a1".
      * This could be MIPS instruction or a function call.
      *
      * For: neg-long, not-long, neg-double,
@@ -3565,14 +3743,12 @@
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_OPB(a3)                            #  a3 <- B
     EAS2(a3, rFP, a3)                      #  a3 <- &fp[B]
-    LOAD64(a0, a1, a3)                     #  a0/a1 <- vAA
+    LOAD64(a0, a1, a3)                     #  a0/a1 <- vA
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     negu v0, a0                              #  optional op
     negu v1, a1; sltu a0, zero, v0; subu v1, v1, a0                                 #  a0/a1 <- op, a2-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(v0, v1, rOBJ)   #  vAA <- a0/a1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-    /* 12-13 instructions */
+    SET_VREG64_GOTO(v0, v1, rOBJ, t0)   #  vA/vA+1 <- a0/a1
 
 
 /* ------------------------------ */
@@ -3582,7 +3758,7 @@
 /* File: mips/unopWide.S */
     /*
      * Generic 64-bit unary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = op a0/a1".
+     * specifies an instruction that performs "result0/result1 = op a0/a1".
      * This could be MIPS instruction or a function call.
      *
      * For: neg-long, not-long, neg-double,
@@ -3591,14 +3767,12 @@
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_OPB(a3)                            #  a3 <- B
     EAS2(a3, rFP, a3)                      #  a3 <- &fp[B]
-    LOAD64(a0, a1, a3)                     #  a0/a1 <- vAA
+    LOAD64(a0, a1, a3)                     #  a0/a1 <- vA
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     not a0, a0                              #  optional op
     not a1, a1                                 #  a0/a1 <- op, a2-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(a0, a1, rOBJ)   #  vAA <- a0/a1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-    /* 12-13 instructions */
+    SET_VREG64_GOTO(a0, a1, rOBJ, t0)   #  vA/vA+1 <- a0/a1
 
 
 /* ------------------------------ */
@@ -3608,11 +3782,11 @@
 /* File: mips/unop.S */
     /*
      * Generic 32-bit unary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = op a0".
+     * specifies an instruction that performs "result0 = op a0".
      * This could be a MIPS instruction or a function call.
      *
-     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
-     *      int-to-byte, int-to-char, int-to-short
+     * for: int-to-byte, int-to-char, int-to-short,
+     *      neg-int, not-int, neg-float
      */
     /* unop vA, vB */
     GET_OPB(a3)                            #  a3 <- B
@@ -3622,8 +3796,7 @@
                                   #  optional op
     addu a0, a0, 0x80000000                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t1)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, t0, t1)        #  vAA <- result0
-    /* 9-10 instructions */
+    SET_VREG_GOTO(a0, t0, t1)        #  vA <- result0
 
 
 /* ------------------------------ */
@@ -3633,7 +3806,7 @@
 /* File: mips/unopWide.S */
     /*
      * Generic 64-bit unary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = op a0/a1".
+     * specifies an instruction that performs "result0/result1 = op a0/a1".
      * This could be MIPS instruction or a function call.
      *
      * For: neg-long, not-long, neg-double,
@@ -3642,14 +3815,12 @@
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_OPB(a3)                            #  a3 <- B
     EAS2(a3, rFP, a3)                      #  a3 <- &fp[B]
-    LOAD64(a0, a1, a3)                     #  a0/a1 <- vAA
+    LOAD64(a0, a1, a3)                     #  a0/a1 <- vA
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
                                   #  optional op
     addu a1, a1, 0x80000000                                 #  a0/a1 <- op, a2-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(a0, a1, rOBJ)   #  vAA <- a0/a1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-    /* 12-13 instructions */
+    SET_VREG64_GOTO(a0, a1, rOBJ, t0)   #  vA/vA+1 <- a0/a1
 
 
 /* ------------------------------ */
@@ -3659,8 +3830,7 @@
 /* File: mips/unopWider.S */
     /*
      * Generic 32bit-to-64bit unary operation.  Provide an "instr" line
-     * that specifies an instruction that performs "result = op a0", where
-     * "result" is a 64-bit quantity in a0/a1.
+     * that specifies an instruction that performs "result0/result1 = op a0".
      *
      * For: int-to-long
      */
@@ -3672,9 +3842,7 @@
                                   #  optional op
     sra a1, a0, 31                                 #  result <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(a0, a1, rOBJ)   #  vA/vA+1 <- a0/a1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-    /* 10-11 instructions */
+    SET_VREG64_GOTO(a0, a1, rOBJ, t0)   #  vA/vA+1 <- a0/a1
 
 
 /* ------------------------------ */
@@ -3683,23 +3851,20 @@
 /* File: mips/op_int_to_float.S */
 /* File: mips/funop.S */
     /*
-     * Generic 32-bit unary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = op a0".
+     * Generic 32-bit floating-point unary operation.  Provide an "instr"
+     * line that specifies an instruction that performs "fv0 = op fa0".
      * This could be a MIPS instruction or a function call.
      *
-     * for: int-to-float, float-to-int
+     * for: int-to-float
      */
     /* unop vA, vB */
     GET_OPB(a3)                            #  a3 <- B
-    GET_OPA4(rOBJ)                         #  t0 <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_VREG_F(fa0, a3)
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     cvt.s.w fv0, fa0
-
-.Lop_int_to_float_set_vreg_f:
-    SET_VREG_F(fv0, rOBJ)
     GET_INST_OPCODE(t1)                    #  extract opcode from rINST
-    GOTO_OPCODE(t1)                        #  jump to next instruction
+    SET_VREG_F_GOTO(fv0, rOBJ, t1)         #  vA <- fv0
 
 
 /* ------------------------------ */
@@ -3708,11 +3873,10 @@
 /* File: mips/op_int_to_double.S */
 /* File: mips/funopWider.S */
     /*
-     * Generic 32bit-to-64bit unary operation.  Provide an "instr" line
-     * that specifies an instruction that performs "result = op a0", where
-     * "result" is a 64-bit quantity in a0/a1.
+     * Generic 32bit-to-64bit floating-point unary operation.  Provide an "instr"
+     * line that specifies an instruction that performs "fv0 = op fa0".
      *
-     * For: int-to-double, float-to-long, float-to-double
+     * For: int-to-double, float-to-double
      */
     /* unop vA, vB */
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
@@ -3720,11 +3884,8 @@
     GET_VREG_F(fa0, a3)
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     cvt.d.w fv0, fa0
-
-.Lop_int_to_double_set_vreg:
-    SET_VREG64_F(fv0, fv0f, rOBJ)                             #  vA/vA+1 <- a0/a1
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_F_GOTO(fv0, fv0f, rOBJ, t0) #  vA/vA+1 <- fv0
 
 
 /* ------------------------------ */
@@ -3741,120 +3902,157 @@
     GET_VREG(a2, a1)                       #  a2 <- fp[B]
     GET_INST_OPCODE(t0)                    #  t0 <- opcode from rINST
     .if 0
-    SET_VREG_OBJECT(a2, a0)                #  fp[A] <- a2
+    SET_VREG_OBJECT_GOTO(a2, a0, t0)       #  fp[A] <- a2
     .else
-    SET_VREG(a2, a0)                       #  fp[A] <- a2
+    SET_VREG_GOTO(a2, a0, t0)              #  fp[A] <- a2
     .endif
-    GOTO_OPCODE(t0)                        #  jump to next instruction
 
 
 /* ------------------------------ */
     .balign 128
 .L_op_long_to_float: /* 0x85 */
 /* File: mips/op_long_to_float.S */
-/* File: mips/unopNarrower.S */
     /*
-     * Generic 64bit-to-32bit unary operation.  Provide an "instr" line
-     * that specifies an instruction that performs "result = op a0/a1", where
-     * "result" is a 32-bit quantity in a0.
-     *
-     * For: long-to-float, double-to-int, double-to-float
-     * If hard floating point support is available, use fa0 as the parameter,
-     * except for long-to-float opcode.
-     * (This would work for long-to-int, but that instruction is actually
-     * an exact match for OP_MOVE.)
+     * long-to-float
      */
     /* unop vA, vB */
     GET_OPB(a3)                            #  a3 <- B
-    GET_OPA4(rOBJ)                         #  t1 <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     EAS2(a3, rFP, a3)                      #  a3 <- &fp[B]
+
+#ifdef MIPS32REVGE6
+    LOAD64_F(fv0, fv0f, a3)
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    cvt.s.l   fv0, fv0
+#else
     LOAD64(rARG0, rARG1, a3)
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     JAL(__floatdisf)
+#endif
 
-.Lop_long_to_float_set_vreg_f:
-    SET_VREG_F(fv0, rOBJ)                  #  vA <- result0
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-
+    SET_VREG_F_GOTO(fv0, rOBJ, t0)         #  vA <- fv0
 
 /* ------------------------------ */
     .balign 128
 .L_op_long_to_double: /* 0x86 */
 /* File: mips/op_long_to_double.S */
-/* File: mips/funopWide.S */
     /*
-     * Generic 64-bit unary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = op a0/a1".
-     * This could be a MIPS instruction or a function call.
-     *
-     * long-to-double, double-to-long
+     * long-to-double
      */
     /* unop vA, vB */
-    GET_OPA4(rOBJ)                         #  t1 <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_OPB(a3)                            #  a3 <- B
     EAS2(a3, rFP, a3)                      #  a3 <- &fp[B]
+
+#ifdef MIPS32REVGE6
+    LOAD64_F(fv0, fv0f, a3)
+    FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
+    cvt.d.l   fv0, fv0
+#else
     LOAD64(rARG0, rARG1, a3)
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
-                                  #  optional op
-    JAL(__floatdidf)                                 #  a0/a1 <- op, a2-a3 changed
+    JAL(__floatdidf)                       #  a0/a1 <- op, a2-a3 changed
+#endif
 
-.Lop_long_to_double_set_vreg:
-    SET_VREG64_F(fv0, fv0f, rOBJ)                             #  vAA <- a0/a1
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-    /* 12-13 instructions */
-
+    SET_VREG64_F_GOTO(fv0, fv0f, rOBJ, t0) #  vA/vA+1 <- result
 
 /* ------------------------------ */
     .balign 128
 .L_op_float_to_int: /* 0x87 */
 /* File: mips/op_float_to_int.S */
-/* File: mips/funop.S */
     /*
-     * Generic 32-bit unary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = op a0".
-     * This could be a MIPS instruction or a function call.
+     * float-to-int
      *
-     * for: int-to-float, float-to-int
+     * We have to clip values to int min/max per the specification.  The
+     * expected common case is a "reasonable" value that converts directly
+     * to modest integer.  The EABI convert function isn't doing this for us.
      */
     /* unop vA, vB */
     GET_OPB(a3)                            #  a3 <- B
-    GET_OPA4(rOBJ)                         #  t0 <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_VREG_F(fa0, a3)
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
-    b f2i_doconv
 
-.Lop_float_to_int_set_vreg_f:
-    SET_VREG_F(fv0, rOBJ)
+    li        t0, INT_MIN_AS_FLOAT
+    mtc1      t0, fa1
+#ifdef MIPS32REVGE6
+    /*
+     * TODO: simplify this when the MIPS64R6 emulator
+     * supports NAN2008=1.
+     */
+    cmp.le.s  ft0, fa1, fa0
     GET_INST_OPCODE(t1)                    #  extract opcode from rINST
-    GOTO_OPCODE(t1)                        #  jump to next instruction
-
+    bc1nez    ft0, 1f                      #  if INT_MIN <= vB, proceed to truncation
+    cmp.eq.s  ft0, fa0, fa0
+    selnez.s  fa0, fa1, ft0                #  fa0 = ordered(vB) ? INT_MIN_AS_FLOAT : 0
+#else
+    c.ole.s   fcc0, fa1, fa0
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    bc1t      fcc0, 1f                     #  if INT_MIN <= vB, proceed to truncation
+    c.eq.s    fcc0, fa0, fa0
+    mtc1      zero, fa0
+    movt.s    fa0, fa1, fcc0               #  fa0 = ordered(vB) ? INT_MIN_AS_FLOAT : 0
+#endif
+1:
+    trunc.w.s fa0, fa0
+    SET_VREG_F_GOTO(fa0, rOBJ, t1)         #  vA <- result
 
 /* ------------------------------ */
     .balign 128
 .L_op_float_to_long: /* 0x88 */
 /* File: mips/op_float_to_long.S */
-/* File: mips/funopWider.S */
     /*
-     * Generic 32bit-to-64bit unary operation.  Provide an "instr" line
-     * that specifies an instruction that performs "result = op a0", where
-     * "result" is a 64-bit quantity in a0/a1.
+     * float-to-long
      *
-     * For: int-to-double, float-to-long, float-to-double
+     * We have to clip values to long min/max per the specification.  The
+     * expected common case is a "reasonable" value that converts directly
+     * to modest integer.  The EABI convert function isn't doing this for us.
      */
     /* unop vA, vB */
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_OPB(a3)                            #  a3 <- B
     GET_VREG_F(fa0, a3)
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
-    b f2l_doconv
 
-.Lop_float_to_long_set_vreg:
-    SET_VREG64(rRESULT0, rRESULT1, rOBJ)                             #  vA/vA+1 <- a0/a1
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+#ifdef MIPS32REVGE6
+    /*
+     * TODO: simplify this when the MIPS64R6 emulator
+     * supports NAN2008=1.
+     */
+    li        t0, LONG_MIN_AS_FLOAT
+    mtc1      t0, fa1
+    cmp.le.s  ft0, fa1, fa0
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    bc1nez    ft0, 1f                      #  if LONG_MIN <= vB, proceed to truncation
+    cmp.eq.s  ft0, fa0, fa0
+    selnez.s  fa0, fa1, ft0                #  fa0 = ordered(vB) ? LONG_MIN_AS_FLOAT : 0
+1:
+    trunc.l.s fa0, fa0
+    SET_VREG64_F_GOTO(fa0, fa0f, rOBJ, t1) #  vA <- result
+#else
+    c.eq.s    fcc0, fa0, fa0
+    li        rRESULT0, 0
+    li        rRESULT1, 0
+    bc1f      fcc0, .Lop_float_to_long_get_opcode
 
+    li        t0, LONG_MIN_AS_FLOAT
+    mtc1      t0, fa1
+    c.ole.s   fcc0, fa0, fa1
+    li        rRESULT1, LONG_MIN_HIGH
+    bc1t      fcc0, .Lop_float_to_long_get_opcode
+
+    neg.s     fa1, fa1
+    c.ole.s   fcc0, fa1, fa0
+    nor       rRESULT0, rRESULT0, zero
+    nor       rRESULT1, rRESULT1, zero
+    bc1t      fcc0, .Lop_float_to_long_get_opcode
+
+    JAL(__fixsfdi)
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    b         .Lop_float_to_long_set_vreg
+#endif
 
 /* ------------------------------ */
     .balign 128
@@ -3862,11 +4060,10 @@
 /* File: mips/op_float_to_double.S */
 /* File: mips/funopWider.S */
     /*
-     * Generic 32bit-to-64bit unary operation.  Provide an "instr" line
-     * that specifies an instruction that performs "result = op a0", where
-     * "result" is a 64-bit quantity in a0/a1.
+     * Generic 32bit-to-64bit floating-point unary operation.  Provide an "instr"
+     * line that specifies an instruction that performs "fv0 = op fa0".
      *
-     * For: int-to-double, float-to-long, float-to-double
+     * For: int-to-double, float-to-double
      */
     /* unop vA, vB */
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
@@ -3874,77 +4071,111 @@
     GET_VREG_F(fa0, a3)
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     cvt.d.s fv0, fa0
-
-.Lop_float_to_double_set_vreg:
-    SET_VREG64_F(fv0, fv0f, rOBJ)                             #  vA/vA+1 <- a0/a1
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_F_GOTO(fv0, fv0f, rOBJ, t0) #  vA/vA+1 <- fv0
 
 
 /* ------------------------------ */
     .balign 128
 .L_op_double_to_int: /* 0x8a */
 /* File: mips/op_double_to_int.S */
-/* File: mips/unopNarrower.S */
     /*
-     * Generic 64bit-to-32bit unary operation.  Provide an "instr" line
-     * that specifies an instruction that performs "result = op a0/a1", where
-     * "result" is a 32-bit quantity in a0.
+     * double-to-int
      *
-     * For: long-to-float, double-to-int, double-to-float
-     * If hard floating point support is available, use fa0 as the parameter,
-     * except for long-to-float opcode.
-     * (This would work for long-to-int, but that instruction is actually
-     * an exact match for OP_MOVE.)
+     * We have to clip values to int min/max per the specification.  The
+     * expected common case is a "reasonable" value that converts directly
+     * to modest integer.  The EABI convert function isn't doing this for us.
      */
     /* unop vA, vB */
     GET_OPB(a3)                            #  a3 <- B
-    GET_OPA4(rOBJ)                         #  t1 <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     EAS2(a3, rFP, a3)                      #  a3 <- &fp[B]
     LOAD64_F(fa0, fa0f, a3)
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
-    b d2i_doconv
 
-.Lop_double_to_int_set_vreg_f:
-    SET_VREG_F(fv0, rOBJ)                  #  vA <- result0
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-
-/*
- * Convert the double in a0/a1 to an int in a0.
- *
- * We have to clip values to int min/max per the specification.  The
- * expected common case is a "reasonable" value that converts directly
- * to modest integer.  The EABI convert function isn't doing this for us.
- */
+    li        t0, INT_MIN_AS_DOUBLE_HIGH
+    mtc1      zero, fa1
+    MOVE_TO_FPU_HIGH(t0, fa1, fa1f)
+#ifdef MIPS32REVGE6
+    /*
+     * TODO: simplify this when the MIPS64R6 emulator
+     * supports NAN2008=1.
+     */
+    cmp.le.d  ft0, fa1, fa0
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    bc1nez    ft0, 1f                      #  if INT_MIN <= vB, proceed to truncation
+    cmp.eq.d  ft0, fa0, fa0
+    selnez.d  fa0, fa1, ft0                #  fa0 = ordered(vB) ? INT_MIN_AS_DOUBLE : 0
+#else
+    c.ole.d   fcc0, fa1, fa0
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    bc1t      fcc0, 1f                     #  if INT_MIN <= vB, proceed to truncation
+    c.eq.d    fcc0, fa0, fa0
+    mtc1      zero, fa0
+    MOVE_TO_FPU_HIGH(zero, fa0, fa0f)
+    movt.d    fa0, fa1, fcc0               #  fa0 = ordered(vB) ? INT_MIN_AS_DOUBLE : 0
+#endif
+1:
+    trunc.w.d fa0, fa0
+    SET_VREG_F_GOTO(fa0, rOBJ, t1)         #  vA <- result
 
 /* ------------------------------ */
     .balign 128
 .L_op_double_to_long: /* 0x8b */
 /* File: mips/op_double_to_long.S */
-/* File: mips/funopWide.S */
     /*
-     * Generic 64-bit unary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = op a0/a1".
-     * This could be a MIPS instruction or a function call.
+     * double-to-long
      *
-     * long-to-double, double-to-long
+     * We have to clip values to long min/max per the specification.  The
+     * expected common case is a "reasonable" value that converts directly
+     * to modest integer.  The EABI convert function isn't doing this for us.
      */
     /* unop vA, vB */
-    GET_OPA4(rOBJ)                         #  t1 <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_OPB(a3)                            #  a3 <- B
     EAS2(a3, rFP, a3)                      #  a3 <- &fp[B]
     LOAD64_F(fa0, fa0f, a3)
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
-                                  #  optional op
-    b d2l_doconv                                 #  a0/a1 <- op, a2-a3 changed
 
-.Lop_double_to_long_set_vreg:
-    SET_VREG64(rRESULT0, rRESULT1, rOBJ)                             #  vAA <- a0/a1
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-    /* 12-13 instructions */
+#ifdef MIPS32REVGE6
+    /*
+     * TODO: simplify this when the MIPS64R6 emulator
+     * supports NAN2008=1.
+     */
+    li        t0, LONG_MIN_AS_DOUBLE_HIGH
+    mtc1      zero, fa1
+    mthc1     t0, fa1
+    cmp.le.d  ft0, fa1, fa0
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    bc1nez    ft0, 1f                      #  if LONG_MIN <= vB, proceed to truncation
+    cmp.eq.d  ft0, fa0, fa0
+    selnez.d  fa0, fa1, ft0                #  fa0 = ordered(vB) ? LONG_MIN_AS_DOUBLE : 0
+1:
+    trunc.l.d fa0, fa0
+    SET_VREG64_F_GOTO(fa0, fa0f, rOBJ, t1) #  vA <- result
+#else
+    c.eq.d    fcc0, fa0, fa0
+    li        rRESULT0, 0
+    li        rRESULT1, 0
+    bc1f      fcc0, .Lop_double_to_long_get_opcode
 
+    li        t0, LONG_MIN_AS_DOUBLE_HIGH
+    mtc1      zero, fa1
+    MOVE_TO_FPU_HIGH(t0, fa1, fa1f)
+    c.ole.d   fcc0, fa0, fa1
+    li        rRESULT1, LONG_MIN_HIGH
+    bc1t      fcc0, .Lop_double_to_long_get_opcode
+
+    neg.d     fa1, fa1
+    c.ole.d   fcc0, fa1, fa0
+    nor       rRESULT0, rRESULT0, zero
+    nor       rRESULT1, rRESULT1, zero
+    bc1t      fcc0, .Lop_double_to_long_get_opcode
+
+    JAL(__fixdfdi)
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    b         .Lop_double_to_long_set_vreg
+#endif
 
 /* ------------------------------ */
     .balign 128
@@ -3952,28 +4183,20 @@
 /* File: mips/op_double_to_float.S */
 /* File: mips/unopNarrower.S */
     /*
-     * Generic 64bit-to-32bit unary operation.  Provide an "instr" line
-     * that specifies an instruction that performs "result = op a0/a1", where
-     * "result" is a 32-bit quantity in a0.
+     * Generic 64bit-to-32bit floating-point unary operation.  Provide an "instr"
+     * line that specifies an instruction that performs "fv0 = op fa0".
      *
-     * For: long-to-float, double-to-int, double-to-float
-     * If hard floating point support is available, use fa0 as the parameter,
-     * except for long-to-float opcode.
-     * (This would work for long-to-int, but that instruction is actually
-     * an exact match for OP_MOVE.)
+     * For: double-to-float
      */
     /* unop vA, vB */
     GET_OPB(a3)                            #  a3 <- B
-    GET_OPA4(rOBJ)                         #  t1 <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     EAS2(a3, rFP, a3)                      #  a3 <- &fp[B]
     LOAD64_F(fa0, fa0f, a3)
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     cvt.s.d fv0, fa0
-
-.Lop_double_to_float_set_vreg_f:
-    SET_VREG_F(fv0, rOBJ)                  #  vA <- result0
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG_F_GOTO(fv0, rOBJ, t0)         #  vA <- fv0
 
 
 /* ------------------------------ */
@@ -3983,22 +4206,21 @@
 /* File: mips/unop.S */
     /*
      * Generic 32-bit unary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = op a0".
+     * specifies an instruction that performs "result0 = op a0".
      * This could be a MIPS instruction or a function call.
      *
-     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
-     *      int-to-byte, int-to-char, int-to-short
+     * for: int-to-byte, int-to-char, int-to-short,
+     *      neg-int, not-int, neg-float
      */
     /* unop vA, vB */
     GET_OPB(a3)                            #  a3 <- B
     GET_OPA4(t0)                           #  t0 <- A+
     GET_VREG(a0, a3)                       #  a0 <- vB
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
-    sll a0, a0, 24                              #  optional op
-    sra a0, a0, 24                                 #  a0 <- op, a0-a3 changed
+                                  #  optional op
+    SEB(a0, a0)                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t1)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, t0, t1)        #  vAA <- result0
-    /* 9-10 instructions */
+    SET_VREG_GOTO(a0, t0, t1)        #  vA <- result0
 
 
 /* ------------------------------ */
@@ -4008,11 +4230,11 @@
 /* File: mips/unop.S */
     /*
      * Generic 32-bit unary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = op a0".
+     * specifies an instruction that performs "result0 = op a0".
      * This could be a MIPS instruction or a function call.
      *
-     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
-     *      int-to-byte, int-to-char, int-to-short
+     * for: int-to-byte, int-to-char, int-to-short,
+     *      neg-int, not-int, neg-float
      */
     /* unop vA, vB */
     GET_OPB(a3)                            #  a3 <- B
@@ -4022,8 +4244,7 @@
                                   #  optional op
     and a0, 0xffff                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t1)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, t0, t1)        #  vAA <- result0
-    /* 9-10 instructions */
+    SET_VREG_GOTO(a0, t0, t1)        #  vA <- result0
 
 
 /* ------------------------------ */
@@ -4033,22 +4254,21 @@
 /* File: mips/unop.S */
     /*
      * Generic 32-bit unary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = op a0".
+     * specifies an instruction that performs "result0 = op a0".
      * This could be a MIPS instruction or a function call.
      *
-     * for: neg-int, not-int, neg-float, int-to-float, float-to-int,
-     *      int-to-byte, int-to-char, int-to-short
+     * for: int-to-byte, int-to-char, int-to-short,
+     *      neg-int, not-int, neg-float
      */
     /* unop vA, vB */
     GET_OPB(a3)                            #  a3 <- B
     GET_OPA4(t0)                           #  t0 <- A+
     GET_VREG(a0, a3)                       #  a0 <- vB
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
-    sll a0, 16                              #  optional op
-    sra a0, 16                                 #  a0 <- op, a0-a3 changed
+                                  #  optional op
+    SEH(a0, a0)                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t1)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, t0, t1)        #  vAA <- result0
-    /* 9-10 instructions */
+    SET_VREG_GOTO(a0, t0, t1)        #  vA <- result0
 
 
 /* ------------------------------ */
@@ -4087,7 +4307,6 @@
     addu a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 11-14 instructions */
 
 
 /* ------------------------------ */
@@ -4126,7 +4345,6 @@
     subu a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 11-14 instructions */
 
 
 /* ------------------------------ */
@@ -4165,7 +4383,6 @@
     mul a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 11-14 instructions */
 
 
 /* ------------------------------ */
@@ -4205,7 +4422,6 @@
     div a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 11-14 instructions */
 
 #else
 /* File: mips/binop.S */
@@ -4240,7 +4456,6 @@
     mflo a0                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 11-14 instructions */
 
 #endif
 
@@ -4281,7 +4496,6 @@
     mod a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 11-14 instructions */
 
 #else
 /* File: mips/binop.S */
@@ -4316,7 +4530,6 @@
     mfhi a0                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 11-14 instructions */
 
 #endif
 
@@ -4356,7 +4569,6 @@
     and a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 11-14 instructions */
 
 
 /* ------------------------------ */
@@ -4395,7 +4607,6 @@
     or a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 11-14 instructions */
 
 
 /* ------------------------------ */
@@ -4434,7 +4645,6 @@
     xor a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 11-14 instructions */
 
 
 /* ------------------------------ */
@@ -4473,7 +4683,6 @@
     sll a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 11-14 instructions */
 
 
 /* ------------------------------ */
@@ -4512,7 +4721,6 @@
     sra a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 11-14 instructions */
 
 
 /* ------------------------------ */
@@ -4551,7 +4759,6 @@
     srl a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 11-14 instructions */
 
 
 /* ------------------------------ */
@@ -4571,10 +4778,10 @@
      * Generic 64-bit binary operation.  Provide an "instr" line that
      * specifies an instruction that performs "result = a0-a1 op a2-a3".
      * This could be a MIPS instruction or a function call.  (If the result
-     * comes back in a register other than a0, you can override "result".)
+     * comes back in a register pair other than a0-a1, you can override "result".)
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
-     * vCC (a1).  Useful for integer division and modulus.
+     * vCC (a2-a3).  Useful for integer division and modulus.
      *
      * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
      *      xor-long
@@ -4600,7 +4807,6 @@
     addu a1, a3, a1; sltu v1, v0, a2; addu v1, v1, a1                                 #  result <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG64_GOTO(v0, v1, rOBJ, t0)   #  vAA/vAA+1 <- v0/v1
-    /* 14-17 instructions */
 
 
 /* ------------------------------ */
@@ -4619,10 +4825,10 @@
      * Generic 64-bit binary operation.  Provide an "instr" line that
      * specifies an instruction that performs "result = a0-a1 op a2-a3".
      * This could be a MIPS instruction or a function call.  (If the result
-     * comes back in a register other than a0, you can override "result".)
+     * comes back in a register pair other than a0-a1, you can override "result".)
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
-     * vCC (a1).  Useful for integer division and modulus.
+     * vCC (a2-a3).  Useful for integer division and modulus.
      *
      * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
      *      xor-long
@@ -4648,7 +4854,6 @@
     subu v1, a1, a3; sltu a0, a0, v0; subu v1, v1, a0                                 #  result <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG64_GOTO(v0, v1, rOBJ, t0)   #  vAA/vAA+1 <- v0/v1
-    /* 14-17 instructions */
 
 
 /* ------------------------------ */
@@ -4702,10 +4907,10 @@
      * Generic 64-bit binary operation.  Provide an "instr" line that
      * specifies an instruction that performs "result = a0-a1 op a2-a3".
      * This could be a MIPS instruction or a function call.  (If the result
-     * comes back in a register other than a0, you can override "result".)
+     * comes back in a register pair other than a0-a1, you can override "result".)
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
-     * vCC (a1).  Useful for integer division and modulus.
+     * vCC (a2-a3).  Useful for integer division and modulus.
      *
      * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
      *      xor-long
@@ -4731,7 +4936,6 @@
     JAL(__divdi3)                                 #  result <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG64_GOTO(v0, v1, rOBJ, t0)   #  vAA/vAA+1 <- v0/v1
-    /* 14-17 instructions */
 
 
 /* ------------------------------ */
@@ -4743,10 +4947,10 @@
      * Generic 64-bit binary operation.  Provide an "instr" line that
      * specifies an instruction that performs "result = a0-a1 op a2-a3".
      * This could be a MIPS instruction or a function call.  (If the result
-     * comes back in a register other than a0, you can override "result".)
+     * comes back in a register pair other than a0-a1, you can override "result".)
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
-     * vCC (a1).  Useful for integer division and modulus.
+     * vCC (a2-a3).  Useful for integer division and modulus.
      *
      * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
      *      xor-long
@@ -4772,7 +4976,6 @@
     JAL(__moddi3)                                 #  result <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG64_GOTO(v0, v1, rOBJ, t0)   #  vAA/vAA+1 <- v0/v1
-    /* 14-17 instructions */
 
 
 /* ------------------------------ */
@@ -4784,10 +4987,10 @@
      * Generic 64-bit binary operation.  Provide an "instr" line that
      * specifies an instruction that performs "result = a0-a1 op a2-a3".
      * This could be a MIPS instruction or a function call.  (If the result
-     * comes back in a register other than a0, you can override "result".)
+     * comes back in a register pair other than a0-a1, you can override "result".)
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
-     * vCC (a1).  Useful for integer division and modulus.
+     * vCC (a2-a3).  Useful for integer division and modulus.
      *
      * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
      *      xor-long
@@ -4813,7 +5016,6 @@
     and a1, a1, a3                                 #  result <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG64_GOTO(a0, a1, rOBJ, t0)   #  vAA/vAA+1 <- a0/a1
-    /* 14-17 instructions */
 
 
 /* ------------------------------ */
@@ -4825,10 +5027,10 @@
      * Generic 64-bit binary operation.  Provide an "instr" line that
      * specifies an instruction that performs "result = a0-a1 op a2-a3".
      * This could be a MIPS instruction or a function call.  (If the result
-     * comes back in a register other than a0, you can override "result".)
+     * comes back in a register pair other than a0-a1, you can override "result".)
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
-     * vCC (a1).  Useful for integer division and modulus.
+     * vCC (a2-a3).  Useful for integer division and modulus.
      *
      * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
      *      xor-long
@@ -4854,7 +5056,6 @@
     or a1, a1, a3                                 #  result <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG64_GOTO(a0, a1, rOBJ, t0)   #  vAA/vAA+1 <- a0/a1
-    /* 14-17 instructions */
 
 
 /* ------------------------------ */
@@ -4866,10 +5067,10 @@
      * Generic 64-bit binary operation.  Provide an "instr" line that
      * specifies an instruction that performs "result = a0-a1 op a2-a3".
      * This could be a MIPS instruction or a function call.  (If the result
-     * comes back in a register other than a0, you can override "result".)
+     * comes back in a register pair other than a0-a1, you can override "result".)
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
-     * vCC (a1).  Useful for integer division and modulus.
+     * vCC (a2-a3).  Useful for integer division and modulus.
      *
      * for: add-long, sub-long, div-long, rem-long, and-long, or-long,
      *      xor-long
@@ -4895,7 +5096,6 @@
     xor a1, a1, a3                                 #  result <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG64_GOTO(a0, a1, rOBJ, t0)   #  vAA/vAA+1 <- a0/a1
-    /* 14-17 instructions */
 
 
 /* ------------------------------ */
@@ -4928,7 +5128,7 @@
     srl     a0, v1                         #  alo<- alo >> (32-(shift&31))
     sll     v1, a1, a2                     #  rhi<- ahi << (shift&31)
     or      v1, a0                         #  rhi<- rhi | alo
-    SET_VREG64_GOTO(v0, v1, t2, t0)        #  vAA/vAA+1 <- a0/a1
+    SET_VREG64_GOTO(v0, v1, t2, t0)        #  vAA/vAA+1 <- v0/v1
 
 /* ------------------------------ */
     .balign 128
@@ -4959,7 +5159,7 @@
     sll     a1, 1
     sll     a1, a0                         #  ahi<- ahi << (32-(shift&31))
     or      v0, a1                         #  rlo<- rlo | ahi
-    SET_VREG64_GOTO(v0, v1, t3, t0)        #  vAA/VAA+1 <- v0/v0
+    SET_VREG64_GOTO(v0, v1, t3, t0)        #  vAA/VAA+1 <- v0/v1
 
 /* ------------------------------ */
     .balign 128
@@ -5006,7 +5206,7 @@
 
     /* binop vAA, vBB, vCC */
     FETCH(a0, 1)                           #  a0 <- CCBB
-    GET_OPA(rOBJ)                          #  s5 <- AA
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
     srl       a3, a0, 8                    #  a3 <- CC
     and       a2, a0, 255                  #  a2 <- BB
     GET_VREG_F(fa1, a3)                    #  a1 <- vCC
@@ -5014,9 +5214,8 @@
 
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     add.s fv0, fa0, fa1                                 #  f0 = result
-    SET_VREG_F(fv0, rOBJ)                  #  vAA <- fv0
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG_F_GOTO(fv0, rOBJ, t0)         #  vAA <- fv0
 
 
 /* ------------------------------ */
@@ -5032,7 +5231,7 @@
 
     /* binop vAA, vBB, vCC */
     FETCH(a0, 1)                           #  a0 <- CCBB
-    GET_OPA(rOBJ)                          #  s5 <- AA
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
     srl       a3, a0, 8                    #  a3 <- CC
     and       a2, a0, 255                  #  a2 <- BB
     GET_VREG_F(fa1, a3)                    #  a1 <- vCC
@@ -5040,9 +5239,8 @@
 
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     sub.s fv0, fa0, fa1                                 #  f0 = result
-    SET_VREG_F(fv0, rOBJ)                  #  vAA <- fv0
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG_F_GOTO(fv0, rOBJ, t0)         #  vAA <- fv0
 
 
 /* ------------------------------ */
@@ -5058,7 +5256,7 @@
 
     /* binop vAA, vBB, vCC */
     FETCH(a0, 1)                           #  a0 <- CCBB
-    GET_OPA(rOBJ)                          #  s5 <- AA
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
     srl       a3, a0, 8                    #  a3 <- CC
     and       a2, a0, 255                  #  a2 <- BB
     GET_VREG_F(fa1, a3)                    #  a1 <- vCC
@@ -5066,9 +5264,8 @@
 
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     mul.s fv0, fa0, fa1                                 #  f0 = result
-    SET_VREG_F(fv0, rOBJ)                  #  vAA <- fv0
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG_F_GOTO(fv0, rOBJ, t0)         #  vAA <- fv0
 
 
 /* ------------------------------ */
@@ -5084,7 +5281,7 @@
 
     /* binop vAA, vBB, vCC */
     FETCH(a0, 1)                           #  a0 <- CCBB
-    GET_OPA(rOBJ)                          #  s5 <- AA
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
     srl       a3, a0, 8                    #  a3 <- CC
     and       a2, a0, 255                  #  a2 <- BB
     GET_VREG_F(fa1, a3)                    #  a1 <- vCC
@@ -5092,9 +5289,8 @@
 
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     div.s fv0, fa0, fa1                                 #  f0 = result
-    SET_VREG_F(fv0, rOBJ)                  #  vAA <- fv0
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG_F_GOTO(fv0, rOBJ, t0)         #  vAA <- fv0
 
 
 /* ------------------------------ */
@@ -5110,7 +5306,7 @@
 
     /* binop vAA, vBB, vCC */
     FETCH(a0, 1)                           #  a0 <- CCBB
-    GET_OPA(rOBJ)                          #  s5 <- AA
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
     srl       a3, a0, 8                    #  a3 <- CC
     and       a2, a0, 255                  #  a2 <- BB
     GET_VREG_F(fa1, a3)                    #  a1 <- vCC
@@ -5118,9 +5314,8 @@
 
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     JAL(fmodf)                                 #  f0 = result
-    SET_VREG_F(fv0, rOBJ)                  #  vAA <- fv0
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG_F_GOTO(fv0, rOBJ, t0)         #  vAA <- fv0
 
 
 /* ------------------------------ */
@@ -5129,8 +5324,8 @@
 /* File: mips/op_add_double.S */
 /* File: mips/fbinopWide.S */
     /*
-     * Generic 64-bit binary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = a0-a1 op a2-a3".
+     * Generic 64-bit floating-point binary operation.  Provide an "instr"
+     * line that specifies an instruction that performs "fv0 = fa0 op fa1".
      * This could be an MIPS instruction or a function call.
      *
      * for: add-double, sub-double, mul-double, div-double,
@@ -5139,7 +5334,7 @@
      */
     /* binop vAA, vBB, vCC */
     FETCH(a0, 1)                           #  a0 <- CCBB
-    GET_OPA(rOBJ)                          #  s5 <- AA
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
     and       a2, a0, 255                  #  a2 <- BB
     srl       a3, a0, 8                    #  a3 <- CC
     EAS2(a2, rFP, a2)                      #  a2 <- &fp[BB]
@@ -5149,8 +5344,8 @@
 
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     add.d fv0, fa0, fa1
-    SET_VREG64_F(fv0, fv0f, rOBJ)
-    b         .Lop_add_double_finish
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG64_F_GOTO(fv0, fv0f, rOBJ, t0)  #  vAA/vAA+1 <- fv0
 
 
 /* ------------------------------ */
@@ -5159,8 +5354,8 @@
 /* File: mips/op_sub_double.S */
 /* File: mips/fbinopWide.S */
     /*
-     * Generic 64-bit binary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = a0-a1 op a2-a3".
+     * Generic 64-bit floating-point binary operation.  Provide an "instr"
+     * line that specifies an instruction that performs "fv0 = fa0 op fa1".
      * This could be an MIPS instruction or a function call.
      *
      * for: add-double, sub-double, mul-double, div-double,
@@ -5169,7 +5364,7 @@
      */
     /* binop vAA, vBB, vCC */
     FETCH(a0, 1)                           #  a0 <- CCBB
-    GET_OPA(rOBJ)                          #  s5 <- AA
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
     and       a2, a0, 255                  #  a2 <- BB
     srl       a3, a0, 8                    #  a3 <- CC
     EAS2(a2, rFP, a2)                      #  a2 <- &fp[BB]
@@ -5179,8 +5374,8 @@
 
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     sub.d fv0, fa0, fa1
-    SET_VREG64_F(fv0, fv0f, rOBJ)
-    b         .Lop_sub_double_finish
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG64_F_GOTO(fv0, fv0f, rOBJ, t0)  #  vAA/vAA+1 <- fv0
 
 
 /* ------------------------------ */
@@ -5189,8 +5384,8 @@
 /* File: mips/op_mul_double.S */
 /* File: mips/fbinopWide.S */
     /*
-     * Generic 64-bit binary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = a0-a1 op a2-a3".
+     * Generic 64-bit floating-point binary operation.  Provide an "instr"
+     * line that specifies an instruction that performs "fv0 = fa0 op fa1".
      * This could be an MIPS instruction or a function call.
      *
      * for: add-double, sub-double, mul-double, div-double,
@@ -5199,7 +5394,7 @@
      */
     /* binop vAA, vBB, vCC */
     FETCH(a0, 1)                           #  a0 <- CCBB
-    GET_OPA(rOBJ)                          #  s5 <- AA
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
     and       a2, a0, 255                  #  a2 <- BB
     srl       a3, a0, 8                    #  a3 <- CC
     EAS2(a2, rFP, a2)                      #  a2 <- &fp[BB]
@@ -5209,8 +5404,8 @@
 
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     mul.d fv0, fa0, fa1
-    SET_VREG64_F(fv0, fv0f, rOBJ)
-    b         .Lop_mul_double_finish
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG64_F_GOTO(fv0, fv0f, rOBJ, t0)  #  vAA/vAA+1 <- fv0
 
 
 /* ------------------------------ */
@@ -5219,8 +5414,8 @@
 /* File: mips/op_div_double.S */
 /* File: mips/fbinopWide.S */
     /*
-     * Generic 64-bit binary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = a0-a1 op a2-a3".
+     * Generic 64-bit floating-point binary operation.  Provide an "instr"
+     * line that specifies an instruction that performs "fv0 = fa0 op fa1".
      * This could be an MIPS instruction or a function call.
      *
      * for: add-double, sub-double, mul-double, div-double,
@@ -5229,7 +5424,7 @@
      */
     /* binop vAA, vBB, vCC */
     FETCH(a0, 1)                           #  a0 <- CCBB
-    GET_OPA(rOBJ)                          #  s5 <- AA
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
     and       a2, a0, 255                  #  a2 <- BB
     srl       a3, a0, 8                    #  a3 <- CC
     EAS2(a2, rFP, a2)                      #  a2 <- &fp[BB]
@@ -5239,8 +5434,8 @@
 
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     div.d fv0, fa0, fa1
-    SET_VREG64_F(fv0, fv0f, rOBJ)
-    b         .Lop_div_double_finish
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG64_F_GOTO(fv0, fv0f, rOBJ, t0)  #  vAA/vAA+1 <- fv0
 
 
 /* ------------------------------ */
@@ -5249,8 +5444,8 @@
 /* File: mips/op_rem_double.S */
 /* File: mips/fbinopWide.S */
     /*
-     * Generic 64-bit binary operation.  Provide an "instr" line that
-     * specifies an instruction that performs "result = a0-a1 op a2-a3".
+     * Generic 64-bit floating-point binary operation.  Provide an "instr"
+     * line that specifies an instruction that performs "fv0 = fa0 op fa1".
      * This could be an MIPS instruction or a function call.
      *
      * for: add-double, sub-double, mul-double, div-double,
@@ -5259,7 +5454,7 @@
      */
     /* binop vAA, vBB, vCC */
     FETCH(a0, 1)                           #  a0 <- CCBB
-    GET_OPA(rOBJ)                          #  s5 <- AA
+    GET_OPA(rOBJ)                          #  rOBJ <- AA
     and       a2, a0, 255                  #  a2 <- BB
     srl       a3, a0, 8                    #  a3 <- CC
     EAS2(a2, rFP, a2)                      #  a2 <- &fp[BB]
@@ -5269,8 +5464,8 @@
 
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     JAL(fmod)
-    SET_VREG64_F(fv0, fv0f, rOBJ)
-    b         .Lop_rem_double_finish
+    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
+    SET_VREG64_F_GOTO(fv0, fv0f, rOBJ, t0)  #  vAA/vAA+1 <- fv0
 
 
 /* ------------------------------ */
@@ -5304,8 +5499,7 @@
                                   #  optional op
     addu a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 
 /* ------------------------------ */
@@ -5339,8 +5533,7 @@
                                   #  optional op
     subu a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 
 /* ------------------------------ */
@@ -5374,8 +5567,7 @@
                                   #  optional op
     mul a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 
 /* ------------------------------ */
@@ -5410,8 +5602,7 @@
                                   #  optional op
     div a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 #else
 /* File: mips/binop2addr.S */
@@ -5441,8 +5632,7 @@
     div zero, a0, a1                              #  optional op
     mflo a0                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 #endif
 
@@ -5478,8 +5668,7 @@
                                   #  optional op
     mod a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 #else
 /* File: mips/binop2addr.S */
@@ -5509,8 +5698,7 @@
     div zero, a0, a1                              #  optional op
     mfhi a0                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 #endif
 
@@ -5545,8 +5733,7 @@
                                   #  optional op
     and a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 
 /* ------------------------------ */
@@ -5580,8 +5767,7 @@
                                   #  optional op
     or a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 
 /* ------------------------------ */
@@ -5615,8 +5801,7 @@
                                   #  optional op
     xor a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 
 /* ------------------------------ */
@@ -5650,8 +5835,7 @@
                                   #  optional op
     sll a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 
 /* ------------------------------ */
@@ -5685,8 +5869,7 @@
                                   #  optional op
     sra a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 
 /* ------------------------------ */
@@ -5720,8 +5903,7 @@
                                   #  optional op
     srl a0, a0, a1                                  #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 
 /* ------------------------------ */
@@ -5736,22 +5918,21 @@
      * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
      * that specifies an instruction that performs "result = a0-a1 op a2-a3".
      * This could be a MIPS instruction or a function call.  (If the result
-     * comes back in a register other than a0, you can override "result".)
+     * comes back in a register pair other than a0-a1, you can override "result".)
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
-     * vCC (a1).  Useful for integer division and modulus.
+     * vB (a2-a3).  Useful for integer division and modulus.
      *
      * For: add-long/2addr, sub-long/2addr, div-long/2addr, rem-long/2addr,
      *      and-long/2addr, or-long/2addr, xor-long/2addr
-     *      rem-double/2addr
      */
     /* binop/2addr vA, vB */
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_OPB(a1)                            #  a1 <- B
     EAS2(a1, rFP, a1)                      #  a1 <- &fp[B]
     EAS2(t0, rFP, rOBJ)                    #  t0 <- &fp[A]
-    LOAD64(a2, a3, a1)               #  a2/a3 <- vBB/vBB+1
-    LOAD64(a0, a1, t0)               #  a0/a1 <- vAA/vAA+1
+    LOAD64(a2, a3, a1)               #  a2/a3 <- vB/vB+1
+    LOAD64(a0, a1, t0)               #  a0/a1 <- vA/vA+1
     .if 0
     or        t0, a2, a3             #  second arg (a2-a3) is zero?
     beqz      t0, common_errDivideByZero
@@ -5761,9 +5942,7 @@
     addu v0, a2, a0                              #  optional op
     addu a1, a3, a1; sltu v1, v0, a2; addu v1, v1, a1                                 #  result <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(v0, v1, rOBJ)   #  vAA/vAA+1 <- v0/v1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-    /* 12-15 instructions */
+    SET_VREG64_GOTO(v0, v1, rOBJ, t0)   #  vA/vA+1 <- v0/v1
 
 
 /* ------------------------------ */
@@ -5778,22 +5957,21 @@
      * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
      * that specifies an instruction that performs "result = a0-a1 op a2-a3".
      * This could be a MIPS instruction or a function call.  (If the result
-     * comes back in a register other than a0, you can override "result".)
+     * comes back in a register pair other than a0-a1, you can override "result".)
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
-     * vCC (a1).  Useful for integer division and modulus.
+     * vB (a2-a3).  Useful for integer division and modulus.
      *
      * For: add-long/2addr, sub-long/2addr, div-long/2addr, rem-long/2addr,
      *      and-long/2addr, or-long/2addr, xor-long/2addr
-     *      rem-double/2addr
      */
     /* binop/2addr vA, vB */
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_OPB(a1)                            #  a1 <- B
     EAS2(a1, rFP, a1)                      #  a1 <- &fp[B]
     EAS2(t0, rFP, rOBJ)                    #  t0 <- &fp[A]
-    LOAD64(a2, a3, a1)               #  a2/a3 <- vBB/vBB+1
-    LOAD64(a0, a1, t0)               #  a0/a1 <- vAA/vAA+1
+    LOAD64(a2, a3, a1)               #  a2/a3 <- vB/vB+1
+    LOAD64(a0, a1, t0)               #  a0/a1 <- vA/vA+1
     .if 0
     or        t0, a2, a3             #  second arg (a2-a3) is zero?
     beqz      t0, common_errDivideByZero
@@ -5803,9 +5981,7 @@
     subu v0, a0, a2                              #  optional op
     subu v1, a1, a3; sltu a0, a0, v0; subu v1, v1, a0                                 #  result <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(v0, v1, rOBJ)   #  vAA/vAA+1 <- v0/v1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-    /* 12-15 instructions */
+    SET_VREG64_GOTO(v0, v1, rOBJ, t0)   #  vA/vA+1 <- v0/v1
 
 
 /* ------------------------------ */
@@ -5840,9 +6016,7 @@
 
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t1)                    #  extract opcode from rINST
-    # vAA <- v0 (low)
-    SET_VREG64(v0, v1, rOBJ)               #  vAA+1 <- v1 (high)
-    GOTO_OPCODE(t1)                        #  jump to next instruction
+    SET_VREG64_GOTO(v0, v1, rOBJ, t1)      #  vA/vA+1 <- v0(low)/v1(high)
 
 /* ------------------------------ */
     .balign 128
@@ -5853,22 +6027,21 @@
      * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
      * that specifies an instruction that performs "result = a0-a1 op a2-a3".
      * This could be a MIPS instruction or a function call.  (If the result
-     * comes back in a register other than a0, you can override "result".)
+     * comes back in a register pair other than a0-a1, you can override "result".)
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
-     * vCC (a1).  Useful for integer division and modulus.
+     * vB (a2-a3).  Useful for integer division and modulus.
      *
      * For: add-long/2addr, sub-long/2addr, div-long/2addr, rem-long/2addr,
      *      and-long/2addr, or-long/2addr, xor-long/2addr
-     *      rem-double/2addr
      */
     /* binop/2addr vA, vB */
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_OPB(a1)                            #  a1 <- B
     EAS2(a1, rFP, a1)                      #  a1 <- &fp[B]
     EAS2(t0, rFP, rOBJ)                    #  t0 <- &fp[A]
-    LOAD64(a2, a3, a1)               #  a2/a3 <- vBB/vBB+1
-    LOAD64(a0, a1, t0)               #  a0/a1 <- vAA/vAA+1
+    LOAD64(a2, a3, a1)               #  a2/a3 <- vB/vB+1
+    LOAD64(a0, a1, t0)               #  a0/a1 <- vA/vA+1
     .if 1
     or        t0, a2, a3             #  second arg (a2-a3) is zero?
     beqz      t0, common_errDivideByZero
@@ -5878,9 +6051,7 @@
                                   #  optional op
     JAL(__divdi3)                                 #  result <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(v0, v1, rOBJ)   #  vAA/vAA+1 <- v0/v1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-    /* 12-15 instructions */
+    SET_VREG64_GOTO(v0, v1, rOBJ, t0)   #  vA/vA+1 <- v0/v1
 
 
 /* ------------------------------ */
@@ -5892,22 +6063,21 @@
      * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
      * that specifies an instruction that performs "result = a0-a1 op a2-a3".
      * This could be a MIPS instruction or a function call.  (If the result
-     * comes back in a register other than a0, you can override "result".)
+     * comes back in a register pair other than a0-a1, you can override "result".)
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
-     * vCC (a1).  Useful for integer division and modulus.
+     * vB (a2-a3).  Useful for integer division and modulus.
      *
      * For: add-long/2addr, sub-long/2addr, div-long/2addr, rem-long/2addr,
      *      and-long/2addr, or-long/2addr, xor-long/2addr
-     *      rem-double/2addr
      */
     /* binop/2addr vA, vB */
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_OPB(a1)                            #  a1 <- B
     EAS2(a1, rFP, a1)                      #  a1 <- &fp[B]
     EAS2(t0, rFP, rOBJ)                    #  t0 <- &fp[A]
-    LOAD64(a2, a3, a1)               #  a2/a3 <- vBB/vBB+1
-    LOAD64(a0, a1, t0)               #  a0/a1 <- vAA/vAA+1
+    LOAD64(a2, a3, a1)               #  a2/a3 <- vB/vB+1
+    LOAD64(a0, a1, t0)               #  a0/a1 <- vA/vA+1
     .if 1
     or        t0, a2, a3             #  second arg (a2-a3) is zero?
     beqz      t0, common_errDivideByZero
@@ -5917,9 +6087,7 @@
                                   #  optional op
     JAL(__moddi3)                                 #  result <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(v0, v1, rOBJ)   #  vAA/vAA+1 <- v0/v1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-    /* 12-15 instructions */
+    SET_VREG64_GOTO(v0, v1, rOBJ, t0)   #  vA/vA+1 <- v0/v1
 
 
 /* ------------------------------ */
@@ -5931,22 +6099,21 @@
      * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
      * that specifies an instruction that performs "result = a0-a1 op a2-a3".
      * This could be a MIPS instruction or a function call.  (If the result
-     * comes back in a register other than a0, you can override "result".)
+     * comes back in a register pair other than a0-a1, you can override "result".)
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
-     * vCC (a1).  Useful for integer division and modulus.
+     * vB (a2-a3).  Useful for integer division and modulus.
      *
      * For: add-long/2addr, sub-long/2addr, div-long/2addr, rem-long/2addr,
      *      and-long/2addr, or-long/2addr, xor-long/2addr
-     *      rem-double/2addr
      */
     /* binop/2addr vA, vB */
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_OPB(a1)                            #  a1 <- B
     EAS2(a1, rFP, a1)                      #  a1 <- &fp[B]
     EAS2(t0, rFP, rOBJ)                    #  t0 <- &fp[A]
-    LOAD64(a2, a3, a1)               #  a2/a3 <- vBB/vBB+1
-    LOAD64(a0, a1, t0)               #  a0/a1 <- vAA/vAA+1
+    LOAD64(a2, a3, a1)               #  a2/a3 <- vB/vB+1
+    LOAD64(a0, a1, t0)               #  a0/a1 <- vA/vA+1
     .if 0
     or        t0, a2, a3             #  second arg (a2-a3) is zero?
     beqz      t0, common_errDivideByZero
@@ -5956,9 +6123,7 @@
     and a0, a0, a2                              #  optional op
     and a1, a1, a3                                 #  result <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(a0, a1, rOBJ)   #  vAA/vAA+1 <- a0/a1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-    /* 12-15 instructions */
+    SET_VREG64_GOTO(a0, a1, rOBJ, t0)   #  vA/vA+1 <- a0/a1
 
 
 /* ------------------------------ */
@@ -5970,22 +6135,21 @@
      * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
      * that specifies an instruction that performs "result = a0-a1 op a2-a3".
      * This could be a MIPS instruction or a function call.  (If the result
-     * comes back in a register other than a0, you can override "result".)
+     * comes back in a register pair other than a0-a1, you can override "result".)
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
-     * vCC (a1).  Useful for integer division and modulus.
+     * vB (a2-a3).  Useful for integer division and modulus.
      *
      * For: add-long/2addr, sub-long/2addr, div-long/2addr, rem-long/2addr,
      *      and-long/2addr, or-long/2addr, xor-long/2addr
-     *      rem-double/2addr
      */
     /* binop/2addr vA, vB */
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_OPB(a1)                            #  a1 <- B
     EAS2(a1, rFP, a1)                      #  a1 <- &fp[B]
     EAS2(t0, rFP, rOBJ)                    #  t0 <- &fp[A]
-    LOAD64(a2, a3, a1)               #  a2/a3 <- vBB/vBB+1
-    LOAD64(a0, a1, t0)               #  a0/a1 <- vAA/vAA+1
+    LOAD64(a2, a3, a1)               #  a2/a3 <- vB/vB+1
+    LOAD64(a0, a1, t0)               #  a0/a1 <- vA/vA+1
     .if 0
     or        t0, a2, a3             #  second arg (a2-a3) is zero?
     beqz      t0, common_errDivideByZero
@@ -5995,9 +6159,7 @@
     or a0, a0, a2                              #  optional op
     or a1, a1, a3                                 #  result <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(a0, a1, rOBJ)   #  vAA/vAA+1 <- a0/a1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-    /* 12-15 instructions */
+    SET_VREG64_GOTO(a0, a1, rOBJ, t0)   #  vA/vA+1 <- a0/a1
 
 
 /* ------------------------------ */
@@ -6009,22 +6171,21 @@
      * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
      * that specifies an instruction that performs "result = a0-a1 op a2-a3".
      * This could be a MIPS instruction or a function call.  (If the result
-     * comes back in a register other than a0, you can override "result".)
+     * comes back in a register pair other than a0-a1, you can override "result".)
      *
      * If "chkzero" is set to 1, we perform a divide-by-zero check on
-     * vCC (a1).  Useful for integer division and modulus.
+     * vB (a2-a3).  Useful for integer division and modulus.
      *
      * For: add-long/2addr, sub-long/2addr, div-long/2addr, rem-long/2addr,
      *      and-long/2addr, or-long/2addr, xor-long/2addr
-     *      rem-double/2addr
      */
     /* binop/2addr vA, vB */
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_OPB(a1)                            #  a1 <- B
     EAS2(a1, rFP, a1)                      #  a1 <- &fp[B]
     EAS2(t0, rFP, rOBJ)                    #  t0 <- &fp[A]
-    LOAD64(a2, a3, a1)               #  a2/a3 <- vBB/vBB+1
-    LOAD64(a0, a1, t0)               #  a0/a1 <- vAA/vAA+1
+    LOAD64(a2, a3, a1)               #  a2/a3 <- vB/vB+1
+    LOAD64(a0, a1, t0)               #  a0/a1 <- vA/vA+1
     .if 0
     or        t0, a2, a3             #  second arg (a2-a3) is zero?
     beqz      t0, common_errDivideByZero
@@ -6034,9 +6195,7 @@
     xor a0, a0, a2                              #  optional op
     xor a1, a1, a3                                 #  result <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(a0, a1, rOBJ)   #  vAA/vAA+1 <- a0/a1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-    /* 12-15 instructions */
+    SET_VREG64_GOTO(a0, a1, rOBJ, t0)   #  vA/vA+1 <- a0/a1
 
 
 /* ------------------------------ */
@@ -6052,7 +6211,7 @@
     GET_OPB(a3)                            #  a3 <- B
     GET_VREG(a2, a3)                       #  a2 <- vB
     EAS2(t2, rFP, rOBJ)                    #  t2 <- &fp[A]
-    LOAD64(a0, a1, t2)                     #  a0/a1 <- vAA/vAA+1
+    LOAD64(a0, a1, t2)                     #  a0/a1 <- vA/vA+1
 
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
@@ -6065,7 +6224,7 @@
     srl     a0, v1                         #  alo<- alo >> (32-(shift&31))
     sll     v1, a1, a2                     #  rhi<- ahi << (shift&31)
     or      v1, a0                         #  rhi<- rhi | alo
-    SET_VREG64_GOTO(v0, v1, rOBJ, t0)      #  vAA/vAA+1 <- a0/a1
+    SET_VREG64_GOTO(v0, v1, rOBJ, t0)      #  vA/vA+1 <- v0/v1
 
 /* ------------------------------ */
     .balign 128
@@ -6080,7 +6239,7 @@
     GET_OPB(a3)                            #  a3 <- B
     GET_VREG(a2, a3)                       #  a2 <- vB
     EAS2(t0, rFP, t2)                      #  t0 <- &fp[A]
-    LOAD64(a0, a1, t0)                     #  a0/a1 <- vAA/vAA+1
+    LOAD64(a0, a1, t0)                     #  a0/a1 <- vA/vA+1
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
 
@@ -6092,7 +6251,7 @@
     sll     a1, 1
     sll     a1, a0                         #  ahi<- ahi << (32-(shift&31))
     or      v0, a1                         #  rlo<- rlo | ahi
-    SET_VREG64_GOTO(v0, v1, t2, t0)        #  vAA/vAA+1 <- a0/a1
+    SET_VREG64_GOTO(v0, v1, t2, t0)        #  vA/vA+1 <- v0/v1
 
 /* ------------------------------ */
     .balign 128
@@ -6107,7 +6266,7 @@
     GET_OPB(a3)                            #  a3 <- B
     GET_VREG(a2, a3)                       #  a2 <- vB
     EAS2(t0, rFP, t3)                      #  t0 <- &fp[A]
-    LOAD64(a0, a1, t0)                     #  a0/a1 <- vAA/vAA+1
+    LOAD64(a0, a1, t0)                     #  a0/a1 <- vA/vA+1
 
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
@@ -6120,7 +6279,7 @@
     sll       a1, 1
     sll       a1, a0                       #  ahi<- ahi << (32-(shift&31))
     or        v0, a1                       #  rlo<- rlo | ahi
-    SET_VREG64_GOTO(v0, v1, t3, t0)        #  vAA/vAA+1 <- a0/a1
+    SET_VREG64_GOTO(v0, v1, t3, t0)        #  vA/vA+1 <- v0/v1
 
 /* ------------------------------ */
     .balign 128
@@ -6129,23 +6288,22 @@
 /* File: mips/fbinop2addr.S */
     /*
      * Generic 32-bit "/2addr" binary operation.  Provide an "instr"
-     * that specifies an instruction that performs "result = a0 op a1".
+     * that specifies an instruction that performs "fv0 = fa0 op fa1".
      * This could be an MIPS instruction or a function call.
      *
      * For: add-float/2addr, sub-float/2addr, mul-float/2addr,
-     * div-float/2addr, rem-float/2addr
+     *      div-float/2addr, rem-float/2addr
      */
     /* binop/2addr vA, vB */
-    GET_OPA4(rOBJ)                         #  t1 <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_OPB(a3)                            #  a3 <- B
     GET_VREG_F(fa0, rOBJ)
     GET_VREG_F(fa1, a3)
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
 
     add.s fv0, fa0, fa1
-    SET_VREG_F(fv0, rOBJ)                  #  vAA <- result
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG_F_GOTO(fv0, rOBJ, t0)         #  vA <- result
 
 
 /* ------------------------------ */
@@ -6155,23 +6313,22 @@
 /* File: mips/fbinop2addr.S */
     /*
      * Generic 32-bit "/2addr" binary operation.  Provide an "instr"
-     * that specifies an instruction that performs "result = a0 op a1".
+     * that specifies an instruction that performs "fv0 = fa0 op fa1".
      * This could be an MIPS instruction or a function call.
      *
      * For: add-float/2addr, sub-float/2addr, mul-float/2addr,
-     * div-float/2addr, rem-float/2addr
+     *      div-float/2addr, rem-float/2addr
      */
     /* binop/2addr vA, vB */
-    GET_OPA4(rOBJ)                         #  t1 <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_OPB(a3)                            #  a3 <- B
     GET_VREG_F(fa0, rOBJ)
     GET_VREG_F(fa1, a3)
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
 
     sub.s fv0, fa0, fa1
-    SET_VREG_F(fv0, rOBJ)                  #  vAA <- result
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG_F_GOTO(fv0, rOBJ, t0)         #  vA <- result
 
 
 /* ------------------------------ */
@@ -6181,23 +6338,22 @@
 /* File: mips/fbinop2addr.S */
     /*
      * Generic 32-bit "/2addr" binary operation.  Provide an "instr"
-     * that specifies an instruction that performs "result = a0 op a1".
+     * that specifies an instruction that performs "fv0 = fa0 op fa1".
      * This could be an MIPS instruction or a function call.
      *
      * For: add-float/2addr, sub-float/2addr, mul-float/2addr,
-     * div-float/2addr, rem-float/2addr
+     *      div-float/2addr, rem-float/2addr
      */
     /* binop/2addr vA, vB */
-    GET_OPA4(rOBJ)                         #  t1 <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_OPB(a3)                            #  a3 <- B
     GET_VREG_F(fa0, rOBJ)
     GET_VREG_F(fa1, a3)
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
 
     mul.s fv0, fa0, fa1
-    SET_VREG_F(fv0, rOBJ)                  #  vAA <- result
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG_F_GOTO(fv0, rOBJ, t0)         #  vA <- result
 
 
 /* ------------------------------ */
@@ -6207,23 +6363,22 @@
 /* File: mips/fbinop2addr.S */
     /*
      * Generic 32-bit "/2addr" binary operation.  Provide an "instr"
-     * that specifies an instruction that performs "result = a0 op a1".
+     * that specifies an instruction that performs "fv0 = fa0 op fa1".
      * This could be an MIPS instruction or a function call.
      *
      * For: add-float/2addr, sub-float/2addr, mul-float/2addr,
-     * div-float/2addr, rem-float/2addr
+     *      div-float/2addr, rem-float/2addr
      */
     /* binop/2addr vA, vB */
-    GET_OPA4(rOBJ)                         #  t1 <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_OPB(a3)                            #  a3 <- B
     GET_VREG_F(fa0, rOBJ)
     GET_VREG_F(fa1, a3)
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
 
     div.s fv0, fa0, fa1
-    SET_VREG_F(fv0, rOBJ)                  #  vAA <- result
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG_F_GOTO(fv0, rOBJ, t0)         #  vA <- result
 
 
 /* ------------------------------ */
@@ -6233,23 +6388,22 @@
 /* File: mips/fbinop2addr.S */
     /*
      * Generic 32-bit "/2addr" binary operation.  Provide an "instr"
-     * that specifies an instruction that performs "result = a0 op a1".
+     * that specifies an instruction that performs "fv0 = fa0 op fa1".
      * This could be an MIPS instruction or a function call.
      *
      * For: add-float/2addr, sub-float/2addr, mul-float/2addr,
-     * div-float/2addr, rem-float/2addr
+     *      div-float/2addr, rem-float/2addr
      */
     /* binop/2addr vA, vB */
-    GET_OPA4(rOBJ)                         #  t1 <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_OPB(a3)                            #  a3 <- B
     GET_VREG_F(fa0, rOBJ)
     GET_VREG_F(fa1, a3)
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
 
     JAL(fmodf)
-    SET_VREG_F(fv0, rOBJ)                  #  vAA <- result
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG_F_GOTO(fv0, rOBJ, t0)         #  vA <- result
 
 
 /* ------------------------------ */
@@ -6258,12 +6412,13 @@
 /* File: mips/op_add_double_2addr.S */
 /* File: mips/fbinopWide2addr.S */
     /*
-     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
-     * that specifies an instruction that performs "result = a0-a1 op a2-a3".
+     * Generic 64-bit floating-point "/2addr" binary operation.
+     * Provide an "instr" line that specifies an instruction that
+     * performs "fv0 = fa0 op fa1".
      * This could be an MIPS instruction or a function call.
      *
      * For: add-double/2addr, sub-double/2addr, mul-double/2addr,
-     *  div-double/2addr, rem-double/2addr
+     *      div-double/2addr, rem-double/2addr
      */
     /* binop/2addr vA, vB */
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
@@ -6275,9 +6430,8 @@
 
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     add.d fv0, fa0, fa1
-    SET_VREG64_F(fv0, fv0f, rOBJ)
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_F_GOTO(fv0, fv0f, rOBJ, t0)  #  vA/vA+1 <- fv0
 
 
 /* ------------------------------ */
@@ -6286,12 +6440,13 @@
 /* File: mips/op_sub_double_2addr.S */
 /* File: mips/fbinopWide2addr.S */
     /*
-     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
-     * that specifies an instruction that performs "result = a0-a1 op a2-a3".
+     * Generic 64-bit floating-point "/2addr" binary operation.
+     * Provide an "instr" line that specifies an instruction that
+     * performs "fv0 = fa0 op fa1".
      * This could be an MIPS instruction or a function call.
      *
      * For: add-double/2addr, sub-double/2addr, mul-double/2addr,
-     *  div-double/2addr, rem-double/2addr
+     *      div-double/2addr, rem-double/2addr
      */
     /* binop/2addr vA, vB */
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
@@ -6303,9 +6458,8 @@
 
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     sub.d fv0, fa0, fa1
-    SET_VREG64_F(fv0, fv0f, rOBJ)
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_F_GOTO(fv0, fv0f, rOBJ, t0)  #  vA/vA+1 <- fv0
 
 
 /* ------------------------------ */
@@ -6314,12 +6468,13 @@
 /* File: mips/op_mul_double_2addr.S */
 /* File: mips/fbinopWide2addr.S */
     /*
-     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
-     * that specifies an instruction that performs "result = a0-a1 op a2-a3".
+     * Generic 64-bit floating-point "/2addr" binary operation.
+     * Provide an "instr" line that specifies an instruction that
+     * performs "fv0 = fa0 op fa1".
      * This could be an MIPS instruction or a function call.
      *
      * For: add-double/2addr, sub-double/2addr, mul-double/2addr,
-     *  div-double/2addr, rem-double/2addr
+     *      div-double/2addr, rem-double/2addr
      */
     /* binop/2addr vA, vB */
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
@@ -6331,9 +6486,8 @@
 
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     mul.d fv0, fa0, fa1
-    SET_VREG64_F(fv0, fv0f, rOBJ)
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_F_GOTO(fv0, fv0f, rOBJ, t0)  #  vA/vA+1 <- fv0
 
 
 /* ------------------------------ */
@@ -6342,12 +6496,13 @@
 /* File: mips/op_div_double_2addr.S */
 /* File: mips/fbinopWide2addr.S */
     /*
-     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
-     * that specifies an instruction that performs "result = a0-a1 op a2-a3".
+     * Generic 64-bit floating-point "/2addr" binary operation.
+     * Provide an "instr" line that specifies an instruction that
+     * performs "fv0 = fa0 op fa1".
      * This could be an MIPS instruction or a function call.
      *
      * For: add-double/2addr, sub-double/2addr, mul-double/2addr,
-     *  div-double/2addr, rem-double/2addr
+     *      div-double/2addr, rem-double/2addr
      */
     /* binop/2addr vA, vB */
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
@@ -6359,9 +6514,8 @@
 
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     div.d fv0, fa0, fa1
-    SET_VREG64_F(fv0, fv0f, rOBJ)
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_F_GOTO(fv0, fv0f, rOBJ, t0)  #  vA/vA+1 <- fv0
 
 
 /* ------------------------------ */
@@ -6370,12 +6524,13 @@
 /* File: mips/op_rem_double_2addr.S */
 /* File: mips/fbinopWide2addr.S */
     /*
-     * Generic 64-bit "/2addr" binary operation.  Provide an "instr" line
-     * that specifies an instruction that performs "result = a0-a1 op a2-a3".
+     * Generic 64-bit floating-point "/2addr" binary operation.
+     * Provide an "instr" line that specifies an instruction that
+     * performs "fv0 = fa0 op fa1".
      * This could be an MIPS instruction or a function call.
      *
      * For: add-double/2addr, sub-double/2addr, mul-double/2addr,
-     *  div-double/2addr, rem-double/2addr
+     *      div-double/2addr, rem-double/2addr
      */
     /* binop/2addr vA, vB */
     GET_OPA4(rOBJ)                         #  rOBJ <- A+
@@ -6387,9 +6542,8 @@
 
     FETCH_ADVANCE_INST(1)                  #  advance rPC, load rINST
     JAL(fmod)
-    SET_VREG64_F(fv0, fv0f, rOBJ)
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_F_GOTO(fv0, fv0f, rOBJ, t0)  #  vA/vA+1 <- fv0
 
 
 /* ------------------------------ */
@@ -6409,12 +6563,11 @@
      * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
      *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
      */
-    # binop/lit16 vA, vB,                  /* +CCCC */
+    /* binop/lit16 vA, vB, +CCCC */
     FETCH_S(a1, 1)                         #  a1 <- ssssCCCC (sign-extended)
     GET_OPB(a2)                            #  a2 <- B
-    GET_OPA(rOBJ)                          #  rOBJ <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_VREG(a0, a2)                       #  a0 <- vB
-    and       rOBJ, rOBJ, 15
     .if 0
     # cmp a1, 0; is second operand zero?
     beqz      a1, common_errDivideByZero
@@ -6424,8 +6577,7 @@
                                   #  optional op
     addu a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 
 /* ------------------------------ */
@@ -6446,12 +6598,11 @@
      * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
      *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
      */
-    # binop/lit16 vA, vB,                  /* +CCCC */
+    /* binop/lit16 vA, vB, +CCCC */
     FETCH_S(a1, 1)                         #  a1 <- ssssCCCC (sign-extended)
     GET_OPB(a2)                            #  a2 <- B
-    GET_OPA(rOBJ)                          #  rOBJ <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_VREG(a0, a2)                       #  a0 <- vB
-    and       rOBJ, rOBJ, 15
     .if 0
     # cmp a1, 0; is second operand zero?
     beqz      a1, common_errDivideByZero
@@ -6461,8 +6612,7 @@
                                   #  optional op
     subu a0, a1, a0                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 
 /* ------------------------------ */
@@ -6482,12 +6632,11 @@
      * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
      *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
      */
-    # binop/lit16 vA, vB,                  /* +CCCC */
+    /* binop/lit16 vA, vB, +CCCC */
     FETCH_S(a1, 1)                         #  a1 <- ssssCCCC (sign-extended)
     GET_OPB(a2)                            #  a2 <- B
-    GET_OPA(rOBJ)                          #  rOBJ <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_VREG(a0, a2)                       #  a0 <- vB
-    and       rOBJ, rOBJ, 15
     .if 0
     # cmp a1, 0; is second operand zero?
     beqz      a1, common_errDivideByZero
@@ -6497,8 +6646,7 @@
                                   #  optional op
     mul a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 
 /* ------------------------------ */
@@ -6519,12 +6667,11 @@
      * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
      *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
      */
-    # binop/lit16 vA, vB,                  /* +CCCC */
+    /* binop/lit16 vA, vB, +CCCC */
     FETCH_S(a1, 1)                         #  a1 <- ssssCCCC (sign-extended)
     GET_OPB(a2)                            #  a2 <- B
-    GET_OPA(rOBJ)                          #  rOBJ <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_VREG(a0, a2)                       #  a0 <- vB
-    and       rOBJ, rOBJ, 15
     .if 1
     # cmp a1, 0; is second operand zero?
     beqz      a1, common_errDivideByZero
@@ -6534,8 +6681,7 @@
                                   #  optional op
     div a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 #else
 /* File: mips/binopLit16.S */
@@ -6551,12 +6697,11 @@
      * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
      *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
      */
-    # binop/lit16 vA, vB,                  /* +CCCC */
+    /* binop/lit16 vA, vB, +CCCC */
     FETCH_S(a1, 1)                         #  a1 <- ssssCCCC (sign-extended)
     GET_OPB(a2)                            #  a2 <- B
-    GET_OPA(rOBJ)                          #  rOBJ <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_VREG(a0, a2)                       #  a0 <- vB
-    and       rOBJ, rOBJ, 15
     .if 1
     # cmp a1, 0; is second operand zero?
     beqz      a1, common_errDivideByZero
@@ -6566,8 +6711,7 @@
     div zero, a0, a1                              #  optional op
     mflo a0                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 #endif
 
@@ -6589,12 +6733,11 @@
      * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
      *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
      */
-    # binop/lit16 vA, vB,                  /* +CCCC */
+    /* binop/lit16 vA, vB, +CCCC */
     FETCH_S(a1, 1)                         #  a1 <- ssssCCCC (sign-extended)
     GET_OPB(a2)                            #  a2 <- B
-    GET_OPA(rOBJ)                          #  rOBJ <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_VREG(a0, a2)                       #  a0 <- vB
-    and       rOBJ, rOBJ, 15
     .if 1
     # cmp a1, 0; is second operand zero?
     beqz      a1, common_errDivideByZero
@@ -6604,8 +6747,7 @@
                                   #  optional op
     mod a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 #else
 /* File: mips/binopLit16.S */
@@ -6621,12 +6763,11 @@
      * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
      *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
      */
-    # binop/lit16 vA, vB,                  /* +CCCC */
+    /* binop/lit16 vA, vB, +CCCC */
     FETCH_S(a1, 1)                         #  a1 <- ssssCCCC (sign-extended)
     GET_OPB(a2)                            #  a2 <- B
-    GET_OPA(rOBJ)                          #  rOBJ <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_VREG(a0, a2)                       #  a0 <- vB
-    and       rOBJ, rOBJ, 15
     .if 1
     # cmp a1, 0; is second operand zero?
     beqz      a1, common_errDivideByZero
@@ -6636,8 +6777,7 @@
     div zero, a0, a1                              #  optional op
     mfhi a0                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 #endif
 
@@ -6658,12 +6798,11 @@
      * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
      *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
      */
-    # binop/lit16 vA, vB,                  /* +CCCC */
+    /* binop/lit16 vA, vB, +CCCC */
     FETCH_S(a1, 1)                         #  a1 <- ssssCCCC (sign-extended)
     GET_OPB(a2)                            #  a2 <- B
-    GET_OPA(rOBJ)                          #  rOBJ <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_VREG(a0, a2)                       #  a0 <- vB
-    and       rOBJ, rOBJ, 15
     .if 0
     # cmp a1, 0; is second operand zero?
     beqz      a1, common_errDivideByZero
@@ -6673,8 +6812,7 @@
                                   #  optional op
     and a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 
 /* ------------------------------ */
@@ -6694,12 +6832,11 @@
      * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
      *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
      */
-    # binop/lit16 vA, vB,                  /* +CCCC */
+    /* binop/lit16 vA, vB, +CCCC */
     FETCH_S(a1, 1)                         #  a1 <- ssssCCCC (sign-extended)
     GET_OPB(a2)                            #  a2 <- B
-    GET_OPA(rOBJ)                          #  rOBJ <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_VREG(a0, a2)                       #  a0 <- vB
-    and       rOBJ, rOBJ, 15
     .if 0
     # cmp a1, 0; is second operand zero?
     beqz      a1, common_errDivideByZero
@@ -6709,8 +6846,7 @@
                                   #  optional op
     or a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 
 /* ------------------------------ */
@@ -6730,12 +6866,11 @@
      * For: add-int/lit16, rsub-int, mul-int/lit16, div-int/lit16,
      *      rem-int/lit16, and-int/lit16, or-int/lit16, xor-int/lit16
      */
-    # binop/lit16 vA, vB,                  /* +CCCC */
+    /* binop/lit16 vA, vB, +CCCC */
     FETCH_S(a1, 1)                         #  a1 <- ssssCCCC (sign-extended)
     GET_OPB(a2)                            #  a2 <- B
-    GET_OPA(rOBJ)                          #  rOBJ <- A+
+    GET_OPA4(rOBJ)                         #  rOBJ <- A+
     GET_VREG(a0, a2)                       #  a0 <- vB
-    and       rOBJ, rOBJ, 15
     .if 0
     # cmp a1, 0; is second operand zero?
     beqz      a1, common_errDivideByZero
@@ -6745,8 +6880,7 @@
                                   #  optional op
     xor a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-13 instructions */
+    SET_VREG_GOTO(a0, rOBJ, t0)       #  vA <- a0
 
 
 /* ------------------------------ */
@@ -6767,7 +6901,7 @@
      *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
-    # binop/lit8 vAA, vBB,                 /* +CC */
+    /* binop/lit8 vAA, vBB, +CC */
     FETCH_S(a3, 1)                         #  a3 <- ssssCCBB (sign-extended for CC)
     GET_OPA(rOBJ)                          #  rOBJ <- AA
     and       a2, a3, 255                  #  a2 <- BB
@@ -6783,7 +6917,6 @@
     addu a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-12 instructions */
 
 
 /* ------------------------------ */
@@ -6804,7 +6937,7 @@
      *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
-    # binop/lit8 vAA, vBB,                 /* +CC */
+    /* binop/lit8 vAA, vBB, +CC */
     FETCH_S(a3, 1)                         #  a3 <- ssssCCBB (sign-extended for CC)
     GET_OPA(rOBJ)                          #  rOBJ <- AA
     and       a2, a3, 255                  #  a2 <- BB
@@ -6820,7 +6953,6 @@
     subu a0, a1, a0                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-12 instructions */
 
 
 /* ------------------------------ */
@@ -6841,7 +6973,7 @@
      *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
-    # binop/lit8 vAA, vBB,                 /* +CC */
+    /* binop/lit8 vAA, vBB, +CC */
     FETCH_S(a3, 1)                         #  a3 <- ssssCCBB (sign-extended for CC)
     GET_OPA(rOBJ)                          #  rOBJ <- AA
     and       a2, a3, 255                  #  a2 <- BB
@@ -6857,7 +6989,6 @@
     mul a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-12 instructions */
 
 
 /* ------------------------------ */
@@ -6879,7 +7010,7 @@
      *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
-    # binop/lit8 vAA, vBB,                 /* +CC */
+    /* binop/lit8 vAA, vBB, +CC */
     FETCH_S(a3, 1)                         #  a3 <- ssssCCBB (sign-extended for CC)
     GET_OPA(rOBJ)                          #  rOBJ <- AA
     and       a2, a3, 255                  #  a2 <- BB
@@ -6895,7 +7026,6 @@
     div a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-12 instructions */
 
 #else
 /* File: mips/binopLit8.S */
@@ -6912,7 +7042,7 @@
      *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
-    # binop/lit8 vAA, vBB,                 /* +CC */
+    /* binop/lit8 vAA, vBB, +CC */
     FETCH_S(a3, 1)                         #  a3 <- ssssCCBB (sign-extended for CC)
     GET_OPA(rOBJ)                          #  rOBJ <- AA
     and       a2, a3, 255                  #  a2 <- BB
@@ -6928,7 +7058,6 @@
     mflo a0                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-12 instructions */
 
 #endif
 
@@ -6951,7 +7080,7 @@
      *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
-    # binop/lit8 vAA, vBB,                 /* +CC */
+    /* binop/lit8 vAA, vBB, +CC */
     FETCH_S(a3, 1)                         #  a3 <- ssssCCBB (sign-extended for CC)
     GET_OPA(rOBJ)                          #  rOBJ <- AA
     and       a2, a3, 255                  #  a2 <- BB
@@ -6967,7 +7096,6 @@
     mod a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-12 instructions */
 
 #else
 /* File: mips/binopLit8.S */
@@ -6984,7 +7112,7 @@
      *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
-    # binop/lit8 vAA, vBB,                 /* +CC */
+    /* binop/lit8 vAA, vBB, +CC */
     FETCH_S(a3, 1)                         #  a3 <- ssssCCBB (sign-extended for CC)
     GET_OPA(rOBJ)                          #  rOBJ <- AA
     and       a2, a3, 255                  #  a2 <- BB
@@ -7000,7 +7128,6 @@
     mfhi a0                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-12 instructions */
 
 #endif
 
@@ -7022,7 +7149,7 @@
      *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
-    # binop/lit8 vAA, vBB,                 /* +CC */
+    /* binop/lit8 vAA, vBB, +CC */
     FETCH_S(a3, 1)                         #  a3 <- ssssCCBB (sign-extended for CC)
     GET_OPA(rOBJ)                          #  rOBJ <- AA
     and       a2, a3, 255                  #  a2 <- BB
@@ -7038,7 +7165,6 @@
     and a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-12 instructions */
 
 
 /* ------------------------------ */
@@ -7059,7 +7185,7 @@
      *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
-    # binop/lit8 vAA, vBB,                 /* +CC */
+    /* binop/lit8 vAA, vBB, +CC */
     FETCH_S(a3, 1)                         #  a3 <- ssssCCBB (sign-extended for CC)
     GET_OPA(rOBJ)                          #  rOBJ <- AA
     and       a2, a3, 255                  #  a2 <- BB
@@ -7075,7 +7201,6 @@
     or a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-12 instructions */
 
 
 /* ------------------------------ */
@@ -7096,7 +7221,7 @@
      *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
-    # binop/lit8 vAA, vBB,                 /* +CC */
+    /* binop/lit8 vAA, vBB, +CC */
     FETCH_S(a3, 1)                         #  a3 <- ssssCCBB (sign-extended for CC)
     GET_OPA(rOBJ)                          #  rOBJ <- AA
     and       a2, a3, 255                  #  a2 <- BB
@@ -7112,7 +7237,6 @@
     xor a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-12 instructions */
 
 
 /* ------------------------------ */
@@ -7133,7 +7257,7 @@
      *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
-    # binop/lit8 vAA, vBB,                 /* +CC */
+    /* binop/lit8 vAA, vBB, +CC */
     FETCH_S(a3, 1)                         #  a3 <- ssssCCBB (sign-extended for CC)
     GET_OPA(rOBJ)                          #  rOBJ <- AA
     and       a2, a3, 255                  #  a2 <- BB
@@ -7149,7 +7273,6 @@
     sll a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-12 instructions */
 
 
 /* ------------------------------ */
@@ -7170,7 +7293,7 @@
      *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
-    # binop/lit8 vAA, vBB,                 /* +CC */
+    /* binop/lit8 vAA, vBB, +CC */
     FETCH_S(a3, 1)                         #  a3 <- ssssCCBB (sign-extended for CC)
     GET_OPA(rOBJ)                          #  rOBJ <- AA
     and       a2, a3, 255                  #  a2 <- BB
@@ -7186,7 +7309,6 @@
     sra a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-12 instructions */
 
 
 /* ------------------------------ */
@@ -7207,7 +7329,7 @@
      *      rem-int/lit8, and-int/lit8, or-int/lit8, xor-int/lit8,
      *      shl-int/lit8, shr-int/lit8, ushr-int/lit8
      */
-    # binop/lit8 vAA, vBB,                 /* +CC */
+    /* binop/lit8 vAA, vBB, +CC */
     FETCH_S(a3, 1)                         #  a3 <- ssssCCBB (sign-extended for CC)
     GET_OPA(rOBJ)                          #  rOBJ <- AA
     and       a2, a3, 255                  #  a2 <- BB
@@ -7223,7 +7345,6 @@
     srl a0, a0, a1                                 #  a0 <- op, a0-a3 changed
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
     SET_VREG_GOTO(a0, rOBJ, t0)       #  vAA <- a0
-    /* 10-12 instructions */
 
 
 /* ------------------------------ */
@@ -7231,7 +7352,7 @@
 .L_op_iget_quick: /* 0xe3 */
 /* File: mips/op_iget_quick.S */
     /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
-    # op vA, vB, offset                    /* CCCC */
+    /* op vA, vB, offset@CCCC */
     GET_OPB(a2)                            #  a2 <- B
     GET_VREG(a3, a2)                       #  a3 <- object we're operating on
     FETCH(a1, 1)                           #  a1 <- field byte offset
@@ -7248,7 +7369,7 @@
     .balign 128
 .L_op_iget_wide_quick: /* 0xe4 */
 /* File: mips/op_iget_wide_quick.S */
-    # iget-wide-quick vA, vB, offset       /* CCCC */
+    /* iget-wide-quick vA, vB, offset@CCCC */
     GET_OPB(a2)                            #  a2 <- B
     GET_VREG(a3, a2)                       #  a3 <- object we're operating on
     FETCH(a1, 1)                           #  a1 <- field byte offset
@@ -7259,8 +7380,7 @@
     LOAD64(a0, a1, t0)                     #  a0 <- obj.field (64 bits, aligned)
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(a0, a1, a2)                 #  fp[A] <- a0/a1
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_GOTO(a0, a1, a2, t0)        #  fp[A] <- a0/a1
 
 /* ------------------------------ */
     .balign 128
@@ -7277,17 +7397,16 @@
     GET_OPA4(a2)                           #  a2<- A+
     PREFETCH_INST(2)                       #  load rINST
     bnez a3, MterpPossibleException        #  bail out
-    SET_VREG_OBJECT(v0, a2)                #  fp[A] <- v0
     ADVANCE(2)                             #  advance rPC
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG_OBJECT_GOTO(v0, a2, t0)       #  fp[A] <- v0
 
 /* ------------------------------ */
     .balign 128
 .L_op_iput_quick: /* 0xe6 */
 /* File: mips/op_iput_quick.S */
     /* For: iput-quick, iput-object-quick */
-    # op vA, vB, offset                    /* CCCC */
+    /* op vA, vB, offset@CCCC */
     GET_OPB(a2)                            #  a2 <- B
     GET_VREG(a3, a2)                       #  a3 <- fp[B], the object pointer
     FETCH(a1, 1)                           #  a1 <- field byte offset
@@ -7296,15 +7415,16 @@
     GET_VREG(a0, a2)                       #  a0 <- fp[A]
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     addu      t0, a3, a1
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    GET_OPCODE_TARGET(t1)
     sw    a0, 0(t0)                    #  obj.field (8/16/32 bits) <- a0
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    JR(t1)                                 #  jump to next instruction
 
 /* ------------------------------ */
     .balign 128
 .L_op_iput_wide_quick: /* 0xe7 */
 /* File: mips/op_iput_wide_quick.S */
-    # iput-wide-quick vA, vB, offset       /* CCCC */
+    /* iput-wide-quick vA, vB, offset@CCCC */
     GET_OPA4(a0)                           #  a0 <- A(+)
     GET_OPB(a1)                            #  a1 <- B
     GET_VREG(a2, a1)                       #  a2 <- fp[B], the object pointer
@@ -7315,16 +7435,17 @@
     FETCH(a3, 1)                           #  a3 <- field byte offset
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     addu      a2, a2, a3                   #  obj.field (64 bits, aligned) <- a0/a1
-    STORE64(a0, a1, a2)                    #  obj.field (64 bits, aligned) <- a0/a1
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    GET_OPCODE_TARGET(t0)
+    STORE64(a0, a1, a2)                    #  obj.field (64 bits, aligned) <- a0/a1
+    JR(t0)                                 #  jump to next instruction
 
 /* ------------------------------ */
     .balign 128
 .L_op_iput_object_quick: /* 0xe8 */
 /* File: mips/op_iput_object_quick.S */
     /* For: iput-object-quick */
-    # op vA, vB, offset                 /* CCCC */
+    /* op vA, vB, offset@CCCC */
     EXPORT_PC()
     addu   a0, rFP, OFF_FP_SHADOWFRAME
     move   a1, rPC
@@ -7343,8 +7464,8 @@
     /*
      * Generic invoke handler wrapper.
      */
-    # op vB, {vD, vE, vF, vG, vA}, class   /* CCCC */
-    # op {vCCCC..v(CCCC+AA-1)}, meth       /* BBBB */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
     .extern MterpInvokeVirtualQuick
     EXPORT_PC()
     move    a0, rSELF
@@ -7368,8 +7489,8 @@
     /*
      * Generic invoke handler wrapper.
      */
-    # op vB, {vD, vE, vF, vG, vA}, class   /* CCCC */
-    # op {vCCCC..v(CCCC+AA-1)}, meth       /* BBBB */
+    /* op vB, {vD, vE, vF, vG, vA}, class@CCCC */
+    /* op {vCCCC..v(CCCC+AA-1)}, meth@BBBB */
     .extern MterpInvokeVirtualQuickRange
     EXPORT_PC()
     move    a0, rSELF
@@ -7391,7 +7512,7 @@
 /* File: mips/op_iput_boolean_quick.S */
 /* File: mips/op_iput_quick.S */
     /* For: iput-quick, iput-object-quick */
-    # op vA, vB, offset                    /* CCCC */
+    /* op vA, vB, offset@CCCC */
     GET_OPB(a2)                            #  a2 <- B
     GET_VREG(a3, a2)                       #  a3 <- fp[B], the object pointer
     FETCH(a1, 1)                           #  a1 <- field byte offset
@@ -7400,9 +7521,10 @@
     GET_VREG(a0, a2)                       #  a0 <- fp[A]
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     addu      t0, a3, a1
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    GET_OPCODE_TARGET(t1)
     sb    a0, 0(t0)                    #  obj.field (8/16/32 bits) <- a0
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    JR(t1)                                 #  jump to next instruction
 
 
 /* ------------------------------ */
@@ -7411,7 +7533,7 @@
 /* File: mips/op_iput_byte_quick.S */
 /* File: mips/op_iput_quick.S */
     /* For: iput-quick, iput-object-quick */
-    # op vA, vB, offset                    /* CCCC */
+    /* op vA, vB, offset@CCCC */
     GET_OPB(a2)                            #  a2 <- B
     GET_VREG(a3, a2)                       #  a3 <- fp[B], the object pointer
     FETCH(a1, 1)                           #  a1 <- field byte offset
@@ -7420,9 +7542,10 @@
     GET_VREG(a0, a2)                       #  a0 <- fp[A]
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     addu      t0, a3, a1
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    GET_OPCODE_TARGET(t1)
     sb    a0, 0(t0)                    #  obj.field (8/16/32 bits) <- a0
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    JR(t1)                                 #  jump to next instruction
 
 
 /* ------------------------------ */
@@ -7431,7 +7554,7 @@
 /* File: mips/op_iput_char_quick.S */
 /* File: mips/op_iput_quick.S */
     /* For: iput-quick, iput-object-quick */
-    # op vA, vB, offset                    /* CCCC */
+    /* op vA, vB, offset@CCCC */
     GET_OPB(a2)                            #  a2 <- B
     GET_VREG(a3, a2)                       #  a3 <- fp[B], the object pointer
     FETCH(a1, 1)                           #  a1 <- field byte offset
@@ -7440,9 +7563,10 @@
     GET_VREG(a0, a2)                       #  a0 <- fp[A]
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     addu      t0, a3, a1
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    GET_OPCODE_TARGET(t1)
     sh    a0, 0(t0)                    #  obj.field (8/16/32 bits) <- a0
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    JR(t1)                                 #  jump to next instruction
 
 
 /* ------------------------------ */
@@ -7451,7 +7575,7 @@
 /* File: mips/op_iput_short_quick.S */
 /* File: mips/op_iput_quick.S */
     /* For: iput-quick, iput-object-quick */
-    # op vA, vB, offset                    /* CCCC */
+    /* op vA, vB, offset@CCCC */
     GET_OPB(a2)                            #  a2 <- B
     GET_VREG(a3, a2)                       #  a3 <- fp[B], the object pointer
     FETCH(a1, 1)                           #  a1 <- field byte offset
@@ -7460,9 +7584,10 @@
     GET_VREG(a0, a2)                       #  a0 <- fp[A]
     FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
     addu      t0, a3, a1
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+    GET_OPCODE_TARGET(t1)
     sh    a0, 0(t0)                    #  obj.field (8/16/32 bits) <- a0
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    JR(t1)                                 #  jump to next instruction
 
 
 /* ------------------------------ */
@@ -7471,7 +7596,7 @@
 /* File: mips/op_iget_boolean_quick.S */
 /* File: mips/op_iget_quick.S */
     /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
-    # op vA, vB, offset                    /* CCCC */
+    /* op vA, vB, offset@CCCC */
     GET_OPB(a2)                            #  a2 <- B
     GET_VREG(a3, a2)                       #  a3 <- object we're operating on
     FETCH(a1, 1)                           #  a1 <- field byte offset
@@ -7491,7 +7616,7 @@
 /* File: mips/op_iget_byte_quick.S */
 /* File: mips/op_iget_quick.S */
     /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
-    # op vA, vB, offset                    /* CCCC */
+    /* op vA, vB, offset@CCCC */
     GET_OPB(a2)                            #  a2 <- B
     GET_VREG(a3, a2)                       #  a3 <- object we're operating on
     FETCH(a1, 1)                           #  a1 <- field byte offset
@@ -7511,7 +7636,7 @@
 /* File: mips/op_iget_char_quick.S */
 /* File: mips/op_iget_quick.S */
     /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
-    # op vA, vB, offset                    /* CCCC */
+    /* op vA, vB, offset@CCCC */
     GET_OPB(a2)                            #  a2 <- B
     GET_VREG(a3, a2)                       #  a3 <- object we're operating on
     FETCH(a1, 1)                           #  a1 <- field byte offset
@@ -7531,7 +7656,7 @@
 /* File: mips/op_iget_short_quick.S */
 /* File: mips/op_iget_quick.S */
     /* For: iget-quick, iget-boolean-quick, iget-byte-quick, iget-char-quick, iget-short-quick */
-    # op vA, vB, offset                    /* CCCC */
+    /* op vA, vB, offset@CCCC */
     GET_OPB(a2)                            #  a2 <- B
     GET_VREG(a3, a2)                       #  a3 <- object we're operating on
     FETCH(a1, 1)                           #  a1 <- field byte offset
@@ -7624,25 +7749,15 @@
 
 /* ------------------------------ */
     .balign 128
-.L_op_unused_fa: /* 0xfa */
-/* File: mips/op_unused_fa.S */
-/* File: mips/unused.S */
-/*
- * Bail to reference interpreter to throw.
- */
-  b MterpFallback
-
+.L_op_invoke_polymorphic: /* 0xfa */
+/* Transfer stub to alternate interpreter */
+    b    MterpFallback
 
 /* ------------------------------ */
     .balign 128
-.L_op_unused_fb: /* 0xfb */
-/* File: mips/op_unused_fb.S */
-/* File: mips/unused.S */
-/*
- * Bail to reference interpreter to throw.
- */
-  b MterpFallback
-
+.L_op_invoke_polymorphic_range: /* 0xfb */
+/* Transfer stub to alternate interpreter */
+    b    MterpFallback
 
 /* ------------------------------ */
     .balign 128
@@ -7704,264 +7819,29 @@
     .balign 4
 artMterpAsmSisterStart:
 
-/* continuation for op_cmpl_float */
-
-.Lop_cmpl_float_nan:
-    li rTEMP, -1
-
-.Lop_cmpl_float_finish:
-    GET_OPA(rOBJ)
-    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(rTEMP, rOBJ, t0)         #  vAA <- rTEMP
-
-/* continuation for op_cmpg_float */
-
-.Lop_cmpg_float_nan:
-    li rTEMP, 1
-
-.Lop_cmpg_float_finish:
-    GET_OPA(rOBJ)
-    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(rTEMP, rOBJ, t0)         #  vAA <- rTEMP
-
-/* continuation for op_cmpl_double */
-
-.Lop_cmpl_double_nan:
-    li rTEMP, -1
-
-.Lop_cmpl_double_finish:
-    GET_OPA(rOBJ)
-    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(rTEMP, rOBJ, t0)         #  vAA <- rTEMP
-
-/* continuation for op_cmpg_double */
-
-.Lop_cmpg_double_nan:
-    li rTEMP, 1
-
-.Lop_cmpg_double_finish:
-    GET_OPA(rOBJ)
-    FETCH_ADVANCE_INST(2)                  #  advance rPC, load rINST
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG_GOTO(rTEMP, rOBJ, t0)         #  vAA <- rTEMP
-
-/* continuation for op_float_to_int */
-
-/*
- * Not an entry point as it is used only once !!
- */
-f2i_doconv:
-#ifdef MIPS32REVGE6
-    l.s       fa1, .LFLOAT_TO_INT_max
-    cmp.ule.s ft2, fa1, fa0
-    l.s       fv0, .LFLOAT_TO_INT_ret_max
-    bc1nez    ft2, .Lop_float_to_int_set_vreg_f
-
-    l.s       fa1, .LFLOAT_TO_INT_min
-    cmp.ule.s ft2, fa0, fa1
-    l.s       fv0, .LFLOAT_TO_INT_ret_min
-    bc1nez    ft2, .Lop_float_to_int_set_vreg_f
-
-    mov.s     fa1, fa0
-    cmp.un.s  ft2, fa0, fa1
-    li.s      fv0, 0
-    bc1nez    ft2, .Lop_float_to_int_set_vreg_f
-#else
-    l.s       fa1, .LFLOAT_TO_INT_max
-    c.ole.s   fcc0, fa1, fa0
-    l.s       fv0, .LFLOAT_TO_INT_ret_max
-    bc1t      .Lop_float_to_int_set_vreg_f
-
-    l.s       fa1, .LFLOAT_TO_INT_min
-    c.ole.s   fcc0, fa0, fa1
-    l.s       fv0, .LFLOAT_TO_INT_ret_min
-    bc1t      .Lop_float_to_int_set_vreg_f
-
-    mov.s     fa1, fa0
-    c.un.s    fcc0, fa0, fa1
-    li.s      fv0, 0
-    bc1t      .Lop_float_to_int_set_vreg_f
-#endif
-
-    trunc.w.s  fv0, fa0
-    b         .Lop_float_to_int_set_vreg_f
-
-.LFLOAT_TO_INT_max:
-    .word 0x4f000000
-.LFLOAT_TO_INT_min:
-    .word 0xcf000000
-.LFLOAT_TO_INT_ret_max:
-    .word 0x7fffffff
-.LFLOAT_TO_INT_ret_min:
-    .word 0x80000000
-
 /* continuation for op_float_to_long */
 
-f2l_doconv:
-#ifdef MIPS32REVGE6
-    l.s       fa1, .LLONG_TO_max
-    cmp.ule.s ft2, fa1, fa0
-    li        rRESULT0, ~0
-    li        rRESULT1, ~0x80000000
-    bc1nez    ft2, .Lop_float_to_long_set_vreg
-
-    l.s       fa1, .LLONG_TO_min
-    cmp.ule.s ft2, fa0, fa1
-    li        rRESULT0, 0
-    li        rRESULT1, 0x80000000
-    bc1nez    ft2, .Lop_float_to_long_set_vreg
-
-    mov.s     fa1, fa0
-    cmp.un.s  ft2, fa0, fa1
-    li        rRESULT0, 0
-    li        rRESULT1, 0
-    bc1nez    ft2, .Lop_float_to_long_set_vreg
-#else
-    l.s       fa1, .LLONG_TO_max
-    c.ole.s   fcc0, fa1, fa0
-    li        rRESULT0, ~0
-    li        rRESULT1, ~0x80000000
-    bc1t      .Lop_float_to_long_set_vreg
-
-    l.s       fa1, .LLONG_TO_min
-    c.ole.s   fcc0, fa0, fa1
-    li        rRESULT0, 0
-    li        rRESULT1, 0x80000000
-    bc1t      .Lop_float_to_long_set_vreg
-
-    mov.s     fa1, fa0
-    c.un.s    fcc0, fa0, fa1
-    li        rRESULT0, 0
-    li        rRESULT1, 0
-    bc1t      .Lop_float_to_long_set_vreg
+#ifndef MIPS32REVGE6
+.Lop_float_to_long_get_opcode:
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+.Lop_float_to_long_set_vreg:
+    SET_VREG64_GOTO(rRESULT0, rRESULT1, rOBJ, t1)   #  vA/vA+1 <- v0/v1
 #endif
 
-    JAL(__fixsfdi)
-
-    b         .Lop_float_to_long_set_vreg
-
-.LLONG_TO_max:
-    .word 0x5f000000
-
-.LLONG_TO_min:
-    .word 0xdf000000
-
-/* continuation for op_double_to_int */
-
-d2i_doconv:
-#ifdef MIPS32REVGE6
-    la        t0, .LDOUBLE_TO_INT_max
-    LOAD64_F(fa1, fa1f, t0)
-    cmp.ule.d ft2, fa1, fa0
-    l.s       fv0, .LDOUBLE_TO_INT_maxret
-    bc1nez    ft2, .Lop_double_to_int_set_vreg_f
-
-    la        t0, .LDOUBLE_TO_INT_min
-    LOAD64_F(fa1, fa1f, t0)
-    cmp.ule.d ft2, fa0, fa1
-    l.s       fv0, .LDOUBLE_TO_INT_minret
-    bc1nez    ft2, .Lop_double_to_int_set_vreg_f
-
-    mov.d     fa1, fa0
-    cmp.un.d  ft2, fa0, fa1
-    li.s      fv0, 0
-    bc1nez    ft2, .Lop_double_to_int_set_vreg_f
-#else
-    la        t0, .LDOUBLE_TO_INT_max
-    LOAD64_F(fa1, fa1f, t0)
-    c.ole.d   fcc0, fa1, fa0
-    l.s       fv0, .LDOUBLE_TO_INT_maxret
-    bc1t      .Lop_double_to_int_set_vreg_f
-
-    la        t0, .LDOUBLE_TO_INT_min
-    LOAD64_F(fa1, fa1f, t0)
-    c.ole.d   fcc0, fa0, fa1
-    l.s       fv0, .LDOUBLE_TO_INT_minret
-    bc1t      .Lop_double_to_int_set_vreg_f
-
-    mov.d     fa1, fa0
-    c.un.d    fcc0, fa0, fa1
-    li.s      fv0, 0
-    bc1t      .Lop_double_to_int_set_vreg_f
-#endif
-
-    trunc.w.d  fv0, fa0
-    b         .Lop_double_to_int_set_vreg_f
-
-.LDOUBLE_TO_INT_max:
-    .dword 0x41dfffffffc00000
-.LDOUBLE_TO_INT_min:
-    .dword 0xc1e0000000000000              #  minint, as a double (high word)
-.LDOUBLE_TO_INT_maxret:
-    .word 0x7fffffff
-.LDOUBLE_TO_INT_minret:
-    .word 0x80000000
-
 /* continuation for op_double_to_long */
 
-d2l_doconv:
-#ifdef MIPS32REVGE6
-    la        t0, .LDOUBLE_TO_LONG_max
-    LOAD64_F(fa1, fa1f, t0)
-    cmp.ule.d ft2, fa1, fa0
-    la        t0, .LDOUBLE_TO_LONG_ret_max
-    LOAD64(rRESULT0, rRESULT1, t0)
-    bc1nez    ft2, .Lop_double_to_long_set_vreg
-
-    la        t0, .LDOUBLE_TO_LONG_min
-    LOAD64_F(fa1, fa1f, t0)
-    cmp.ule.d ft2, fa0, fa1
-    la        t0, .LDOUBLE_TO_LONG_ret_min
-    LOAD64(rRESULT0, rRESULT1, t0)
-    bc1nez    ft2, .Lop_double_to_long_set_vreg
-
-    mov.d     fa1, fa0
-    cmp.un.d  ft2, fa0, fa1
-    li        rRESULT0, 0
-    li        rRESULT1, 0
-    bc1nez    ft2, .Lop_double_to_long_set_vreg
-#else
-    la        t0, .LDOUBLE_TO_LONG_max
-    LOAD64_F(fa1, fa1f, t0)
-    c.ole.d   fcc0, fa1, fa0
-    la        t0, .LDOUBLE_TO_LONG_ret_max
-    LOAD64(rRESULT0, rRESULT1, t0)
-    bc1t      .Lop_double_to_long_set_vreg
-
-    la        t0, .LDOUBLE_TO_LONG_min
-    LOAD64_F(fa1, fa1f, t0)
-    c.ole.d   fcc0, fa0, fa1
-    la        t0, .LDOUBLE_TO_LONG_ret_min
-    LOAD64(rRESULT0, rRESULT1, t0)
-    bc1t      .Lop_double_to_long_set_vreg
-
-    mov.d     fa1, fa0
-    c.un.d    fcc0, fa0, fa1
-    li        rRESULT0, 0
-    li        rRESULT1, 0
-    bc1t      .Lop_double_to_long_set_vreg
+#ifndef MIPS32REVGE6
+.Lop_double_to_long_get_opcode:
+    GET_INST_OPCODE(t1)                    #  extract opcode from rINST
+.Lop_double_to_long_set_vreg:
+    SET_VREG64_GOTO(rRESULT0, rRESULT1, rOBJ, t1)   #  vA/vA+1 <- v0/v1
 #endif
-    JAL(__fixdfdi)
-    b         .Lop_double_to_long_set_vreg
-
-.LDOUBLE_TO_LONG_max:
-    .dword 0x43e0000000000000              #  maxlong, as a double (high word)
-.LDOUBLE_TO_LONG_min:
-    .dword 0xc3e0000000000000              #  minlong, as a double (high word)
-.LDOUBLE_TO_LONG_ret_max:
-    .dword 0x7fffffffffffffff
-.LDOUBLE_TO_LONG_ret_min:
-    .dword 0x8000000000000000
 
 /* continuation for op_mul_long */
 
 .Lop_mul_long_finish:
     GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    SET_VREG64(v0, v1, a0)                 #  vAA::vAA+1 <- v0(low) :: v1(high)
-    GOTO_OPCODE(t0)                        #  jump to next instruction
+    SET_VREG64_GOTO(v0, v1, a0, t0)        #  vAA/vAA+1 <- v0(low)/v1(high)
 
 /* continuation for op_shl_long */
 
@@ -7979,51 +7859,21 @@
 .Lop_ushr_long_finish:
     SET_VREG64_GOTO(v1, zero, rOBJ, t0)    #  vAA/vAA+1 <- rlo/rhi
 
-/* continuation for op_add_double */
-
-.Lop_add_double_finish:
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-
-/* continuation for op_sub_double */
-
-.Lop_sub_double_finish:
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-
-/* continuation for op_mul_double */
-
-.Lop_mul_double_finish:
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-
-/* continuation for op_div_double */
-
-.Lop_div_double_finish:
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-
-/* continuation for op_rem_double */
-
-.Lop_rem_double_finish:
-    GET_INST_OPCODE(t0)                    #  extract opcode from rINST
-    GOTO_OPCODE(t0)                        #  jump to next instruction
-
 /* continuation for op_shl_long_2addr */
 
 .Lop_shl_long_2addr_finish:
-    SET_VREG64_GOTO(zero, v0, rOBJ, t0)    #  vAA/vAA+1 <- rlo/rhi
+    SET_VREG64_GOTO(zero, v0, rOBJ, t0)    #  vA/vA+1 <- rlo/rhi
 
 /* continuation for op_shr_long_2addr */
 
 .Lop_shr_long_2addr_finish:
     sra     a3, a1, 31                     #  a3<- sign(ah)
-    SET_VREG64_GOTO(v1, a3, t2, t0)        #  vAA/vAA+1 <- rlo/rhi
+    SET_VREG64_GOTO(v1, a3, t2, t0)        #  vA/vA+1 <- rlo/rhi
 
 /* continuation for op_ushr_long_2addr */
 
 .Lop_ushr_long_2addr_finish:
-    SET_VREG64_GOTO(v1, zero, t3, t0)      #  vAA/vAA+1 <- rlo/rhi
+    SET_VREG64_GOTO(v1, zero, t3, t0)      #  vA/vA+1 <- rlo/rhi
 
     .size   artMterpAsmSisterStart, .-artMterpAsmSisterStart
     .global artMterpAsmSisterEnd
@@ -12537,7 +12387,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_unused_fa: /* 0xfa */
+.L_ALT_op_invoke_polymorphic: /* 0xfa */
 /* File: mips/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -12555,7 +12405,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_unused_fb: /* 0xfb */
+.L_ALT_op_invoke_polymorphic_range: /* 0xfb */
 /* File: mips/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -12801,7 +12651,7 @@
     REFRESH_IBASE()
     addu    a2, rINST, rINST            # a2<- byte offset
     FETCH_ADVANCE_INST_RB(a2)           # update rPC, load rINST
-    and     ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    and     ra, THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     bnez    ra, .L_suspend_request_pending
     GET_INST_OPCODE(t0)                 # extract opcode from rINST
     GOTO_OPCODE(t0)                     # jump to next instruction
diff --git a/runtime/interpreter/mterp/out/mterp_mips64.S b/runtime/interpreter/mterp/out/mterp_mips64.S
index 88e972f..037787f 100644
--- a/runtime/interpreter/mterp/out/mterp_mips64.S
+++ b/runtime/interpreter/mterp/out/mterp_mips64.S
@@ -637,7 +637,7 @@
     jal     MterpThreadFenceForConstructor
     lw      ra, THREAD_FLAGS_OFFSET(rSELF)
     move    a0, rSELF
-    and     ra, ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    and     ra, ra, THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     beqzc   ra, 1f
     jal     MterpSuspendCheck           # (self)
 1:
@@ -659,7 +659,7 @@
     jal     MterpThreadFenceForConstructor
     lw      ra, THREAD_FLAGS_OFFSET(rSELF)
     move    a0, rSELF
-    and     ra, ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    and     ra, ra, THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     beqzc   ra, 1f
     jal     MterpSuspendCheck           # (self)
 1:
@@ -681,7 +681,7 @@
     jal     MterpThreadFenceForConstructor
     lw      ra, THREAD_FLAGS_OFFSET(rSELF)
     move    a0, rSELF
-    and     ra, ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    and     ra, ra, THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     beqzc   ra, 1f
     jal     MterpSuspendCheck           # (self)
 1:
@@ -705,7 +705,7 @@
     jal     MterpThreadFenceForConstructor
     lw      ra, THREAD_FLAGS_OFFSET(rSELF)
     move    a0, rSELF
-    and     ra, ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    and     ra, ra, THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     beqzc   ra, 1f
     jal     MterpSuspendCheck           # (self)
 1:
@@ -3121,7 +3121,7 @@
     .extern MterpSuspendCheck
     lw      ra, THREAD_FLAGS_OFFSET(rSELF)
     move    a0, rSELF
-    and     ra, ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    and     ra, ra, THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     beqzc   ra, 1f
     jal     MterpSuspendCheck           # (self)
 1:
@@ -7080,26 +7080,16 @@
 
 /* ------------------------------ */
     .balign 128
-.L_op_unused_fa: /* 0xfa */
-/* File: mips64/op_unused_fa.S */
-/* File: mips64/unused.S */
-/*
- * Bail to reference interpreter to throw.
- */
+.L_op_invoke_polymorphic: /* 0xfa */
+/* Transfer stub to alternate interpreter */
     b       MterpFallback
 
-
 /* ------------------------------ */
     .balign 128
-.L_op_unused_fb: /* 0xfb */
-/* File: mips64/op_unused_fb.S */
-/* File: mips64/unused.S */
-/*
- * Bail to reference interpreter to throw.
- */
+.L_op_invoke_polymorphic_range: /* 0xfb */
+/* Transfer stub to alternate interpreter */
     b       MterpFallback
 
-
 /* ------------------------------ */
     .balign 128
 .L_op_unused_fc: /* 0xfc */
@@ -11962,7 +11952,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_unused_fa: /* 0xfa */
+.L_ALT_op_invoke_polymorphic: /* 0xfa */
 /* File: mips64/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -11981,7 +11971,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_unused_fb: /* 0xfb */
+.L_ALT_op_invoke_polymorphic_range: /* 0xfb */
 /* File: mips64/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -12189,7 +12179,7 @@
     REFRESH_IBASE
     daddu   a2, rINST, rINST            # a2<- byte offset
     FETCH_ADVANCE_INST_RB a2            # update rPC, load rINST
-    and     ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    and     ra, THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     bnezc   ra, .L_suspend_request_pending
     GET_INST_OPCODE v0                  # extract opcode from rINST
     GOTO_OPCODE v0                      # jump to next instruction
@@ -12306,7 +12296,7 @@
     lw      ra, THREAD_FLAGS_OFFSET(rSELF)
     sd      a0, 0(a2)
     move    a0, rSELF
-    and     ra, ra, (THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST)
+    and     ra, ra, THREAD_SUSPEND_OR_CHECKPOINT_REQUEST
     beqzc   ra, check2
     jal     MterpSuspendCheck                       # (self)
 check2:
diff --git a/runtime/interpreter/mterp/out/mterp_x86.S b/runtime/interpreter/mterp/out/mterp_x86.S
index 29ee248..695d1e4 100644
--- a/runtime/interpreter/mterp/out/mterp_x86.S
+++ b/runtime/interpreter/mterp/out/mterp_x86.S
@@ -612,7 +612,7 @@
     .extern MterpThreadFenceForConstructor
     call    SYMBOL(MterpThreadFenceForConstructor)
     movl    rSELF, %eax
-    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
+    testl   $(THREAD_SUSPEND_OR_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
     jz      1f
     movl    %eax, OUT_ARG0(%esp)
     call    SYMBOL(MterpSuspendCheck)
@@ -634,7 +634,7 @@
     .extern MterpThreadFenceForConstructor
     call    SYMBOL(MterpThreadFenceForConstructor)
     movl    rSELF, %eax
-    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
+    testl   $(THREAD_SUSPEND_OR_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
     jz      1f
     movl    %eax, OUT_ARG0(%esp)
     call    SYMBOL(MterpSuspendCheck)
@@ -654,7 +654,7 @@
     .extern MterpThreadFenceForConstructor
     call    SYMBOL(MterpThreadFenceForConstructor)
     movl    rSELF, %eax
-    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
+    testl   $(THREAD_SUSPEND_OR_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
     jz      1f
     movl    %eax, OUT_ARG0(%esp)
     call    SYMBOL(MterpSuspendCheck)
@@ -677,7 +677,7 @@
     .extern MterpThreadFenceForConstructor
     call    SYMBOL(MterpThreadFenceForConstructor)
     movl    rSELF, %eax
-    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
+    testl   $(THREAD_SUSPEND_OR_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
     jz      1f
     movl    %eax, OUT_ARG0(%esp)
     call    SYMBOL(MterpSuspendCheck)
@@ -3104,7 +3104,7 @@
 .L_op_return_void_no_barrier: /* 0x73 */
 /* File: x86/op_return_void_no_barrier.S */
     movl    rSELF, %eax
-    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
+    testl   $(THREAD_SUSPEND_OR_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
     jz      1f
     movl    %eax, OUT_ARG0(%esp)
     call    SYMBOL(MterpSuspendCheck)
@@ -6278,23 +6278,15 @@
 
 /* ------------------------------ */
     .balign 128
-.L_op_unused_fa: /* 0xfa */
-/* File: x86/op_unused_fa.S */
-/* File: x86/unused.S */
-/*
- * Bail to reference interpreter to throw.
- */
+.L_op_invoke_polymorphic: /* 0xfa */
+/* Transfer stub to alternate interpreter */
     jmp     MterpFallback
 
 
 /* ------------------------------ */
     .balign 128
-.L_op_unused_fb: /* 0xfb */
-/* File: x86/op_unused_fb.S */
-/* File: x86/unused.S */
-/*
- * Bail to reference interpreter to throw.
- */
+.L_op_invoke_polymorphic_range: /* 0xfb */
+/* Transfer stub to alternate interpreter */
     jmp     MterpFallback
 
 
@@ -12370,7 +12362,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_unused_fa: /* 0xfa */
+.L_ALT_op_invoke_polymorphic: /* 0xfa */
 /* File: x86/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -12394,7 +12386,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_unused_fb: /* 0xfb */
+.L_ALT_op_invoke_polymorphic_range: /* 0xfb */
 /* File: x86/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -12686,7 +12678,7 @@
     je      .L_add_batch                    # counted down to zero - report
 .L_resume_backward_branch:
     movl    rSELF, %eax
-    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
+    testl   $(THREAD_SUSPEND_OR_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
     leal    (rPC, rINST, 2), rPC
     FETCH_INST
     jnz     .L_suspend_request_pending
diff --git a/runtime/interpreter/mterp/out/mterp_x86_64.S b/runtime/interpreter/mterp/out/mterp_x86_64.S
index bc1abcc..2eab58c 100644
--- a/runtime/interpreter/mterp/out/mterp_x86_64.S
+++ b/runtime/interpreter/mterp/out/mterp_x86_64.S
@@ -587,7 +587,7 @@
     .extern MterpThreadFenceForConstructor
     call    SYMBOL(MterpThreadFenceForConstructor)
     movq    rSELF, OUT_ARG0
-    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
+    testl   $(THREAD_SUSPEND_OR_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
     jz      1f
     call    SYMBOL(MterpSuspendCheck)
 1:
@@ -607,7 +607,7 @@
     .extern MterpThreadFenceForConstructor
     call    SYMBOL(MterpThreadFenceForConstructor)
     movq    rSELF, OUT_ARG0
-    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
+    testl   $(THREAD_SUSPEND_OR_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
     jz      1f
     call    SYMBOL(MterpSuspendCheck)
 1:
@@ -625,7 +625,7 @@
     .extern MterpThreadFenceForConstructor
     call    SYMBOL(MterpThreadFenceForConstructor)
     movq    rSELF, OUT_ARG0
-    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
+    testl   $(THREAD_SUSPEND_OR_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
     jz      1f
     call    SYMBOL(MterpSuspendCheck)
 1:
@@ -646,7 +646,7 @@
     .extern MterpThreadFenceForConstructor
     call    SYMBOL(MterpThreadFenceForConstructor)
     movq    rSELF, OUT_ARG0
-    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
+    testl   $(THREAD_SUSPEND_OR_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
     jz      1f
     call    SYMBOL(MterpSuspendCheck)
 1:
@@ -2972,7 +2972,7 @@
 .L_op_return_void_no_barrier: /* 0x73 */
 /* File: x86_64/op_return_void_no_barrier.S */
     movq    rSELF, OUT_ARG0
-    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
+    testl   $(THREAD_SUSPEND_OR_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
     jz      1f
     call    SYMBOL(MterpSuspendCheck)
 1:
@@ -6043,23 +6043,15 @@
 
 /* ------------------------------ */
     .balign 128
-.L_op_unused_fa: /* 0xfa */
-/* File: x86_64/op_unused_fa.S */
-/* File: x86_64/unused.S */
-/*
- * Bail to reference interpreter to throw.
- */
+.L_op_invoke_polymorphic: /* 0xfa */
+/* Transfer stub to alternate interpreter */
     jmp     MterpFallback
 
 
 /* ------------------------------ */
     .balign 128
-.L_op_unused_fb: /* 0xfb */
-/* File: x86_64/op_unused_fb.S */
-/* File: x86_64/unused.S */
-/*
- * Bail to reference interpreter to throw.
- */
+.L_op_invoke_polymorphic_range: /* 0xfb */
+/* Transfer stub to alternate interpreter */
     jmp     MterpFallback
 
 
@@ -11635,7 +11627,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_unused_fa: /* 0xfa */
+.L_ALT_op_invoke_polymorphic: /* 0xfa */
 /* File: x86_64/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -11657,7 +11649,7 @@
 
 /* ------------------------------ */
     .balign 128
-.L_ALT_op_unused_fb: /* 0xfb */
+.L_ALT_op_invoke_polymorphic_range: /* 0xfb */
 /* File: x86_64/alt_stub.S */
 /*
  * Inter-instruction transfer stub.  Call out to MterpCheckBefore to handle
@@ -11923,7 +11915,7 @@
     je      .L_add_batch                    # counted down to zero - report
 .L_resume_backward_branch:
     movq    rSELF, %rax
-    testl   $(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%rax)
+    testl   $(THREAD_SUSPEND_OR_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%rax)
     REFRESH_IBASE
     leaq    (rPC, rINSTq, 2), rPC
     FETCH_INST
diff --git a/runtime/interpreter/mterp/x86/footer.S b/runtime/interpreter/mterp/x86/footer.S
index e8c8ca8..088cb12 100644
--- a/runtime/interpreter/mterp/x86/footer.S
+++ b/runtime/interpreter/mterp/x86/footer.S
@@ -167,7 +167,7 @@
     je      .L_add_batch                    # counted down to zero - report
 .L_resume_backward_branch:
     movl    rSELF, %eax
-    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
+    testl   $$(THREAD_SUSPEND_OR_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
     leal    (rPC, rINST, 2), rPC
     FETCH_INST
     jnz     .L_suspend_request_pending
diff --git a/runtime/interpreter/mterp/x86/op_return.S b/runtime/interpreter/mterp/x86/op_return.S
index 8e3cfad..a8ebbed 100644
--- a/runtime/interpreter/mterp/x86/op_return.S
+++ b/runtime/interpreter/mterp/x86/op_return.S
@@ -7,7 +7,7 @@
     .extern MterpThreadFenceForConstructor
     call    SYMBOL(MterpThreadFenceForConstructor)
     movl    rSELF, %eax
-    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
+    testl   $$(THREAD_SUSPEND_OR_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
     jz      1f
     movl    %eax, OUT_ARG0(%esp)
     call    SYMBOL(MterpSuspendCheck)
diff --git a/runtime/interpreter/mterp/x86/op_return_void.S b/runtime/interpreter/mterp/x86/op_return_void.S
index a14a4f6..d9eddf3 100644
--- a/runtime/interpreter/mterp/x86/op_return_void.S
+++ b/runtime/interpreter/mterp/x86/op_return_void.S
@@ -1,7 +1,7 @@
     .extern MterpThreadFenceForConstructor
     call    SYMBOL(MterpThreadFenceForConstructor)
     movl    rSELF, %eax
-    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
+    testl   $$(THREAD_SUSPEND_OR_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
     jz      1f
     movl    %eax, OUT_ARG0(%esp)
     call    SYMBOL(MterpSuspendCheck)
diff --git a/runtime/interpreter/mterp/x86/op_return_void_no_barrier.S b/runtime/interpreter/mterp/x86/op_return_void_no_barrier.S
index 1d0e933..2fbda6b 100644
--- a/runtime/interpreter/mterp/x86/op_return_void_no_barrier.S
+++ b/runtime/interpreter/mterp/x86/op_return_void_no_barrier.S
@@ -1,5 +1,5 @@
     movl    rSELF, %eax
-    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
+    testl   $$(THREAD_SUSPEND_OR_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
     jz      1f
     movl    %eax, OUT_ARG0(%esp)
     call    SYMBOL(MterpSuspendCheck)
diff --git a/runtime/interpreter/mterp/x86/op_return_wide.S b/runtime/interpreter/mterp/x86/op_return_wide.S
index 7d1850a..5fff626 100644
--- a/runtime/interpreter/mterp/x86/op_return_wide.S
+++ b/runtime/interpreter/mterp/x86/op_return_wide.S
@@ -5,7 +5,7 @@
     .extern MterpThreadFenceForConstructor
     call    SYMBOL(MterpThreadFenceForConstructor)
     movl    rSELF, %eax
-    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
+    testl   $$(THREAD_SUSPEND_OR_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%eax)
     jz      1f
     movl    %eax, OUT_ARG0(%esp)
     call    SYMBOL(MterpSuspendCheck)
diff --git a/runtime/interpreter/mterp/x86/op_unused_fa.S b/runtime/interpreter/mterp/x86/op_unused_fa.S
deleted file mode 100644
index 31d98c1..0000000
--- a/runtime/interpreter/mterp/x86/op_unused_fa.S
+++ /dev/null
@@ -1 +0,0 @@
-%include "x86/unused.S"
diff --git a/runtime/interpreter/mterp/x86/op_unused_fb.S b/runtime/interpreter/mterp/x86/op_unused_fb.S
deleted file mode 100644
index 31d98c1..0000000
--- a/runtime/interpreter/mterp/x86/op_unused_fb.S
+++ /dev/null
@@ -1 +0,0 @@
-%include "x86/unused.S"
diff --git a/runtime/interpreter/mterp/x86_64/footer.S b/runtime/interpreter/mterp/x86_64/footer.S
index f78f163..ed5e5ea 100644
--- a/runtime/interpreter/mterp/x86_64/footer.S
+++ b/runtime/interpreter/mterp/x86_64/footer.S
@@ -151,7 +151,7 @@
     je      .L_add_batch                    # counted down to zero - report
 .L_resume_backward_branch:
     movq    rSELF, %rax
-    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%rax)
+    testl   $$(THREAD_SUSPEND_OR_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(%rax)
     REFRESH_IBASE
     leaq    (rPC, rINSTq, 2), rPC
     FETCH_INST
diff --git a/runtime/interpreter/mterp/x86_64/op_return.S b/runtime/interpreter/mterp/x86_64/op_return.S
index 07e0e53..8cb6cba 100644
--- a/runtime/interpreter/mterp/x86_64/op_return.S
+++ b/runtime/interpreter/mterp/x86_64/op_return.S
@@ -7,7 +7,7 @@
     .extern MterpThreadFenceForConstructor
     call    SYMBOL(MterpThreadFenceForConstructor)
     movq    rSELF, OUT_ARG0
-    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
+    testl   $$(THREAD_SUSPEND_OR_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
     jz      1f
     call    SYMBOL(MterpSuspendCheck)
 1:
diff --git a/runtime/interpreter/mterp/x86_64/op_return_void.S b/runtime/interpreter/mterp/x86_64/op_return_void.S
index 6a12df3..ba68e7e 100644
--- a/runtime/interpreter/mterp/x86_64/op_return_void.S
+++ b/runtime/interpreter/mterp/x86_64/op_return_void.S
@@ -1,7 +1,7 @@
     .extern MterpThreadFenceForConstructor
     call    SYMBOL(MterpThreadFenceForConstructor)
     movq    rSELF, OUT_ARG0
-    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
+    testl   $$(THREAD_SUSPEND_OR_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
     jz      1f
     call    SYMBOL(MterpSuspendCheck)
 1:
diff --git a/runtime/interpreter/mterp/x86_64/op_return_void_no_barrier.S b/runtime/interpreter/mterp/x86_64/op_return_void_no_barrier.S
index 822b2e8..6799da1 100644
--- a/runtime/interpreter/mterp/x86_64/op_return_void_no_barrier.S
+++ b/runtime/interpreter/mterp/x86_64/op_return_void_no_barrier.S
@@ -1,5 +1,5 @@
     movq    rSELF, OUT_ARG0
-    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
+    testl   $$(THREAD_SUSPEND_OR_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
     jz      1f
     call    SYMBOL(MterpSuspendCheck)
 1:
diff --git a/runtime/interpreter/mterp/x86_64/op_return_wide.S b/runtime/interpreter/mterp/x86_64/op_return_wide.S
index 288eb96..d6d6d1b 100644
--- a/runtime/interpreter/mterp/x86_64/op_return_wide.S
+++ b/runtime/interpreter/mterp/x86_64/op_return_wide.S
@@ -5,7 +5,7 @@
     .extern MterpThreadFenceForConstructor
     call    SYMBOL(MterpThreadFenceForConstructor)
     movq    rSELF, OUT_ARG0
-    testl   $$(THREAD_SUSPEND_REQUEST | THREAD_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
+    testl   $$(THREAD_SUSPEND_OR_CHECKPOINT_REQUEST), THREAD_FLAGS_OFFSET(OUT_ARG0)
     jz      1f
     call    SYMBOL(MterpSuspendCheck)
 1:
diff --git a/runtime/interpreter/mterp/x86_64/op_unused_fa.S b/runtime/interpreter/mterp/x86_64/op_unused_fa.S
deleted file mode 100644
index 280615f..0000000
--- a/runtime/interpreter/mterp/x86_64/op_unused_fa.S
+++ /dev/null
@@ -1 +0,0 @@
-%include "x86_64/unused.S"
diff --git a/runtime/interpreter/mterp/x86_64/op_unused_fb.S b/runtime/interpreter/mterp/x86_64/op_unused_fb.S
deleted file mode 100644
index 280615f..0000000
--- a/runtime/interpreter/mterp/x86_64/op_unused_fb.S
+++ /dev/null
@@ -1 +0,0 @@
-%include "x86_64/unused.S"
diff --git a/runtime/interpreter/unstarted_runtime.cc b/runtime/interpreter/unstarted_runtime.cc
index ac5401f..a5b1038 100644
--- a/runtime/interpreter/unstarted_runtime.cc
+++ b/runtime/interpreter/unstarted_runtime.cc
@@ -38,6 +38,7 @@
 #include "gc/reference_processor.h"
 #include "handle_scope-inl.h"
 #include "interpreter/interpreter_common.h"
+#include "jvalue-inl.h"
 #include "mirror/array-inl.h"
 #include "mirror/class.h"
 #include "mirror/field-inl.h"
@@ -126,7 +127,8 @@
   if (found == nullptr && abort_if_not_found) {
     if (!self->IsExceptionPending()) {
       AbortTransactionOrFail(self, "%s failed in un-started runtime for class: %s",
-                             method_name.c_str(), PrettyDescriptor(descriptor.c_str()).c_str());
+                             method_name.c_str(),
+                             PrettyDescriptor(descriptor.c_str()).c_str());
     }
     return;
   }
@@ -150,7 +152,7 @@
     REQUIRES_SHARED(Locks::mutator_lock_) {
   if (self->IsExceptionPending()) {
     // If it is not the transaction abort exception, wrap it.
-    std::string type(PrettyTypeOf(self->GetException()));
+    std::string type(mirror::Object::PrettyTypeOf(self->GetException()));
     if (type != Transaction::kAbortExceptionDescriptor) {
       self->ThrowNewWrappedException("Ljava/lang/ClassNotFoundException;",
                                      "ClassNotFoundException");
@@ -241,7 +243,7 @@
   if (Runtime::Current()->IsActiveTransaction()) {
     if (h_klass.Get()->IsFinalizable()) {
       AbortTransactionF(self, "Class for newInstance is finalizable: '%s'",
-                        PrettyClass(h_klass.Get()).c_str());
+                        h_klass->PrettyClass().c_str());
       return;
     }
   }
@@ -265,13 +267,13 @@
     } else {
       self->ThrowNewExceptionF("Ljava/lang/InternalError;",
                                "Could not find default constructor for '%s'",
-                               PrettyClass(h_klass.Get()).c_str());
+                               h_klass->PrettyClass().c_str());
     }
   }
   if (!ok) {
     AbortTransactionOrFail(self, "Failed in Class.newInstance for '%s' with %s",
-                           PrettyClass(h_klass.Get()).c_str(),
-                           PrettyTypeOf(self->GetException()).c_str());
+                           h_klass->PrettyClass().c_str(),
+                           mirror::Object::PrettyTypeOf(self->GetException()).c_str());
   }
 }
 
@@ -299,7 +301,7 @@
   if (found == nullptr) {
     AbortTransactionOrFail(self, "Failed to find field in Class.getDeclaredField in un-started "
                            " runtime. name=%s class=%s", name2->ToModifiedUtf8().c_str(),
-                           PrettyDescriptor(klass).c_str());
+                           klass->PrettyDescriptor().c_str());
     return;
   }
   Runtime* runtime = Runtime::Current();
@@ -340,7 +342,7 @@
   Runtime* runtime = Runtime::Current();
   bool transaction = runtime->IsActiveTransaction();
   PointerSize pointer_size = runtime->GetClassLinker()->GetImagePointerSize();
-  mirror::Method* method;
+  ObjPtr<mirror::Method> method;
   if (transaction) {
     if (pointer_size == PointerSize::k64) {
       method = mirror::Class::GetDeclaredMethodInternal<PointerSize::k64, true>(
@@ -374,7 +376,7 @@
   Runtime* runtime = Runtime::Current();
   bool transaction = runtime->IsActiveTransaction();
   PointerSize pointer_size = runtime->GetClassLinker()->GetImagePointerSize();
-  mirror::Constructor* constructor;
+  ObjPtr<mirror::Constructor> constructor;
   if (transaction) {
     if (pointer_size == PointerSize::k64) {
       constructor = mirror::Class::GetDeclaredConstructorInternal<PointerSize::k64,
@@ -422,7 +424,7 @@
 
   std::unique_ptr<ZipArchive> zip_archive(ZipArchive::Open(jar_file.c_str(), error_msg));
   if (zip_archive == nullptr) {
-    return nullptr;;
+    return nullptr;
   }
   std::unique_ptr<ZipEntry> zip_entry(zip_archive->Find(entry_name, error_msg));
   if (zip_entry == nullptr) {
@@ -561,8 +563,8 @@
     if (self->DecodeJObject(WellKnownClasses::java_lang_BootClassLoader) !=
             this_classloader_class.Get()) {
       AbortTransactionOrFail(self,
-                            "Unsupported classloader type %s for getResourceAsStream",
-                            PrettyClass(this_classloader_class.Get()).c_str());
+                             "Unsupported classloader type %s for getResourceAsStream",
+                             mirror::Class::PrettyClass(this_classloader_class.Get()).c_str());
       return;
     }
   }
@@ -583,7 +585,7 @@
   // This might have an error pending. But semantics are to just return null.
   if (self->IsExceptionPending()) {
     // If it is an InternalError, keep it. See CheckExceptionGenerateClassNotFound.
-    std::string type(PrettyTypeOf(self->GetException()));
+    std::string type(mirror::Object::PrettyTypeOf(self->GetException()));
     if (type != "java.lang.InternalError") {
       self->ClearException();
     }
@@ -606,9 +608,12 @@
                                int32_t length)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   if (src_array->GetClass()->GetComponentType() != dst_array->GetClass()->GetComponentType()) {
-    AbortTransactionOrFail(self, "Types mismatched in arraycopy: %s vs %s.",
-                           PrettyDescriptor(src_array->GetClass()->GetComponentType()).c_str(),
-                           PrettyDescriptor(dst_array->GetClass()->GetComponentType()).c_str());
+    AbortTransactionOrFail(self,
+                           "Types mismatched in arraycopy: %s vs %s.",
+                           mirror::Class::PrettyDescriptor(
+                               src_array->GetClass()->GetComponentType()).c_str(),
+                           mirror::Class::PrettyDescriptor(
+                               dst_array->GetClass()->GetComponentType()).c_str());
     return;
   }
   mirror::PrimitiveArray<T>* src = down_cast<mirror::PrimitiveArray<T>*>(src_array);
@@ -673,8 +678,10 @@
         GetComponentType();
     if (trg_type->IsPrimitiveInt()) {
       AbortTransactionOrFail(self, "Type mismatch in arraycopy: %s vs %s",
-                             PrettyDescriptor(src_array->GetClass()->GetComponentType()).c_str(),
-                             PrettyDescriptor(dst_array->GetClass()->GetComponentType()).c_str());
+                             mirror::Class::PrettyDescriptor(
+                                 src_array->GetClass()->GetComponentType()).c_str(),
+                             mirror::Class::PrettyDescriptor(
+                                 dst_array->GetClass()->GetComponentType()).c_str());
       return;
     }
 
@@ -713,7 +720,7 @@
     PrimitiveArrayCopy<int32_t>(self, src_array, src_pos, dst_array, dst_pos, length);
   } else {
     AbortTransactionOrFail(self, "Unimplemented System.arraycopy for type '%s'",
-                           PrettyDescriptor(src_type).c_str());
+                           src_type->PrettyDescriptor().c_str());
   }
 }
 
@@ -838,7 +845,7 @@
 
 void UnstartedRuntime::UnstartedThreadLocalGet(
     Thread* self, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset ATTRIBUTE_UNUSED) {
-  std::string caller(PrettyMethod(shadow_frame->GetLink()->GetMethod()));
+  std::string caller(ArtMethod::PrettyMethod(shadow_frame->GetLink()->GetMethod()));
   bool ok = false;
   if (caller == "void java.lang.FloatingDecimal.developLongDigits(int, long, long)" ||
       caller == "java.lang.String java.lang.FloatingDecimal.toJavaFormatString()") {
@@ -914,7 +921,7 @@
   result->SetJ(bit_cast<int64_t, double>(in));
 }
 
-static mirror::Object* GetDexFromDexCache(Thread* self, mirror::DexCache* dex_cache)
+static ObjPtr<mirror::Object> GetDexFromDexCache(Thread* self, mirror::DexCache* dex_cache)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   const DexFile* dex_file = dex_cache->GetDexFile();
   if (dex_file == nullptr) {
@@ -949,7 +956,7 @@
   mirror::Object* src = shadow_frame->GetVRegReference(arg_offset);
   bool have_dex = false;
   if (src != nullptr) {
-    mirror::Object* dex = GetDexFromDexCache(self, reinterpret_cast<mirror::DexCache*>(src));
+    ObjPtr<mirror::Object> dex = GetDexFromDexCache(self, src->AsDexCache());
     if (dex != nullptr) {
       have_dex = true;
       result->SetL(dex);
@@ -1090,10 +1097,12 @@
     return;
   }
   DCHECK_GE(start, 0);
-  DCHECK_GE(end, string->GetLength());
+  DCHECK_LE(start, end);
+  DCHECK_LE(end, string->GetLength());
   StackHandleScope<1> hs(self);
   Handle<mirror::CharArray> h_char_array(
       hs.NewHandle(shadow_frame->GetVRegReference(arg_offset + 3)->AsCharArray()));
+  DCHECK_GE(index, 0);
   DCHECK_LE(index, h_char_array->GetLength());
   DCHECK_LE(end - start, h_char_array->GetLength() - index);
   string->GetChars(start, end, h_char_array, index);
@@ -1185,13 +1194,13 @@
 // This allows statically initializing ConcurrentHashMap and SynchronousQueue.
 void UnstartedRuntime::UnstartedReferenceGetReferent(
     Thread* self, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset) {
-  mirror::Reference* const ref = down_cast<mirror::Reference*>(
+  ObjPtr<mirror::Reference> const ref = down_cast<mirror::Reference*>(
       shadow_frame->GetVRegReference(arg_offset));
   if (ref == nullptr) {
     AbortTransactionOrFail(self, "Reference.getReferent() with null object");
     return;
   }
-  mirror::Object* const referent =
+  ObjPtr<mirror::Object> const referent =
       Runtime::Current()->GetHeap()->GetReferenceProcessor()->GetReferent(self, ref);
   result->SetL(referent);
 }
@@ -1203,7 +1212,7 @@
 //       initialization of other classes, so will *use* the value.
 void UnstartedRuntime::UnstartedRuntimeAvailableProcessors(
     Thread* self, ShadowFrame* shadow_frame, JValue* result, size_t arg_offset ATTRIBUTE_UNUSED) {
-  std::string caller(PrettyMethod(shadow_frame->GetLink()->GetMethod()));
+  std::string caller(ArtMethod::PrettyMethod(shadow_frame->GetLink()->GetMethod()));
   if (caller == "void java.util.concurrent.SynchronousQueue.<clinit>()") {
     // SynchronousQueue really only separates between single- and multiprocessor case. Return
     // 8 as a conservative upper approximation.
@@ -1232,19 +1241,6 @@
   int64_t offset = shadow_frame->GetVRegLong(arg_offset + 2);
   int64_t expectedValue = shadow_frame->GetVRegLong(arg_offset + 4);
   int64_t newValue = shadow_frame->GetVRegLong(arg_offset + 6);
-
-  // Must use non transactional mode.
-  if (kUseReadBarrier) {
-    // Need to make sure the reference stored in the field is a to-space one before attempting the
-    // CAS or the CAS could fail incorrectly.
-    mirror::HeapReference<mirror::Object>* field_addr =
-        reinterpret_cast<mirror::HeapReference<mirror::Object>*>(
-            reinterpret_cast<uint8_t*>(obj) + static_cast<size_t>(offset));
-    ReadBarrier::Barrier<mirror::Object, kWithReadBarrier, /*kAlwaysUpdateField*/true>(
-        obj,
-        MemberOffset(offset),
-        field_addr);
-  }
   bool success;
   // Check whether we're in a transaction, call accordingly.
   if (Runtime::Current()->IsActiveTransaction()) {
@@ -1278,7 +1274,7 @@
     mirror::HeapReference<mirror::Object>* field_addr =
         reinterpret_cast<mirror::HeapReference<mirror::Object>*>(
             reinterpret_cast<uint8_t*>(obj) + static_cast<size_t>(offset));
-    ReadBarrier::Barrier<mirror::Object, kWithReadBarrier, /*kAlwaysUpdateField*/true>(
+    ReadBarrier::Barrier<mirror::Object, kWithReadBarrier, /* kAlwaysUpdateField */ true>(
         obj,
         MemberOffset(offset),
         field_addr);
@@ -1471,13 +1467,17 @@
     uint32_t* args, JValue* result) {
   int32_t length = args[1];
   DCHECK_GE(length, 0);
-  mirror::Class* element_class = reinterpret_cast<mirror::Object*>(args[0])->AsClass();
+  ObjPtr<mirror::Class> element_class = reinterpret_cast<mirror::Object*>(args[0])->AsClass();
   Runtime* runtime = Runtime::Current();
-  mirror::Class* array_class = runtime->GetClassLinker()->FindArrayClass(self, &element_class);
+  ObjPtr<mirror::Class> array_class =
+      runtime->GetClassLinker()->FindArrayClass(self, &element_class);
   DCHECK(array_class != nullptr);
   gc::AllocatorType allocator = runtime->GetHeap()->GetCurrentAllocator();
-  result->SetL(mirror::Array::Alloc<true, true>(self, array_class, length,
-                                                array_class->GetComponentSizeShift(), allocator));
+  result->SetL(mirror::Array::Alloc<true, true>(self,
+                                                array_class,
+                                                length,
+                                                array_class->GetComponentSizeShift(),
+                                                allocator));
 }
 
 void UnstartedRuntime::UnstartedJNIVMStackGetCallingClassLoader(
@@ -1600,10 +1600,10 @@
     ThrowNegativeArraySizeException(length);
     return;
   }
-  mirror::Class* element_class = reinterpret_cast<mirror::Class*>(args[0])->AsClass();
+  ObjPtr<mirror::Class> element_class = reinterpret_cast<mirror::Class*>(args[0])->AsClass();
   Runtime* runtime = Runtime::Current();
   ClassLinker* class_linker = runtime->GetClassLinker();
-  mirror::Class* array_class = class_linker->FindArrayClass(self, &element_class);
+  ObjPtr<mirror::Class> array_class = class_linker->FindArrayClass(self, &element_class);
   if (UNLIKELY(array_class == nullptr)) {
     CHECK(self->IsExceptionPending());
     return;
@@ -1619,9 +1619,9 @@
     uint32_t* args ATTRIBUTE_UNUSED, JValue* result) {
   ScopedObjectAccessUnchecked soa(self);
   if (Runtime::Current()->IsActiveTransaction()) {
-    result->SetL(soa.Decode<mirror::Object>(self->CreateInternalStackTrace<true>(soa)).Ptr());
+    result->SetL(soa.Decode<mirror::Object>(self->CreateInternalStackTrace<true>(soa)));
   } else {
-    result->SetL(soa.Decode<mirror::Object>(self->CreateInternalStackTrace<false>(soa)).Ptr());
+    result->SetL(soa.Decode<mirror::Object>(self->CreateInternalStackTrace<false>(soa)));
   }
 }
 
@@ -1743,7 +1743,7 @@
   // problems in core libraries.
   CHECK(tables_initialized_);
 
-  std::string name(PrettyMethod(shadow_frame->GetMethod()));
+  std::string name(ArtMethod::PrettyMethod(shadow_frame->GetMethod()));
   const auto& iter = invoke_handlers_.find(name);
   if (iter != invoke_handlers_.end()) {
     // Clear out the result in case it's not zeroed out.
@@ -1764,7 +1764,7 @@
 // Hand select a number of methods to be run in a not yet started runtime without using JNI.
 void UnstartedRuntime::Jni(Thread* self, ArtMethod* method, mirror::Object* receiver,
                            uint32_t* args, JValue* result) {
-  std::string name(PrettyMethod(method));
+  std::string name(ArtMethod::PrettyMethod(method));
   const auto& iter = jni_handlers_.find(name);
   if (iter != jni_handlers_.end()) {
     // Clear out the result in case it's not zeroed out.
@@ -1774,7 +1774,7 @@
     AbortTransactionF(self, "Attempt to invoke native method in non-started runtime: %s",
                       name.c_str());
   } else {
-    LOG(FATAL) << "Calling native method " << PrettyMethod(method) << " in an unstarted "
+    LOG(FATAL) << "Calling native method " << ArtMethod::PrettyMethod(method) << " in an unstarted "
         "non-transactional runtime";
   }
 }
diff --git a/runtime/interpreter/unstarted_runtime_test.cc b/runtime/interpreter/unstarted_runtime_test.cc
index 6a4add3..b190c81 100644
--- a/runtime/interpreter/unstarted_runtime_test.cc
+++ b/runtime/interpreter/unstarted_runtime_test.cc
@@ -81,20 +81,21 @@
 
   static mirror::ObjectArray<mirror::Object>* CreateObjectArray(
       Thread* self,
-      mirror::Class* component_type,
+      ObjPtr<mirror::Class> component_type,
       const StackHandleScope<3>& data)
       REQUIRES_SHARED(Locks::mutator_lock_) {
     Runtime* runtime = Runtime::Current();
-    mirror::Class* array_type = runtime->GetClassLinker()->FindArrayClass(self, &component_type);
+    ObjPtr<mirror::Class> array_type =
+        runtime->GetClassLinker()->FindArrayClass(self, &component_type);
     CHECK(array_type != nullptr);
-    mirror::ObjectArray<mirror::Object>* result =
+    ObjPtr<mirror::ObjectArray<mirror::Object>> result =
         mirror::ObjectArray<mirror::Object>::Alloc(self, array_type, 3);
     CHECK(result != nullptr);
     for (size_t i = 0; i < 3; ++i) {
       result->Set(static_cast<int32_t>(i), data.GetReference(i));
       CHECK(!self->IsExceptionPending());
     }
-    return result;
+    return result.Ptr();
   }
 
   static void CheckObjectArray(mirror::ObjectArray<mirror::Object>* array,
diff --git a/runtime/java_vm_ext.cc b/runtime/java_vm_ext.cc
index ecd6b52..caf705a 100644
--- a/runtime/java_vm_ext.cc
+++ b/runtime/java_vm_ext.cc
@@ -42,11 +42,9 @@
 
 namespace art {
 
-static size_t gGlobalsInitial = 512;  // Arbitrary.
-static size_t gGlobalsMax = 51200;  // Arbitrary sanity check. (Must fit in 16 bits.)
+static constexpr size_t kGlobalsMax = 51200;  // Arbitrary sanity check. (Must fit in 16 bits.)
 
-static const size_t kWeakGlobalsInitial = 16;  // Arbitrary.
-static const size_t kWeakGlobalsMax = 51200;  // Arbitrary sanity check. (Must fit in 16 bits.)
+static constexpr size_t kWeakGlobalsMax = 51200;  // Arbitrary sanity check. (Must fit in 16 bits.)
 
 bool JavaVMExt::IsBadJniVersion(int version) {
   // We don't support JNI_VERSION_1_1. These are the only other valid versions.
@@ -56,10 +54,10 @@
 class SharedLibrary {
  public:
   SharedLibrary(JNIEnv* env, Thread* self, const std::string& path, void* handle,
-                jobject class_loader, void* class_loader_allocator)
+                bool needs_native_bridge, jobject class_loader, void* class_loader_allocator)
       : path_(path),
         handle_(handle),
-        needs_native_bridge_(false),
+        needs_native_bridge_(needs_native_bridge),
         class_loader_(env->NewWeakGlobalRef(class_loader)),
         class_loader_allocator_(class_loader_allocator),
         jni_on_load_lock_("JNI_OnLoad lock"),
@@ -75,9 +73,7 @@
       self->GetJniEnv()->DeleteWeakGlobalRef(class_loader_);
     }
 
-    if (!needs_native_bridge_) {
-      android::CloseNativeLibrary(handle_);
-    }
+    android::CloseNativeLibrary(handle_, needs_native_bridge_);
   }
 
   jweak GetClassLoader() const {
@@ -133,8 +129,8 @@
     jni_on_load_cond_.Broadcast(self);
   }
 
-  void SetNeedsNativeBridge() {
-    needs_native_bridge_ = true;
+  void SetNeedsNativeBridge(bool needs) {
+    needs_native_bridge_ = needs;
   }
 
   bool NeedsNativeBridge() const {
@@ -235,8 +231,8 @@
   void* FindNativeMethod(ArtMethod* m, std::string& detail)
       REQUIRES(Locks::jni_libraries_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_) {
-    std::string jni_short_name(JniShortName(m));
-    std::string jni_long_name(JniLongName(m));
+    std::string jni_short_name(m->JniShortName());
+    std::string jni_long_name(m->JniLongName());
     mirror::ClassLoader* const declaring_class_loader = m->GetDeclaringClass()->GetClassLoader();
     ScopedObjectAccessUnchecked soa(Thread::Current());
     void* const declaring_class_loader_allocator =
@@ -258,13 +254,13 @@
         fn = library->FindSymbol(jni_long_name, shorty);
       }
       if (fn != nullptr) {
-        VLOG(jni) << "[Found native code for " << PrettyMethod(m)
+        VLOG(jni) << "[Found native code for " << m->PrettyMethod()
                   << " in \"" << library->GetPath() << "\"]";
         return fn;
       }
     }
     detail += "No implementation found for ";
-    detail += PrettyMethod(m);
+    detail += m->PrettyMethod();
     detail += " (tried " + jni_short_name + " and " + jni_long_name + ")";
     LOG(ERROR) << detail;
     return nullptr;
@@ -413,7 +409,9 @@
   JII::AttachCurrentThreadAsDaemon
 };
 
-JavaVMExt::JavaVMExt(Runtime* runtime, const RuntimeArgumentMap& runtime_options)
+JavaVMExt::JavaVMExt(Runtime* runtime,
+                     const RuntimeArgumentMap& runtime_options,
+                     std::string* error_msg)
     : runtime_(runtime),
       check_jni_abort_hook_(nullptr),
       check_jni_abort_hook_data_(nullptr),
@@ -422,14 +420,17 @@
       tracing_enabled_(runtime_options.Exists(RuntimeArgumentMap::JniTrace)
                        || VLOG_IS_ON(third_party_jni)),
       trace_(runtime_options.GetOrDefault(RuntimeArgumentMap::JniTrace)),
-      globals_lock_("JNI global reference table lock"),
-      globals_(gGlobalsInitial, gGlobalsMax, kGlobal),
+      globals_(kGlobalsMax, kGlobal, IndirectReferenceTable::ResizableCapacity::kNo, error_msg),
       libraries_(new Libraries),
       unchecked_functions_(&gJniInvokeInterface),
-      weak_globals_lock_("JNI weak global reference table lock", kJniWeakGlobalsLock),
-      weak_globals_(kWeakGlobalsInitial, kWeakGlobalsMax, kWeakGlobal),
+      weak_globals_(kWeakGlobalsMax,
+                    kWeakGlobal,
+                    IndirectReferenceTable::ResizableCapacity::kNo,
+                    error_msg),
       allow_accessing_weak_globals_(true),
-      weak_globals_add_condition_("weak globals add condition", weak_globals_lock_),
+      weak_globals_add_condition_("weak globals add condition",
+                                  (CHECK(Locks::jni_weak_globals_lock_ != nullptr),
+                                   *Locks::jni_weak_globals_lock_)),
       env_hooks_() {
   functions = unchecked_functions_;
   SetCheckJniEnabled(runtime_options.Exists(RuntimeArgumentMap::CheckJni));
@@ -438,6 +439,19 @@
 JavaVMExt::~JavaVMExt() {
 }
 
+// Checking "globals" and "weak_globals" usually requires locks, but we
+// don't need the locks to check for validity when constructing the
+// object. Use NO_THREAD_SAFETY_ANALYSIS for this.
+std::unique_ptr<JavaVMExt> JavaVMExt::Create(Runtime* runtime,
+                                             const RuntimeArgumentMap& runtime_options,
+                                             std::string* error_msg) NO_THREAD_SAFETY_ANALYSIS {
+  std::unique_ptr<JavaVMExt> java_vm(new JavaVMExt(runtime, runtime_options, error_msg));
+  if (java_vm && java_vm->globals_.IsValid() && java_vm->weak_globals_.IsValid()) {
+    return java_vm;
+  }
+  return nullptr;
+}
+
 jint JavaVMExt::HandleGetEnv(/*out*/void** env, jint version) {
   for (GetEnvHook hook : env_hooks_) {
     jint res = hook(this, env, version);
@@ -471,7 +485,7 @@
   }
   // TODO: is this useful given that we're about to dump the calling thread's stack?
   if (current_method != nullptr) {
-    os << "\n    from " << PrettyMethod(current_method);
+    os << "\n    from " << current_method->PrettyMethod();
   }
   os << "\n";
   self->Dump(os);
@@ -537,8 +551,8 @@
   if (obj == nullptr) {
     return nullptr;
   }
-  WriterMutexLock mu(self, globals_lock_);
-  IndirectRef ref = globals_.Add(IRT_FIRST_SEGMENT, obj.Ptr());
+  WriterMutexLock mu(self, *Locks::jni_globals_lock_);
+  IndirectRef ref = globals_.Add(kIRTFirstSegment, obj);
   return reinterpret_cast<jobject>(ref);
 }
 
@@ -546,11 +560,14 @@
   if (obj == nullptr) {
     return nullptr;
   }
-  MutexLock mu(self, weak_globals_lock_);
+  MutexLock mu(self, *Locks::jni_weak_globals_lock_);
   while (UNLIKELY(!MayAccessWeakGlobals(self))) {
+    // Check and run the empty checkpoint before blocking so the empty checkpoint will work in the
+    // presence of threads blocking for weak ref access.
+    self->CheckEmptyCheckpoint();
     weak_globals_add_condition_.WaitHoldingLocks(self);
   }
-  IndirectRef ref = weak_globals_.Add(IRT_FIRST_SEGMENT, obj.Ptr());
+  IndirectRef ref = weak_globals_.Add(kIRTFirstSegment, obj);
   return reinterpret_cast<jweak>(ref);
 }
 
@@ -558,8 +575,8 @@
   if (obj == nullptr) {
     return;
   }
-  WriterMutexLock mu(self, globals_lock_);
-  if (!globals_.Remove(IRT_FIRST_SEGMENT, obj)) {
+  WriterMutexLock mu(self, *Locks::jni_globals_lock_);
+  if (!globals_.Remove(kIRTFirstSegment, obj)) {
     LOG(WARNING) << "JNI WARNING: DeleteGlobalRef(" << obj << ") "
                  << "failed to find entry";
   }
@@ -569,8 +586,8 @@
   if (obj == nullptr) {
     return;
   }
-  MutexLock mu(self, weak_globals_lock_);
-  if (!weak_globals_.Remove(IRT_FIRST_SEGMENT, obj)) {
+  MutexLock mu(self, *Locks::jni_weak_globals_lock_);
+  if (!weak_globals_.Remove(kIRTFirstSegment, obj)) {
     LOG(WARNING) << "JNI WARNING: DeleteWeakGlobalRef(" << obj << ") "
                  << "failed to find entry";
   }
@@ -597,11 +614,11 @@
   }
   Thread* self = Thread::Current();
   {
-    ReaderMutexLock mu(self, globals_lock_);
+    ReaderMutexLock mu(self, *Locks::jni_globals_lock_);
     os << "; globals=" << globals_.Capacity();
   }
   {
-    MutexLock mu(self, weak_globals_lock_);
+    MutexLock mu(self, *Locks::jni_weak_globals_lock_);
     if (weak_globals_.Capacity() > 0) {
       os << " (plus " << weak_globals_.Capacity() << " weak)";
     }
@@ -617,7 +634,7 @@
 void JavaVMExt::DisallowNewWeakGlobals() {
   CHECK(!kUseReadBarrier);
   Thread* const self = Thread::Current();
-  MutexLock mu(self, weak_globals_lock_);
+  MutexLock mu(self, *Locks::jni_weak_globals_lock_);
   // DisallowNewWeakGlobals is only called by CMS during the pause. It is required to have the
   // mutator lock exclusively held so that we don't have any threads in the middle of
   // DecodeWeakGlobal.
@@ -628,24 +645,23 @@
 void JavaVMExt::AllowNewWeakGlobals() {
   CHECK(!kUseReadBarrier);
   Thread* self = Thread::Current();
-  MutexLock mu(self, weak_globals_lock_);
+  MutexLock mu(self, *Locks::jni_weak_globals_lock_);
   allow_accessing_weak_globals_.StoreSequentiallyConsistent(true);
   weak_globals_add_condition_.Broadcast(self);
 }
 
 void JavaVMExt::BroadcastForNewWeakGlobals() {
-  CHECK(kUseReadBarrier);
   Thread* self = Thread::Current();
-  MutexLock mu(self, weak_globals_lock_);
+  MutexLock mu(self, *Locks::jni_weak_globals_lock_);
   weak_globals_add_condition_.Broadcast(self);
 }
 
-mirror::Object* JavaVMExt::DecodeGlobal(IndirectRef ref) {
-  return globals_.SynchronizedGet(ref).Ptr();
+ObjPtr<mirror::Object> JavaVMExt::DecodeGlobal(IndirectRef ref) {
+  return globals_.SynchronizedGet(ref);
 }
 
-void JavaVMExt::UpdateGlobal(Thread* self, IndirectRef ref, mirror::Object* result) {
-  WriterMutexLock mu(self, globals_lock_);
+void JavaVMExt::UpdateGlobal(Thread* self, IndirectRef ref, ObjPtr<mirror::Object> result) {
+  WriterMutexLock mu(self, *Locks::jni_globals_lock_);
   globals_.Update(ref, result);
 }
 
@@ -660,33 +676,36 @@
       allow_accessing_weak_globals_.LoadSequentiallyConsistent();
 }
 
-mirror::Object* JavaVMExt::DecodeWeakGlobal(Thread* self, IndirectRef ref) {
+ObjPtr<mirror::Object> JavaVMExt::DecodeWeakGlobal(Thread* self, IndirectRef ref) {
   // It is safe to access GetWeakRefAccessEnabled without the lock since CC uses checkpoints to call
   // SetWeakRefAccessEnabled, and the other collectors only modify allow_accessing_weak_globals_
   // when the mutators are paused.
   // This only applies in the case where MayAccessWeakGlobals goes from false to true. In the other
   // case, it may be racy, this is benign since DecodeWeakGlobalLocked does the correct behavior
   // if MayAccessWeakGlobals is false.
-  DCHECK_EQ(GetIndirectRefKind(ref), kWeakGlobal);
+  DCHECK_EQ(IndirectReferenceTable::GetIndirectRefKind(ref), kWeakGlobal);
   if (LIKELY(MayAccessWeakGlobalsUnlocked(self))) {
-    return weak_globals_.SynchronizedGet(ref).Ptr();
+    return weak_globals_.SynchronizedGet(ref);
   }
-  MutexLock mu(self, weak_globals_lock_);
+  MutexLock mu(self, *Locks::jni_weak_globals_lock_);
   return DecodeWeakGlobalLocked(self, ref);
 }
 
-mirror::Object* JavaVMExt::DecodeWeakGlobalLocked(Thread* self, IndirectRef ref) {
+ObjPtr<mirror::Object> JavaVMExt::DecodeWeakGlobalLocked(Thread* self, IndirectRef ref) {
   if (kDebugLocking) {
-    weak_globals_lock_.AssertHeld(self);
+    Locks::jni_weak_globals_lock_->AssertHeld(self);
   }
   while (UNLIKELY(!MayAccessWeakGlobals(self))) {
+    // Check and run the empty checkpoint before blocking so the empty checkpoint will work in the
+    // presence of threads blocking for weak ref access.
+    self->CheckEmptyCheckpoint();
     weak_globals_add_condition_.WaitHoldingLocks(self);
   }
-  return weak_globals_.Get(ref).Ptr();
+  return weak_globals_.Get(ref);
 }
 
-mirror::Object* JavaVMExt::DecodeWeakGlobalDuringShutdown(Thread* self, IndirectRef ref) {
-  DCHECK_EQ(GetIndirectRefKind(ref), kWeakGlobal);
+ObjPtr<mirror::Object> JavaVMExt::DecodeWeakGlobalDuringShutdown(Thread* self, IndirectRef ref) {
+  DCHECK_EQ(IndirectReferenceTable::GetIndirectRefKind(ref), kWeakGlobal);
   DCHECK(Runtime::Current()->IsShuttingDown(self));
   if (self != nullptr) {
     return DecodeWeakGlobal(self, ref);
@@ -695,13 +714,16 @@
   if (!kUseReadBarrier) {
     DCHECK(allow_accessing_weak_globals_.LoadSequentiallyConsistent());
   }
-  return weak_globals_.SynchronizedGet(ref).Ptr();
+  return weak_globals_.SynchronizedGet(ref);
 }
 
 bool JavaVMExt::IsWeakGlobalCleared(Thread* self, IndirectRef ref) {
-  DCHECK_EQ(GetIndirectRefKind(ref), kWeakGlobal);
-  MutexLock mu(self, weak_globals_lock_);
+  DCHECK_EQ(IndirectReferenceTable::GetIndirectRefKind(ref), kWeakGlobal);
+  MutexLock mu(self, *Locks::jni_weak_globals_lock_);
   while (UNLIKELY(!MayAccessWeakGlobals(self))) {
+    // Check and run the empty checkpoint before blocking so the empty checkpoint will work in the
+    // presence of threads blocking for weak ref access.
+    self->CheckEmptyCheckpoint();
     weak_globals_add_condition_.WaitHoldingLocks(self);
   }
   // When just checking a weak ref has been cleared, avoid triggering the read barrier in decode
@@ -711,19 +733,19 @@
   return Runtime::Current()->IsClearedJniWeakGlobal(weak_globals_.Get<kWithoutReadBarrier>(ref));
 }
 
-void JavaVMExt::UpdateWeakGlobal(Thread* self, IndirectRef ref, mirror::Object* result) {
-  MutexLock mu(self, weak_globals_lock_);
+void JavaVMExt::UpdateWeakGlobal(Thread* self, IndirectRef ref, ObjPtr<mirror::Object> result) {
+  MutexLock mu(self, *Locks::jni_weak_globals_lock_);
   weak_globals_.Update(ref, result);
 }
 
 void JavaVMExt::DumpReferenceTables(std::ostream& os) {
   Thread* self = Thread::Current();
   {
-    ReaderMutexLock mu(self, globals_lock_);
+    ReaderMutexLock mu(self, *Locks::jni_globals_lock_);
     globals_.Dump(os);
   }
   {
-    MutexLock mu(self, weak_globals_lock_);
+    MutexLock mu(self, *Locks::jni_weak_globals_lock_);
     weak_globals_.Dump(os);
   }
 }
@@ -801,24 +823,18 @@
 
   Locks::mutator_lock_->AssertNotHeld(self);
   const char* path_str = path.empty() ? nullptr : path.c_str();
+  bool needs_native_bridge = false;
   void* handle = android::OpenNativeLibrary(env,
                                             runtime_->GetTargetSdkVersion(),
                                             path_str,
                                             class_loader,
-                                            library_path);
-
-  bool needs_native_bridge = false;
-  if (handle == nullptr) {
-    if (android::NativeBridgeIsSupported(path_str)) {
-      handle = android::NativeBridgeLoadLibrary(path_str, RTLD_NOW);
-      needs_native_bridge = true;
-    }
-  }
+                                            library_path,
+                                            &needs_native_bridge,
+                                            error_msg);
 
   VLOG(jni) << "[Call to dlopen(\"" << path << "\", RTLD_NOW) returned " << handle << "]";
 
   if (handle == nullptr) {
-    *error_msg = dlerror();
     VLOG(jni) << "dlopen(\"" << path << "\", RTLD_NOW) failed: " << *error_msg;
     return false;
   }
@@ -834,7 +850,14 @@
   {
     // Create SharedLibrary ahead of taking the libraries lock to maintain lock ordering.
     std::unique_ptr<SharedLibrary> new_library(
-        new SharedLibrary(env, self, path, handle, class_loader, class_loader_allocator));
+        new SharedLibrary(env,
+                          self,
+                          path,
+                          handle,
+                          needs_native_bridge,
+                          class_loader,
+                          class_loader_allocator));
+
     MutexLock mu(self, *Locks::jni_libraries_lock_);
     library = libraries_->Get(path);
     if (library == nullptr) {  // We won race to get libraries_lock.
@@ -851,11 +874,7 @@
   VLOG(jni) << "[Added shared library \"" << path << "\" for ClassLoader " << class_loader << "]";
 
   bool was_successful = false;
-  void* sym;
-  if (needs_native_bridge) {
-    library->SetNeedsNativeBridge();
-  }
-  sym = library->FindSymbol("JNI_OnLoad", nullptr);
+  void* sym = library->FindSymbol("JNI_OnLoad", nullptr);
   if (sym == nullptr) {
     VLOG(jni) << "[No JNI_OnLoad found in \"" << path << "\"]";
     was_successful = true;
@@ -904,7 +923,7 @@
   CHECK(m->IsNative());
   mirror::Class* c = m->GetDeclaringClass();
   // If this is a static method, it could be called before the class has been initialized.
-  CHECK(c->IsInitializing()) << c->GetStatus() << " " << PrettyMethod(m);
+  CHECK(c->IsInitializing()) << c->GetStatus() << " " << m->PrettyMethod();
   std::string detail;
   void* native_method;
   Thread* self = Thread::Current();
@@ -920,7 +939,7 @@
 }
 
 void JavaVMExt::SweepJniWeakGlobals(IsMarkedVisitor* visitor) {
-  MutexLock mu(Thread::Current(), weak_globals_lock_);
+  MutexLock mu(Thread::Current(), *Locks::jni_weak_globals_lock_);
   Runtime* const runtime = Runtime::Current();
   for (auto* entry : weak_globals_) {
     // Need to skip null here to distinguish between null entries and cleared weak ref entries.
@@ -937,13 +956,13 @@
 }
 
 void JavaVMExt::TrimGlobals() {
-  WriterMutexLock mu(Thread::Current(), globals_lock_);
+  WriterMutexLock mu(Thread::Current(), *Locks::jni_globals_lock_);
   globals_.Trim();
 }
 
 void JavaVMExt::VisitRoots(RootVisitor* visitor) {
   Thread* self = Thread::Current();
-  ReaderMutexLock mu(self, globals_lock_);
+  ReaderMutexLock mu(self, *Locks::jni_globals_lock_);
   globals_.VisitRoots(visitor, RootInfo(kRootJNIGlobal));
   // The weak_globals table is visited by the GC itself (because it mutates the table).
 }
diff --git a/runtime/java_vm_ext.h b/runtime/java_vm_ext.h
index 558ffff..7374920 100644
--- a/runtime/java_vm_ext.h
+++ b/runtime/java_vm_ext.h
@@ -43,7 +43,14 @@
 
 class JavaVMExt : public JavaVM {
  public:
-  JavaVMExt(Runtime* runtime, const RuntimeArgumentMap& runtime_options);
+  // Creates a new JavaVMExt object.
+  // Returns nullptr on error, in which case error_msg is set to a message
+  // describing the error.
+  static std::unique_ptr<JavaVMExt> Create(Runtime* runtime,
+                                           const RuntimeArgumentMap& runtime_options,
+                                           std::string* error_msg);
+
+
   ~JavaVMExt();
 
   bool ForceCopy() const {
@@ -109,72 +116,80 @@
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   void DumpForSigQuit(std::ostream& os)
-      REQUIRES(!Locks::jni_libraries_lock_, !globals_lock_, !weak_globals_lock_);
+      REQUIRES(!Locks::jni_libraries_lock_,
+               !Locks::jni_globals_lock_,
+               !Locks::jni_weak_globals_lock_);
 
   void DumpReferenceTables(std::ostream& os)
-      REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!globals_lock_, !weak_globals_lock_);
+      REQUIRES_SHARED(Locks::mutator_lock_)
+      REQUIRES(!Locks::jni_globals_lock_, !Locks::jni_weak_globals_lock_);
 
   bool SetCheckJniEnabled(bool enabled);
 
   void VisitRoots(RootVisitor* visitor) REQUIRES_SHARED(Locks::mutator_lock_)
-      REQUIRES(!globals_lock_);
+      REQUIRES(!Locks::jni_globals_lock_);
 
-  void DisallowNewWeakGlobals() REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!weak_globals_lock_);
-  void AllowNewWeakGlobals() REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!weak_globals_lock_);
-  void BroadcastForNewWeakGlobals() REQUIRES_SHARED(Locks::mutator_lock_)
-      REQUIRES(!weak_globals_lock_);
+  void DisallowNewWeakGlobals()
+      REQUIRES_SHARED(Locks::mutator_lock_)
+      REQUIRES(!Locks::jni_weak_globals_lock_);
+  void AllowNewWeakGlobals()
+      REQUIRES_SHARED(Locks::mutator_lock_)
+      REQUIRES(!Locks::jni_weak_globals_lock_);
+  void BroadcastForNewWeakGlobals()
+      REQUIRES(!Locks::jni_weak_globals_lock_);
 
   jobject AddGlobalRef(Thread* self, ObjPtr<mirror::Object> obj)
-      REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!globals_lock_);
+      REQUIRES_SHARED(Locks::mutator_lock_)
+      REQUIRES(!Locks::jni_globals_lock_);
 
   jweak AddWeakGlobalRef(Thread* self, ObjPtr<mirror::Object> obj)
-    REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!weak_globals_lock_);
+      REQUIRES_SHARED(Locks::mutator_lock_)
+      REQUIRES(!Locks::jni_weak_globals_lock_);
 
-  void DeleteGlobalRef(Thread* self, jobject obj) REQUIRES(!globals_lock_);
+  void DeleteGlobalRef(Thread* self, jobject obj) REQUIRES(!Locks::jni_globals_lock_);
 
-  void DeleteWeakGlobalRef(Thread* self, jweak obj) REQUIRES(!weak_globals_lock_);
+  void DeleteWeakGlobalRef(Thread* self, jweak obj) REQUIRES(!Locks::jni_weak_globals_lock_);
 
   void SweepJniWeakGlobals(IsMarkedVisitor* visitor)
-      REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!weak_globals_lock_);
+      REQUIRES_SHARED(Locks::mutator_lock_)
+      REQUIRES(!Locks::jni_weak_globals_lock_);
 
-  mirror::Object* DecodeGlobal(IndirectRef ref)
+  ObjPtr<mirror::Object> DecodeGlobal(IndirectRef ref)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  void UpdateGlobal(Thread* self, IndirectRef ref, mirror::Object* result)
-      REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!globals_lock_);
-
-  mirror::Object* DecodeWeakGlobal(Thread* self, IndirectRef ref)
+  void UpdateGlobal(Thread* self, IndirectRef ref, ObjPtr<mirror::Object> result)
       REQUIRES_SHARED(Locks::mutator_lock_)
-      REQUIRES(!weak_globals_lock_);
+      REQUIRES(!Locks::jni_globals_lock_);
 
-  mirror::Object* DecodeWeakGlobalLocked(Thread* self, IndirectRef ref)
+  ObjPtr<mirror::Object> DecodeWeakGlobal(Thread* self, IndirectRef ref)
       REQUIRES_SHARED(Locks::mutator_lock_)
-      REQUIRES(weak_globals_lock_);
+      REQUIRES(!Locks::jni_weak_globals_lock_);
+
+  ObjPtr<mirror::Object> DecodeWeakGlobalLocked(Thread* self, IndirectRef ref)
+      REQUIRES_SHARED(Locks::mutator_lock_)
+      REQUIRES(Locks::jni_weak_globals_lock_);
 
   // Like DecodeWeakGlobal() but to be used only during a runtime shutdown where self may be
   // null.
-  mirror::Object* DecodeWeakGlobalDuringShutdown(Thread* self, IndirectRef ref)
+  ObjPtr<mirror::Object> DecodeWeakGlobalDuringShutdown(Thread* self, IndirectRef ref)
       REQUIRES_SHARED(Locks::mutator_lock_)
-      REQUIRES(!weak_globals_lock_);
+      REQUIRES(!Locks::jni_weak_globals_lock_);
 
   // Checks if the weak global ref has been cleared by the GC without decode (read barrier.)
   bool IsWeakGlobalCleared(Thread* self, IndirectRef ref)
       REQUIRES_SHARED(Locks::mutator_lock_)
-      REQUIRES(!weak_globals_lock_);
+      REQUIRES(!Locks::jni_weak_globals_lock_);
 
-  Mutex& WeakGlobalsLock() RETURN_CAPABILITY(weak_globals_lock_) {
-    return weak_globals_lock_;
-  }
-
-  void UpdateWeakGlobal(Thread* self, IndirectRef ref, mirror::Object* result)
-      REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!weak_globals_lock_);
+  void UpdateWeakGlobal(Thread* self, IndirectRef ref, ObjPtr<mirror::Object> result)
+      REQUIRES_SHARED(Locks::mutator_lock_)
+      REQUIRES(!Locks::jni_weak_globals_lock_);
 
   const JNIInvokeInterface* GetUncheckedFunctions() const {
     return unchecked_functions_;
   }
 
   void TrimGlobals() REQUIRES_SHARED(Locks::mutator_lock_)
-      REQUIRES(!globals_lock_);
+      REQUIRES(!Locks::jni_globals_lock_);
 
   jint HandleGetEnv(/*out*/void** env, jint version);
 
@@ -183,11 +198,15 @@
   static bool IsBadJniVersion(int version);
 
  private:
+  // The constructor should not be called directly. It may leave the object in
+  // an erroneous state, and the result needs to be checked.
+  JavaVMExt(Runtime* runtime, const RuntimeArgumentMap& runtime_options, std::string* error_msg);
+
   // Return true if self can currently access weak globals.
   bool MayAccessWeakGlobalsUnlocked(Thread* self) const REQUIRES_SHARED(Locks::mutator_lock_);
   bool MayAccessWeakGlobals(Thread* self) const
       REQUIRES_SHARED(Locks::mutator_lock_)
-      REQUIRES(weak_globals_lock_);
+      REQUIRES(Locks::jni_weak_globals_lock_);
 
   Runtime* const runtime_;
 
@@ -203,8 +222,6 @@
   // Extra diagnostics.
   const std::string trace_;
 
-  // JNI global references.
-  ReaderWriterMutex globals_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   // Not guarded by globals_lock since we sometimes use SynchronizedGet in Thread::DecodeJObject.
   IndirectReferenceTable globals_;
 
@@ -215,8 +232,6 @@
   // Used by -Xcheck:jni.
   const JNIInvokeInterface* const unchecked_functions_;
 
-  // JNI weak global references.
-  Mutex weak_globals_lock_ DEFAULT_MUTEX_ACQUIRED_AFTER;
   // Since weak_globals_ contain weak roots, be careful not to
   // directly access the object references in it. Use Get() with the
   // read barrier enabled.
@@ -224,7 +239,7 @@
   IndirectReferenceTable weak_globals_;
   // Not guarded by weak_globals_lock since we may use SynchronizedGet in DecodeWeakGlobal.
   Atomic<bool> allow_accessing_weak_globals_;
-  ConditionVariable weak_globals_add_condition_ GUARDED_BY(weak_globals_lock_);
+  ConditionVariable weak_globals_add_condition_ GUARDED_BY(Locks::jni_weak_globals_lock_);
 
   // TODO Maybe move this to Runtime.
   std::vector<GetEnvHook> env_hooks_;
diff --git a/runtime/jdwp/jdwp_event.cc b/runtime/jdwp/jdwp_event.cc
index 6aebe9f..fad7d90 100644
--- a/runtime/jdwp/jdwp_event.cc
+++ b/runtime/jdwp/jdwp_event.cc
@@ -781,7 +781,7 @@
   SendRequestAndPossiblySuspend(pReq, suspend_policy, threadId);
 }
 
-static void LogMatchingEventsAndThread(const std::vector<JdwpEvent*> match_list,
+static void LogMatchingEventsAndThread(const std::vector<JdwpEvent*>& match_list,
                                        ObjectId thread_id)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   for (size_t i = 0, e = match_list.size(); i < e; ++i) {
@@ -1142,7 +1142,7 @@
   SetJdwpLocationFromEventLocation(pCatchLoc, &jdwp_catch_location);
 
   if (VLOG_IS_ON(jdwp)) {
-    std::string exceptionClassName(PrettyDescriptor(exception_object->GetClass()));
+    std::string exceptionClassName(mirror::Class::PrettyDescriptor(exception_object->GetClass()));
 
     LogMatchingEventsAndThread(match_list, thread_id);
     VLOG(jdwp) << "  throwLocation=" << jdwp_throw_location;
diff --git a/runtime/jdwp/object_registry.cc b/runtime/jdwp/object_registry.cc
index dc3bf16..170887e 100644
--- a/runtime/jdwp/object_registry.cc
+++ b/runtime/jdwp/object_registry.cc
@@ -180,7 +180,7 @@
   }
   ObjectRegistryEntry& entry = *it->second;
   *error = JDWP::ERR_NONE;
-  return self->DecodeJObject(entry.jni_reference);
+  return self->DecodeJObject(entry.jni_reference).Ptr();
 }
 
 jobject ObjectRegistry::GetJObject(JDWP::ObjectId id) {
diff --git a/runtime/jit/jit.cc b/runtime/jit/jit.cc
index afa52ca..23a5ddd 100644
--- a/runtime/jit/jit.cc
+++ b/runtime/jit/jit.cc
@@ -114,7 +114,7 @@
   } else {
     jit_options->invoke_transition_weight_ = std::max(
         jit_options->warmup_threshold_ / Jit::kDefaultInvokeTransitionWeightRatio,
-        static_cast<size_t>(1));;
+        static_cast<size_t>(1));
   }
 
   return jit_options;
@@ -246,14 +246,14 @@
 
   // Don't compile the method if it has breakpoints.
   if (Dbg::IsDebuggerActive() && Dbg::MethodHasAnyBreakpoints(method)) {
-    VLOG(jit) << "JIT not compiling " << PrettyMethod(method) << " due to breakpoint";
+    VLOG(jit) << "JIT not compiling " << method->PrettyMethod() << " due to breakpoint";
     return false;
   }
 
   // Don't compile the method if we are supposed to be deoptimized.
   instrumentation::Instrumentation* instrumentation = Runtime::Current()->GetInstrumentation();
   if (instrumentation->AreAllMethodsDeoptimized() || instrumentation->IsDeoptimized(method)) {
-    VLOG(jit) << "JIT not compiling " << PrettyMethod(method) << " due to deoptimization";
+    VLOG(jit) << "JIT not compiling " << method->PrettyMethod() << " due to deoptimization";
     return false;
   }
 
@@ -265,13 +265,13 @@
   }
 
   VLOG(jit) << "Compiling method "
-            << PrettyMethod(method_to_compile)
+            << ArtMethod::PrettyMethod(method_to_compile)
             << " osr=" << std::boolalpha << osr;
   bool success = jit_compile_method_(jit_compiler_handle_, method_to_compile, self, osr);
   code_cache_->DoneCompiling(method_to_compile, self, osr);
   if (!success) {
     VLOG(jit) << "Failed to compile method "
-              << PrettyMethod(method_to_compile)
+              << ArtMethod::PrettyMethod(method_to_compile)
               << " osr=" << std::boolalpha << osr;
   }
   return success;
@@ -364,8 +364,8 @@
 
 void Jit::DumpTypeInfoForLoadedTypes(ClassLinker* linker) {
   struct CollectClasses : public ClassVisitor {
-    bool operator()(mirror::Class* klass) override {
-      classes_.push_back(klass);
+    bool operator()(ObjPtr<mirror::Class> klass) OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) {
+      classes_.push_back(klass.Ptr());
       return true;
     }
     std::vector<mirror::Class*> classes_;
@@ -424,7 +424,7 @@
   // method while we are being suspended.
   const size_t number_of_vregs = method->GetCodeItem()->registers_size_;
   const char* shorty = method->GetShorty();
-  std::string method_name(VLOG_IS_ON(jit) ? PrettyMethod(method) : "");
+  std::string method_name(VLOG_IS_ON(jit) ? method->PrettyMethod() : "");
   void** memory = nullptr;
   size_t frame_size = 0;
   ShadowFrame* shadow_frame = nullptr;
@@ -539,7 +539,7 @@
     LOG(INFO) << "Compiler allocated "
               << PrettySize(bytes)
               << " to compile "
-              << PrettyMethod(method);
+              << ArtMethod::PrettyMethod(method);
   }
   MutexLock mu(Thread::Current(), lock_);
   memory_use_.AddValue(bytes);
@@ -574,7 +574,7 @@
     } else {
       DCHECK(kind_ == kAllocateProfile);
       if (ProfilingInfo::Create(self, method_, /* retry_allocation */ true)) {
-        VLOG(jit) << "Start profiling " << PrettyMethod(method_);
+        VLOG(jit) << "Start profiling " << ArtMethod::PrettyMethod(method_);
       }
     }
     ProfileSaver::NotifyJitActivity();
@@ -620,7 +620,7 @@
         (method->GetProfilingInfo(kRuntimePointerSize) == nullptr)) {
       bool success = ProfilingInfo::Create(self, method, /* retry_allocation */ false);
       if (success) {
-        VLOG(jit) << "Start profiling " << PrettyMethod(method);
+        VLOG(jit) << "Start profiling " << method->PrettyMethod();
       }
 
       if (thread_pool_ == nullptr) {
@@ -683,7 +683,7 @@
   }
 }
 
-void Jit::InvokeVirtualOrInterface(mirror::Object* this_object,
+void Jit::InvokeVirtualOrInterface(ObjPtr<mirror::Object> this_object,
                                    ArtMethod* caller,
                                    uint32_t dex_pc,
                                    ArtMethod* callee ATTRIBUTE_UNUSED) {
diff --git a/runtime/jit/jit.h b/runtime/jit/jit.h
index 35656cd..a782437 100644
--- a/runtime/jit/jit.h
+++ b/runtime/jit/jit.h
@@ -22,9 +22,10 @@
 #include "base/macros.h"
 #include "base/mutex.h"
 #include "base/timing_logger.h"
+#include "jit/profile_saver_options.h"
+#include "obj_ptr.h"
 #include "object_callbacks.h"
 #include "offline_profiling_info.h"
-#include "jit/profile_saver_options.h"
 #include "thread_pool.h"
 
 namespace art {
@@ -34,6 +35,11 @@
 struct RuntimeArgumentMap;
 union JValue;
 
+namespace mirror {
+class Object;
+class Class;
+}   // namespace mirror
+
 namespace jit {
 
 class JitCodeCache;
@@ -109,7 +115,7 @@
   void AddSamples(Thread* self, ArtMethod* method, uint16_t samples, bool with_backedges)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  void InvokeVirtualOrInterface(mirror::Object* this_object,
+  void InvokeVirtualOrInterface(ObjPtr<mirror::Object> this_object,
                                 ArtMethod* caller,
                                 uint32_t dex_pc,
                                 ArtMethod* callee)
diff --git a/runtime/jit/jit_code_cache.cc b/runtime/jit/jit_code_cache.cc
index 2c6b249..a26d850 100644
--- a/runtime/jit/jit_code_cache.cc
+++ b/runtime/jit/jit_code_cache.cc
@@ -368,7 +368,7 @@
     last_update_time_ns_.StoreRelease(NanoTime());
     VLOG(jit)
         << "JIT added (osr=" << std::boolalpha << osr << std::noboolalpha << ") "
-        << PrettyMethod(method) << "@" << method
+        << ArtMethod::PrettyMethod(method) << "@" << method
         << " ccache_size=" << PrettySize(CodeCacheSizeLocked()) << ": "
         << " dcache_size=" << PrettySize(DataCacheSizeLocked()) << ": "
         << reinterpret_cast<const void*>(method_header->GetEntryPoint()) << ","
@@ -378,7 +378,7 @@
       LOG(INFO) << "JIT allocated "
                 << PrettySize(code_size)
                 << " for compiled code of "
-                << PrettyMethod(method);
+                << ArtMethod::PrettyMethod(method);
     }
   }
 
@@ -434,7 +434,7 @@
     LOG(INFO) << "JIT allocated "
               << PrettySize(size)
               << " for stack maps of "
-              << PrettyMethod(method);
+              << ArtMethod::PrettyMethod(method);
   }
   return result;
 }
@@ -806,7 +806,8 @@
   }
   if (kIsDebugBuild && method != nullptr) {
     DCHECK_EQ(it->second, method)
-        << PrettyMethod(method) << " " << PrettyMethod(it->second) << " " << std::hex << pc;
+        << ArtMethod::PrettyMethod(method) << " " << ArtMethod::PrettyMethod(it->second) << " "
+        << std::hex << pc;
   }
   return method_header;
 }
@@ -927,7 +928,7 @@
 
   ProfilingInfo* info = method->GetProfilingInfo(kRuntimePointerSize);
   if (info == nullptr) {
-    VLOG(jit) << PrettyMethod(method) << " needs a ProfilingInfo to be compiled";
+    VLOG(jit) << method->PrettyMethod() << " needs a ProfilingInfo to be compiled";
     // Because the counter is not atomic, there are some rare cases where we may not
     // hit the threshold for creating the ProfilingInfo. Reset the counter now to
     // "correct" this.
diff --git a/runtime/jit/offline_profiling_info.cc b/runtime/jit/offline_profiling_info.cc
index aa606a2..f535151 100644
--- a/runtime/jit/offline_profiling_info.cc
+++ b/runtime/jit/offline_profiling_info.cc
@@ -629,7 +629,7 @@
     os << "\n\tmethods: ";
     for (const auto method_it : dex_data.method_set) {
       if (dex_file != nullptr) {
-        os << "\n\t\t" << PrettyMethod(method_it, *dex_file, true);
+        os << "\n\t\t" << dex_file->PrettyMethod(method_it, true);
       } else {
         os << method_it << ",";
       }
diff --git a/runtime/jit/offline_profiling_info.h b/runtime/jit/offline_profiling_info.h
index 0b26f9b..fdca078 100644
--- a/runtime/jit/offline_profiling_info.h
+++ b/runtime/jit/offline_profiling_info.h
@@ -152,7 +152,7 @@
     uint8_t* Get() { return storage_.get(); }
 
    private:
-    std::unique_ptr<uint8_t> storage_;
+    std::unique_ptr<uint8_t[]> storage_;
     uint8_t* ptr_current_;
     uint8_t* ptr_end_;
   };
diff --git a/runtime/jit/profile_compilation_info_test.cc b/runtime/jit/profile_compilation_info_test.cc
index 764458a..1dd1e36 100644
--- a/runtime/jit/profile_compilation_info_test.cc
+++ b/runtime/jit/profile_compilation_info_test.cc
@@ -38,8 +38,8 @@
     Thread* self = Thread::Current();
     ScopedObjectAccess soa(self);
     StackHandleScope<1> hs(self);
-    Handle<mirror::ClassLoader> h_loader(hs.NewHandle(
-        reinterpret_cast<mirror::ClassLoader*>(self->DecodeJObject(class_loader))));
+    Handle<mirror::ClassLoader> h_loader(
+        hs.NewHandle(self->DecodeJObject(class_loader)->AsClassLoader()));
     mirror::Class* klass = class_linker->FindClass(self, clazz.c_str(), h_loader);
 
     const auto pointer_size = class_linker->GetImagePointerSize();
diff --git a/runtime/jit/profile_saver.cc b/runtime/jit/profile_saver.cc
index d23821b..11d601e 100644
--- a/runtime/jit/profile_saver.cc
+++ b/runtime/jit/profile_saver.cc
@@ -189,7 +189,7 @@
     : methods_(methods),
       startup_method_samples_(startup_method_samples) {}
 
-  virtual bool operator()(mirror::Class* klass) REQUIRES_SHARED(Locks::mutator_lock_) {
+  virtual bool operator()(ObjPtr<mirror::Class> klass) REQUIRES_SHARED(Locks::mutator_lock_) {
     if (Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(klass)) {
       return true;
     }
diff --git a/runtime/jit/profiling_info.cc b/runtime/jit/profiling_info.cc
index 6ba187e..9ec46f0 100644
--- a/runtime/jit/profiling_info.cc
+++ b/runtime/jit/profiling_info.cc
@@ -99,7 +99,7 @@
 
 void ProfilingInfo::AddInvokeInfo(uint32_t dex_pc, mirror::Class* cls) {
   InlineCache* cache = GetInlineCache(dex_pc);
-  CHECK(cache != nullptr) << PrettyMethod(method_) << "@" << dex_pc;
+  CHECK(cache != nullptr) << ArtMethod::PrettyMethod(method_) << "@" << dex_pc;
   for (size_t i = 0; i < InlineCache::kIndividualCacheSize; ++i) {
     mirror::Class* existing = cache->classes_[i].Read();
     if (existing == cls) {
diff --git a/runtime/jni_env_ext-inl.h b/runtime/jni_env_ext-inl.h
index 2cc7342..004f824 100644
--- a/runtime/jni_env_ext-inl.h
+++ b/runtime/jni_env_ext-inl.h
@@ -36,7 +36,7 @@
       if (entry_count > 16) {
         locals.Dump(LOG_STREAM(WARNING) << "Warning: more than 16 JNI local references: "
                                         << entry_count << " (most recent was a "
-                                        << PrettyTypeOf(obj) << ")\n");
+                                        << mirror::Object::PrettyTypeOf(obj) << ")\n");
       // TODO: LOG(FATAL) in a later release?
       }
     }
diff --git a/runtime/jni_env_ext.cc b/runtime/jni_env_ext.cc
index 0358494..342e0d2 100644
--- a/runtime/jni_env_ext.cc
+++ b/runtime/jni_env_ext.cc
@@ -33,8 +33,6 @@
 static constexpr size_t kMonitorsInitial = 32;  // Arbitrary.
 static constexpr size_t kMonitorsMax = 4096;  // Arbitrary sanity check.
 
-static constexpr size_t kLocalsInitial = 64;  // Arbitrary.
-
 // Checking "locals" requires the mutator lock, but at creation time we're really only interested
 // in validity, which isn't changing. To avoid grabbing the mutator lock, factored out and tagged
 // with NO_THREAD_SAFETY_ANALYSIS.
@@ -59,19 +57,19 @@
   return JNI_OK;
 }
 
-JNIEnvExt* JNIEnvExt::Create(Thread* self_in, JavaVMExt* vm_in) {
-  std::unique_ptr<JNIEnvExt> ret(new JNIEnvExt(self_in, vm_in));
+JNIEnvExt* JNIEnvExt::Create(Thread* self_in, JavaVMExt* vm_in, std::string* error_msg) {
+  std::unique_ptr<JNIEnvExt> ret(new JNIEnvExt(self_in, vm_in, error_msg));
   if (CheckLocalsValid(ret.get())) {
     return ret.release();
   }
   return nullptr;
 }
 
-JNIEnvExt::JNIEnvExt(Thread* self_in, JavaVMExt* vm_in)
+JNIEnvExt::JNIEnvExt(Thread* self_in, JavaVMExt* vm_in, std::string* error_msg)
     : self(self_in),
       vm(vm_in),
-      local_ref_cookie(IRT_FIRST_SEGMENT),
-      locals(kLocalsInitial, kLocalsMax, kLocal, false),
+      local_ref_cookie(kIRTFirstSegment),
+      locals(kLocalsInitial, kLocal, IndirectReferenceTable::ResizableCapacity::kYes, error_msg),
       check_jni(false),
       runtime_deleted(false),
       critical(0),
@@ -176,18 +174,18 @@
 
 static std::string ComputeMonitorDescription(Thread* self,
                                              jobject obj) REQUIRES_SHARED(Locks::mutator_lock_) {
-  mirror::Object* o = self->DecodeJObject(obj);
+  ObjPtr<mirror::Object> o = self->DecodeJObject(obj);
   if ((o->GetLockWord(false).GetState() == LockWord::kThinLocked) &&
       Locks::mutator_lock_->IsExclusiveHeld(self)) {
     // Getting the identity hashcode here would result in lock inflation and suspension of the
     // current thread, which isn't safe if this is the only runnable thread.
     return StringPrintf("<@addr=0x%" PRIxPTR "> (a %s)",
-                        reinterpret_cast<intptr_t>(o),
-                        PrettyTypeOf(o).c_str());
+                        reinterpret_cast<intptr_t>(o.Ptr()),
+                        o->PrettyTypeOf().c_str());
   } else {
     // IdentityHashCode can cause thread suspension, which would invalidate o if it moved. So
     // we get the pretty type before we call IdentityHashCode.
-    const std::string pretty_type(PrettyTypeOf(o));
+    const std::string pretty_type(o->PrettyTypeOf());
     return StringPrintf("<0x%08x> (a %s)", o->IdentityHashCode(), pretty_type.c_str());
   }
 }
@@ -203,7 +201,7 @@
       [self, frame, monitors](const std::pair<uintptr_t, jobject>& pair)
           REQUIRES_SHARED(Locks::mutator_lock_) {
         if (frame == pair.first) {
-          mirror::Object* o = self->DecodeJObject(pair.second);
+          ObjPtr<mirror::Object> o = self->DecodeJObject(pair.second);
           monitors->Remove(o);
           return true;
         }
@@ -221,7 +219,7 @@
     locked_objects_.erase(it);
   } else {
     // Check whether this monitor was locked in another JNI "session."
-    mirror::Object* mirror_obj = self->DecodeJObject(obj);
+    ObjPtr<mirror::Object> mirror_obj = self->DecodeJObject(obj);
     for (std::pair<uintptr_t, jobject>& pair : locked_objects_) {
       if (self->DecodeJObject(pair.second) == mirror_obj) {
         std::string monitor_descr = ComputeMonitorDescription(self, pair.second);
diff --git a/runtime/jni_env_ext.h b/runtime/jni_env_ext.h
index 121f848..5cca0ae 100644
--- a/runtime/jni_env_ext.h
+++ b/runtime/jni_env_ext.h
@@ -29,12 +29,14 @@
 
 class JavaVMExt;
 
-// Maximum number of local references in the indirect reference table. The value is arbitrary but
+// Number of local references in the indirect reference table. The value is arbitrary but
 // low enough that it forces sanity checks.
-static constexpr size_t kLocalsMax = 512;
+static constexpr size_t kLocalsInitial = 512;
 
 struct JNIEnvExt : public JNIEnv {
-  static JNIEnvExt* Create(Thread* self, JavaVMExt* vm);
+  // Creates a new JNIEnvExt. Returns null on error, in which case error_msg
+  // will contain a description of the error.
+  static JNIEnvExt* Create(Thread* self, JavaVMExt* vm, std::string* error_msg);
 
   ~JNIEnvExt();
 
@@ -62,7 +64,7 @@
   JavaVMExt* const vm;
 
   // Cookie used when using the local indirect reference table.
-  uint32_t local_ref_cookie;
+  IRTSegmentState local_ref_cookie;
 
   // JNI local references.
   IndirectReferenceTable locals GUARDED_BY(Locks::mutator_lock_);
@@ -70,7 +72,7 @@
   // Stack of cookies corresponding to PushLocalFrame/PopLocalFrame calls.
   // TODO: to avoid leaks (and bugs), we need to clear this vector on entry (or return)
   // to a native method.
-  std::vector<uint32_t> stacked_local_ref_cookies;
+  std::vector<IRTSegmentState> stacked_local_ref_cookies;
 
   // Frequently-accessed fields cached from JavaVM.
   bool check_jni;
@@ -103,9 +105,9 @@
   void SetFunctionsToRuntimeShutdownFunctions();
 
  private:
-  // The constructor should not be called directly. It may leave the object in an erronuous state,
+  // The constructor should not be called directly. It may leave the object in an erroneous state,
   // and the result needs to be checked.
-  JNIEnvExt(Thread* self, JavaVMExt* vm);
+  JNIEnvExt(Thread* self, JavaVMExt* vm, std::string* error_msg);
 
   // All locked objects, with the (Java caller) stack frame that locked them. Used in CheckJNI
   // to ensure that only monitors locked in this native frame are being unlocked, and that at
@@ -129,7 +131,7 @@
 
  private:
   JNIEnvExt* const env_;
-  uint32_t saved_local_ref_cookie_;
+  IRTSegmentState saved_local_ref_cookie_;
 
   DISALLOW_COPY_AND_ASSIGN(ScopedJniEnvLocalRefState);
 };
diff --git a/runtime/jni_internal.cc b/runtime/jni_internal.cc
index 621e2df..01a2ad8 100644
--- a/runtime/jni_internal.cc
+++ b/runtime/jni_internal.cc
@@ -88,13 +88,19 @@
   return result;
 }
 
-static void ThrowNoSuchMethodError(ScopedObjectAccess& soa, mirror::Class* c,
-                                   const char* name, const char* sig, const char* kind)
+static void ThrowNoSuchMethodError(ScopedObjectAccess& soa,
+                                   ObjPtr<mirror::Class> c,
+                                   const char* name,
+                                   const char* sig,
+                                   const char* kind)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   std::string temp;
   soa.Self()->ThrowNewExceptionF("Ljava/lang/NoSuchMethodError;",
                                  "no %s method \"%s.%s%s\"",
-                                 kind, c->GetDescriptor(&temp), name, sig);
+                                 kind,
+                                 c->GetDescriptor(&temp),
+                                 name,
+                                 sig);
 }
 
 static void ReportInvalidJNINativeMethod(const ScopedObjectAccess& soa,
@@ -104,7 +110,7 @@
                                          bool return_errors)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   LOG(return_errors ? ::android::base::ERROR : ::android::base::FATAL)
-      << "Failed to register native method in " << PrettyDescriptor(c)
+      << "Failed to register native method in " << c->PrettyDescriptor()
       << " in " << c->GetDexCache()->GetLocation()->ToModifiedUtf8()
       << ": " << kind << " is null at index " << idx;
   soa.Self()->ThrowNewExceptionF("Ljava/lang/NoSuchMethodError;",
@@ -148,17 +154,17 @@
     }
   }
   if (method == nullptr || method->IsStatic() != is_static) {
-    ThrowNoSuchMethodError(soa, c.Ptr(), name, sig, is_static ? "static" : "non-static");
+    ThrowNoSuchMethodError(soa, c, name, sig, is_static ? "static" : "non-static");
     return nullptr;
   }
-  return soa.EncodeMethod(method);
+  return jni::EncodeArtMethod(method);
 }
 
 static ObjPtr<mirror::ClassLoader> GetClassLoader(const ScopedObjectAccess& soa)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   ArtMethod* method = soa.Self()->GetCurrentMethod(nullptr);
   // If we are running Runtime.nativeLoad, use the overriding ClassLoader it set.
-  if (method == soa.DecodeMethod(WellKnownClasses::java_lang_Runtime_nativeLoad)) {
+  if (method == jni::DecodeArtMethod(WellKnownClasses::java_lang_Runtime_nativeLoad)) {
     return soa.Decode<mirror::ClassLoader>(soa.Self()->GetClassLoaderOverride());
   }
   // If we have a method, use its ClassLoader for context.
@@ -229,13 +235,13 @@
                                    sig, name, c->GetDescriptor(&temp));
     return nullptr;
   }
-  return soa.EncodeField(field);
+  return jni::EncodeArtField(field);
 }
 
 static void ThrowAIOOBE(ScopedObjectAccess& soa, mirror::Array* array, jsize start,
                         jsize length, const char* identifier)
     REQUIRES_SHARED(Locks::mutator_lock_) {
-  std::string type(PrettyTypeOf(array));
+  std::string type(array->PrettyTypeOf());
   soa.Self()->ThrowNewExceptionF("Ljava/lang/ArrayIndexOutOfBoundsException;",
                                  "%s offset=%d length=%d %s.length=%d",
                                  type.c_str(), start, length, identifier, array->GetLength());
@@ -277,7 +283,7 @@
   if (mid == nullptr) {
     ScopedObjectAccess soa(env);
     LOG(ERROR) << "No <init>" << signature << " in "
-        << PrettyClass(soa.Decode<mirror::Class>(exception_class));
+        << mirror::Class::PrettyClass(soa.Decode<mirror::Class>(exception_class));
     return JNI_ERR;
   }
 
@@ -362,7 +368,7 @@
   static jmethodID FromReflectedMethod(JNIEnv* env, jobject jlr_method) {
     CHECK_NON_NULL_ARGUMENT(jlr_method);
     ScopedObjectAccess soa(env);
-    return soa.EncodeMethod(ArtMethod::FromReflectedMethod(soa, jlr_method));
+    return jni::EncodeArtMethod(ArtMethod::FromReflectedMethod(soa, jlr_method));
   }
 
   static jfieldID FromReflectedField(JNIEnv* env, jobject jlr_field) {
@@ -373,14 +379,14 @@
       // Not even a java.lang.reflect.Field, return null. TODO, is this check necessary?
       return nullptr;
     }
-    ObjPtr<mirror::Field> field = down_cast<mirror::Field*>(obj_field.Ptr());
-    return soa.EncodeField(field->GetArtField());
+    ObjPtr<mirror::Field> field = ObjPtr<mirror::Field>::DownCast(obj_field);
+    return jni::EncodeArtField(field->GetArtField());
   }
 
   static jobject ToReflectedMethod(JNIEnv* env, jclass, jmethodID mid, jboolean) {
     CHECK_NON_NULL_ARGUMENT(mid);
     ScopedObjectAccess soa(env);
-    ArtMethod* m = soa.DecodeMethod(mid);
+    ArtMethod* m = jni::DecodeArtMethod(mid);
     mirror::Executable* method;
     DCHECK_EQ(Runtime::Current()->GetClassLinker()->GetImagePointerSize(), kRuntimePointerSize);
     DCHECK(!Runtime::Current()->IsActiveTransaction());
@@ -395,7 +401,7 @@
   static jobject ToReflectedField(JNIEnv* env, jclass, jfieldID fid, jboolean) {
     CHECK_NON_NULL_ARGUMENT(fid);
     ScopedObjectAccess soa(env);
-    ArtField* f = soa.DecodeField(fid);
+    ArtField* f = jni::DecodeArtField(fid);
     return soa.AddLocalReference<jobject>(
         mirror::Field::CreateFromArtField<kRuntimePointerSize>(soa.Self(), f, true));
   }
@@ -480,11 +486,11 @@
     jmethodID mid = env->GetMethodID(exception_class.get(), "printStackTrace", "()V");
     if (mid == nullptr) {
       LOG(WARNING) << "JNI WARNING: no printStackTrace()V in "
-                   << PrettyTypeOf(old_exception.Get());
+                   << mirror::Object::PrettyTypeOf(old_exception.Get());
     } else {
       env->CallVoidMethod(exception.get(), mid);
       if (soa.Self()->IsExceptionPending()) {
-        LOG(WARNING) << "JNI WARNING: " << PrettyTypeOf(soa.Self()->GetException())
+        LOG(WARNING) << "JNI WARNING: " << mirror::Object::PrettyTypeOf(soa.Self()->GetException())
                      << " thrown while calling printStackTrace";
         soa.Self()->ClearException();
       }
@@ -528,7 +534,7 @@
   static jobject NewGlobalRef(JNIEnv* env, jobject obj) {
     ScopedObjectAccess soa(env);
     ObjPtr<mirror::Object> decoded_obj = soa.Decode<mirror::Object>(obj);
-    return soa.Vm()->AddGlobalRef(soa.Self(), decoded_obj.Ptr());
+    return soa.Vm()->AddGlobalRef(soa.Self(), decoded_obj);
   }
 
   static void DeleteGlobalRef(JNIEnv* env, jobject obj) {
@@ -540,7 +546,7 @@
   static jweak NewWeakGlobalRef(JNIEnv* env, jobject obj) {
     ScopedObjectAccess soa(env);
     ObjPtr<mirror::Object> decoded_obj = soa.Decode<mirror::Object>(obj);
-    return soa.Vm()->AddWeakGlobalRef(soa.Self(), decoded_obj.Ptr());
+    return soa.Vm()->AddWeakGlobalRef(soa.Self(), decoded_obj);
   }
 
   static void DeleteWeakGlobalRef(JNIEnv* env, jweak obj) {
@@ -625,11 +631,11 @@
     }
     if (c->IsStringClass()) {
       // Replace calls to String.<init> with equivalent StringFactory call.
-      jmethodID sf_mid = soa.EncodeMethod(
-          WellKnownClasses::StringInitToStringFactory(soa.DecodeMethod(mid)));
+      jmethodID sf_mid = jni::EncodeArtMethod(
+          WellKnownClasses::StringInitToStringFactory(jni::DecodeArtMethod(mid)));
       return CallStaticObjectMethodV(env, WellKnownClasses::java_lang_StringFactory, sf_mid, args);
     }
-    mirror::Object* result = c->AllocObject(soa.Self());
+    ObjPtr<mirror::Object> result = c->AllocObject(soa.Self());
     if (result == nullptr) {
       return nullptr;
     }
@@ -652,11 +658,11 @@
     }
     if (c->IsStringClass()) {
       // Replace calls to String.<init> with equivalent StringFactory call.
-      jmethodID sf_mid = soa.EncodeMethod(
-          WellKnownClasses::StringInitToStringFactory(soa.DecodeMethod(mid)));
+      jmethodID sf_mid = jni::EncodeArtMethod(
+          WellKnownClasses::StringInitToStringFactory(jni::DecodeArtMethod(mid)));
       return CallStaticObjectMethodA(env, WellKnownClasses::java_lang_StringFactory, sf_mid, args);
     }
-    mirror::Object* result = c->AllocObject(soa.Self());
+    ObjPtr<mirror::Object> result = c->AllocObject(soa.Self());
     if (result == nullptr) {
       return nullptr;
     }
@@ -1231,14 +1237,14 @@
     CHECK_NON_NULL_ARGUMENT(fid);
     ScopedObjectAccess soa(env);
     ObjPtr<mirror::Object> o = soa.Decode<mirror::Object>(obj);
-    ArtField* f = soa.DecodeField(fid);
+    ArtField* f = jni::DecodeArtField(fid);
     return soa.AddLocalReference<jobject>(f->GetObject(o));
   }
 
   static jobject GetStaticObjectField(JNIEnv* env, jclass, jfieldID fid) {
     CHECK_NON_NULL_ARGUMENT(fid);
     ScopedObjectAccess soa(env);
-    ArtField* f = soa.DecodeField(fid);
+    ArtField* f = jni::DecodeArtField(fid);
     return soa.AddLocalReference<jobject>(f->GetObject(f->GetDeclaringClass()));
   }
 
@@ -1248,7 +1254,7 @@
     ScopedObjectAccess soa(env);
     ObjPtr<mirror::Object> o = soa.Decode<mirror::Object>(java_object);
     ObjPtr<mirror::Object> v = soa.Decode<mirror::Object>(java_value);
-    ArtField* f = soa.DecodeField(fid);
+    ArtField* f = jni::DecodeArtField(fid);
     f->SetObject<false>(o, v);
   }
 
@@ -1256,7 +1262,7 @@
     CHECK_NON_NULL_ARGUMENT_RETURN_VOID(fid);
     ScopedObjectAccess soa(env);
     ObjPtr<mirror::Object> v = soa.Decode<mirror::Object>(java_value);
-    ArtField* f = soa.DecodeField(fid);
+    ArtField* f = jni::DecodeArtField(fid);
     f->SetObject<false>(f->GetDeclaringClass(), v);
   }
 
@@ -1265,13 +1271,13 @@
   CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(fid); \
   ScopedObjectAccess soa(env); \
   ObjPtr<mirror::Object> o = soa.Decode<mirror::Object>(instance); \
-  ArtField* f = soa.DecodeField(fid); \
+  ArtField* f = jni::DecodeArtField(fid); \
   return f->Get ##fn (o)
 
 #define GET_STATIC_PRIMITIVE_FIELD(fn) \
   CHECK_NON_NULL_ARGUMENT_RETURN_ZERO(fid); \
   ScopedObjectAccess soa(env); \
-  ArtField* f = soa.DecodeField(fid); \
+  ArtField* f = jni::DecodeArtField(fid); \
   return f->Get ##fn (f->GetDeclaringClass())
 
 #define SET_PRIMITIVE_FIELD(fn, instance, value) \
@@ -1279,13 +1285,13 @@
   CHECK_NON_NULL_ARGUMENT_RETURN_VOID(fid); \
   ScopedObjectAccess soa(env); \
   ObjPtr<mirror::Object> o = soa.Decode<mirror::Object>(instance); \
-  ArtField* f = soa.DecodeField(fid); \
+  ArtField* f = jni::DecodeArtField(fid); \
   f->Set ##fn <false>(o, value)
 
 #define SET_STATIC_PRIMITIVE_FIELD(fn, value) \
   CHECK_NON_NULL_ARGUMENT_RETURN_VOID(fid); \
   ScopedObjectAccess soa(env); \
-  ArtField* f = soa.DecodeField(fid); \
+  ArtField* f = jni::DecodeArtField(fid); \
   f->Set ##fn <false>(f->GetDeclaringClass(), value)
 
   static jboolean GetBooleanField(JNIEnv* env, jobject obj, jfieldID fid) {
@@ -1835,7 +1841,7 @@
     ScopedObjectAccess soa(env);
     ObjPtr<mirror::Object> obj = soa.Decode<mirror::Object>(java_array);
     if (UNLIKELY(!obj->IsArrayInstance())) {
-      soa.Vm()->JniAbortF("GetArrayLength", "not an array: %s", PrettyTypeOf(obj).c_str());
+      soa.Vm()->JniAbortF("GetArrayLength", "not an array: %s", obj->PrettyTypeOf().c_str());
       return 0;
     }
     mirror::Array* array = obj->AsArray();
@@ -1898,12 +1904,13 @@
 
     // Compute the array class corresponding to the given element class.
     ScopedObjectAccess soa(env);
-    mirror::Class* array_class;
+    ObjPtr<mirror::Class> array_class;
     {
-      mirror::Class* element_class = soa.Decode<mirror::Class>(element_jclass).Ptr();
+      ObjPtr<mirror::Class> element_class = soa.Decode<mirror::Class>(element_jclass).Ptr();
       if (UNLIKELY(element_class->IsPrimitive())) {
-        soa.Vm()->JniAbortF("NewObjectArray", "not an object type: %s",
-                            PrettyDescriptor(element_class).c_str());
+        soa.Vm()->JniAbortF("NewObjectArray",
+                            "not an object type: %s",
+                            element_class->PrettyDescriptor().c_str());
         return nullptr;
       }
       ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
@@ -1923,8 +1930,8 @@
         if (UNLIKELY(!element_class->IsAssignableFrom(initial_object->GetClass()))) {
           soa.Vm()->JniAbortF("NewObjectArray", "cannot assign object of type '%s' to array with "
                               "element type of '%s'",
-                              PrettyDescriptor(initial_object->GetClass()).c_str(),
-                              PrettyDescriptor(element_class).c_str());
+                              mirror::Class::PrettyDescriptor(initial_object->GetClass()).c_str(),
+                              element_class->PrettyDescriptor().c_str());
           return nullptr;
         } else {
           for (jsize i = 0; i < length; ++i) {
@@ -1946,7 +1953,7 @@
     ObjPtr<mirror::Array> array = soa.Decode<mirror::Array>(java_array);
     if (UNLIKELY(!array->GetClass()->IsPrimitiveArray())) {
       soa.Vm()->JniAbortF("GetPrimitiveArrayCritical", "expected primitive array, given %s",
-                          PrettyDescriptor(array->GetClass()).c_str());
+                          array->GetClass()->PrettyDescriptor().c_str());
       return nullptr;
     }
     gc::Heap* heap = Runtime::Current()->GetHeap();
@@ -1974,7 +1981,7 @@
     ObjPtr<mirror::Array> array = soa.Decode<mirror::Array>(java_array);
     if (UNLIKELY(!array->GetClass()->IsPrimitiveArray())) {
       soa.Vm()->JniAbortF("ReleasePrimitiveArrayCritical", "expected primitive array, given %s",
-                          PrettyDescriptor(array->GetClass()).c_str());
+                          array->GetClass()->PrettyDescriptor().c_str());
       return;
     }
     const size_t component_size = array->GetClass()->GetComponentSize();
@@ -2155,7 +2162,7 @@
     ObjPtr<mirror::Class> c = soa.Decode<mirror::Class>(java_class);
     if (UNLIKELY(method_count == 0)) {
       LOG(WARNING) << "JNI RegisterNativeMethods: attempt to register 0 native methods for "
-          << PrettyDescriptor(c);
+          << mirror::Class::PrettyDescriptor(c);
       return JNI_OK;
     }
     CHECK_NON_NULL_ARGUMENT_FN_NAME("RegisterNatives", methods, JNI_ERR);
@@ -2243,20 +2250,20 @@
             mirror::Class::kDumpClassFullDetail);
         LOG(return_errors ? ::android::base::ERROR : ::android::base::FATAL)
             << "Failed to register native method "
-            << PrettyDescriptor(c) << "." << name << sig << " in "
+            << c->PrettyDescriptor() << "." << name << sig << " in "
             << c->GetDexCache()->GetLocation()->ToModifiedUtf8();
-        ThrowNoSuchMethodError(soa, c.Ptr(), name, sig, "static or non-static");
+        ThrowNoSuchMethodError(soa, c, name, sig, "static or non-static");
         return JNI_ERR;
       } else if (!m->IsNative()) {
         LOG(return_errors ? ::android::base::ERROR : ::android::base::FATAL)
             << "Failed to register non-native method "
-            << PrettyDescriptor(c) << "." << name << sig
+            << c->PrettyDescriptor() << "." << name << sig
             << " as native";
-        ThrowNoSuchMethodError(soa, c.Ptr(), name, sig, "native");
+        ThrowNoSuchMethodError(soa, c, name, sig, "native");
         return JNI_ERR;
       }
 
-      VLOG(jni) << "[Registering JNI native method " << PrettyMethod(m) << "]";
+      VLOG(jni) << "[Registering JNI native method " << m->PrettyMethod() << "]";
 
       is_fast = is_fast || m->IsFastNative();  // Merge with @FastNative state.
       m->RegisterNative(fnPtr, is_fast);
@@ -2269,7 +2276,7 @@
     ScopedObjectAccess soa(env);
     ObjPtr<mirror::Class> c = soa.Decode<mirror::Class>(java_class);
 
-    VLOG(jni) << "[Unregistering JNI native methods for " << PrettyClass(c) << "]";
+    VLOG(jni) << "[Unregistering JNI native methods for " << mirror::Class::PrettyClass(c) << "]";
 
     size_t unregistered_count = 0;
     auto pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
@@ -2282,7 +2289,7 @@
 
     if (unregistered_count == 0) {
       LOG(WARNING) << "JNI UnregisterNatives: attempt to unregister native methods of class '"
-          << PrettyDescriptor(c) << "' that contains no native methods";
+          << mirror::Class::PrettyDescriptor(c) << "' that contains no native methods";
     }
     return JNI_OK;
   }
@@ -2295,7 +2302,7 @@
     if (soa.Self()->IsExceptionPending()) {
       return JNI_ERR;
     }
-    soa.Env()->monitors.Add(o.Ptr());
+    soa.Env()->monitors.Add(o);
     return JNI_OK;
   }
 
@@ -2307,7 +2314,7 @@
     if (soa.Self()->IsExceptionPending()) {
       return JNI_ERR;
     }
-    soa.Env()->monitors.Remove(o.Ptr());
+    soa.Env()->monitors.Remove(o);
     return JNI_OK;
   }
 
@@ -2367,7 +2374,7 @@
 
     // Do we definitely know what kind of reference this is?
     IndirectRef ref = reinterpret_cast<IndirectRef>(java_object);
-    IndirectRefKind kind = GetIndirectRefKind(ref);
+    IndirectRefKind kind = IndirectReferenceTable::GetIndirectRefKind(ref);
     switch (kind) {
     case kLocal:
       return JNILocalRefType;
@@ -2388,13 +2395,13 @@
                                           const char* caller)
       REQUIRES_SHARED(Locks::mutator_lock_) {
     // TODO: we should try to expand the table if necessary.
-    if (desired_capacity < 0 || desired_capacity > static_cast<jint>(kLocalsMax)) {
+    if (desired_capacity < 0 || desired_capacity > static_cast<jint>(kLocalsInitial)) {
       LOG(ERROR) << "Invalid capacity given to " << caller << ": " << desired_capacity;
       return JNI_ERR;
     }
     // TODO: this isn't quite right, since "capacity" includes holes.
     const size_t capacity = soa.Env()->locals.Capacity();
-    bool okay = (static_cast<jint>(kLocalsMax - capacity) >= desired_capacity);
+    bool okay = (static_cast<jint>(kLocalsInitial - capacity) >= desired_capacity);
     if (!okay) {
       soa.Self()->ThrowOutOfMemoryError(caller);
     }
@@ -2421,8 +2428,9 @@
       soa.Vm()->JniAbortF(fn_name,
                           "attempt to %s %s primitive array elements with an object of type %s",
                           operation,
-                          PrettyDescriptor(ArtArrayT::GetArrayClass()->GetComponentType()).c_str(),
-                          PrettyDescriptor(array->GetClass()).c_str());
+                          mirror::Class::PrettyDescriptor(
+                              ArtArrayT::GetArrayClass()->GetComponentType()).c_str(),
+                          mirror::Class::PrettyDescriptor(array->GetClass()).c_str());
       return nullptr;
     }
     DCHECK_EQ(sizeof(ElementT), array->GetClass()->GetComponentSize());
@@ -2481,7 +2489,7 @@
       // Sanity check: If elements is not the same as the java array's data, it better not be a
       // heap address. TODO: This might be slow to check, may be worth keeping track of which
       // copies we make?
-      if (heap->IsNonDiscontinuousSpaceHeapAddress(reinterpret_cast<mirror::Object*>(elements))) {
+      if (heap->IsNonDiscontinuousSpaceHeapAddress(elements)) {
         soa.Vm()->JniAbortF("ReleaseArrayElements",
                             "invalid element pointer %p, array elements are %p",
                             reinterpret_cast<void*>(elements), array_data);
diff --git a/runtime/jni_internal.h b/runtime/jni_internal.h
index b829934..b3837c4 100644
--- a/runtime/jni_internal.h
+++ b/runtime/jni_internal.h
@@ -20,6 +20,8 @@
 #include <jni.h>
 #include <iosfwd>
 
+#include "base/macros.h"
+
 #ifndef NATIVE_METHOD
 #define NATIVE_METHOD(className, functionName, signature) \
   { #functionName, signature, reinterpret_cast<void*>(className ## _ ## functionName) }
@@ -36,6 +38,9 @@
 
 namespace art {
 
+class ArtField;
+class ArtMethod;
+
 const JNINativeInterface* GetJniNativeInterface();
 const JNINativeInterface* GetRuntimeShutdownNativeInterface();
 
@@ -46,6 +51,29 @@
 
 int ThrowNewException(JNIEnv* env, jclass exception_class, const char* msg, jobject cause);
 
+namespace jni {
+
+ALWAYS_INLINE
+static inline ArtField* DecodeArtField(jfieldID fid) {
+  return reinterpret_cast<ArtField*>(fid);
+}
+
+ALWAYS_INLINE
+static inline jfieldID EncodeArtField(ArtField* field) {
+  return reinterpret_cast<jfieldID>(field);
+}
+
+ALWAYS_INLINE
+static inline jmethodID EncodeArtMethod(ArtMethod* art_method) {
+  return reinterpret_cast<jmethodID>(art_method);
+}
+
+ALWAYS_INLINE
+static inline ArtMethod* DecodeArtMethod(jmethodID method_id) {
+  return reinterpret_cast<ArtMethod*>(method_id);
+}
+
+}  // namespace jni
 }  // namespace art
 
 std::ostream& operator<<(std::ostream& os, const jobjectRefType& rhs);
diff --git a/runtime/jni_internal_test.cc b/runtime/jni_internal_test.cc
index fbd670c..a421c34 100644
--- a/runtime/jni_internal_test.cc
+++ b/runtime/jni_internal_test.cc
@@ -58,7 +58,7 @@
   void ExpectException(jclass exception_class) {
     ScopedObjectAccess soa(env_);
     EXPECT_TRUE(env_->ExceptionCheck())
-        << PrettyDescriptor(soa.Decode<mirror::Class>(exception_class));
+        << mirror::Class::PrettyDescriptor(soa.Decode<mirror::Class>(exception_class));
     jthrowable exception = env_->ExceptionOccurred();
     EXPECT_NE(nullptr, exception);
     env_->ExceptionClear();
@@ -679,12 +679,8 @@
   ASSERT_TRUE(env_->IsInstanceOf(o, c));
   // ...whose fields haven't been initialized because
   // we didn't call a constructor.
-  if (art::mirror::kUseStringCompression) {
-    // Zero-length string is compressed, so the length internally will be -(1 << 31).
-    ASSERT_EQ(-2147483648, env_->GetIntField(o, env_->GetFieldID(c, "count", "I")));
-  } else {
-    ASSERT_EQ(0, env_->GetIntField(o, env_->GetFieldID(c, "count", "I")));
-  }
+  // Even with string compression empty string has `count == 0`.
+  ASSERT_EQ(0, env_->GetIntField(o, env_->GetFieldID(c, "count", "I")));
 }
 
 TEST_F(JniInternalTest, GetVersion) {
@@ -865,6 +861,11 @@
   GetStaticMethodIdBadArgumentTest(true);
 }
 
+static size_t GetLocalsCapacity(JNIEnv* env) {
+  ScopedObjectAccess soa(Thread::Current());
+  return reinterpret_cast<JNIEnvExt*>(env)->locals.Capacity();
+}
+
 TEST_F(JniInternalTest, FromReflectedField_ToReflectedField) {
   jclass jlrField = env_->FindClass("java/lang/reflect/Field");
   jclass c = env_->FindClass("java/lang/String");
@@ -873,11 +874,15 @@
   ASSERT_NE(fid, nullptr);
   // Turn the fid into a java.lang.reflect.Field...
   jobject field = env_->ToReflectedField(c, fid, JNI_FALSE);
-  for (size_t i = 0; i <= kLocalsMax; ++i) {
+  size_t capacity_before = GetLocalsCapacity(env_);
+  for (size_t i = 0; i <= 10; ++i) {
     // Regression test for b/18396311, ToReflectedField leaking local refs causing a local
     // reference table overflows with 512 references to ArtField
     env_->DeleteLocalRef(env_->ToReflectedField(c, fid, JNI_FALSE));
   }
+  size_t capacity_after = GetLocalsCapacity(env_);
+  ASSERT_EQ(capacity_before, capacity_after);
+
   ASSERT_NE(c, nullptr);
   ASSERT_TRUE(env_->IsInstanceOf(field, jlrField));
   // ...and back again.
@@ -886,11 +891,12 @@
   // Make sure we can actually use it.
   jstring s = env_->NewStringUTF("poop");
   if (mirror::kUseStringCompression) {
-    // Negative because s is compressed (first bit is 1)
-    ASSERT_EQ(-2147483644, env_->GetIntField(s, fid2));
+    ASSERT_EQ(mirror::String::GetFlaggedCount(4, /* compressible */ true),
+              env_->GetIntField(s, fid2));
     // Create incompressible string
     jstring s_16 = env_->NewStringUTF("\u0444\u0444");
-    ASSERT_EQ(2, env_->GetIntField(s_16, fid2));
+    ASSERT_EQ(mirror::String::GetFlaggedCount(2, /* compressible */ false),
+              env_->GetIntField(s_16, fid2));
   } else {
     ASSERT_EQ(4, env_->GetIntField(s, fid2));
   }
@@ -911,11 +917,14 @@
   ASSERT_NE(mid, nullptr);
   // Turn the mid into a java.lang.reflect.Constructor...
   jobject method = env_->ToReflectedMethod(c, mid, JNI_FALSE);
-  for (size_t i = 0; i <= kLocalsMax; ++i) {
+  size_t capacity_before = GetLocalsCapacity(env_);
+  for (size_t i = 0; i <= 10; ++i) {
     // Regression test for b/18396311, ToReflectedMethod leaking local refs causing a local
     // reference table overflows with 512 references to ArtMethod
     env_->DeleteLocalRef(env_->ToReflectedMethod(c, mid, JNI_FALSE));
   }
+  size_t capacity_after = GetLocalsCapacity(env_);
+  ASSERT_EQ(capacity_before, capacity_after);
   ASSERT_NE(method, nullptr);
   ASSERT_TRUE(env_->IsInstanceOf(method, jlrConstructor));
   // ...and back again.
@@ -2295,20 +2304,26 @@
   // The segment_state_ field is private, and we want to avoid friend declaration. So we'll check
   // by modifying memory.
   // The parameters don't really matter here.
-  IndirectReferenceTable irt(5, 5, IndirectRefKind::kGlobal, true);
-  uint32_t old_state = irt.GetSegmentState();
+  std::string error_msg;
+  IndirectReferenceTable irt(5,
+                             IndirectRefKind::kGlobal,
+                             IndirectReferenceTable::ResizableCapacity::kNo,
+                             &error_msg);
+  ASSERT_TRUE(irt.IsValid()) << error_msg;
+  IRTSegmentState old_state = irt.GetSegmentState();
 
   // Write some new state directly. We invert parts of old_state to ensure a new value.
-  uint32_t new_state = old_state ^ 0x07705005;
-  ASSERT_NE(old_state, new_state);
+  IRTSegmentState new_state;
+  new_state.top_index = old_state.top_index ^ 0x07705005;
+  ASSERT_NE(old_state.top_index, new_state.top_index);
 
   uint8_t* base = reinterpret_cast<uint8_t*>(&irt);
   int32_t segment_state_offset =
       IndirectReferenceTable::SegmentStateOffset(sizeof(void*)).Int32Value();
-  *reinterpret_cast<uint32_t*>(base + segment_state_offset) = new_state;
+  *reinterpret_cast<IRTSegmentState*>(base + segment_state_offset) = new_state;
 
   // Read and compare.
-  EXPECT_EQ(new_state, irt.GetSegmentState());
+  EXPECT_EQ(new_state.top_index, irt.GetSegmentState().top_index);
 }
 
 // Test the offset computation of JNIEnvExt offsets. b/26071368.
diff --git a/test/562-no-intermediate/src/Main.java b/runtime/jvalue-inl.h
similarity index 68%
copy from test/562-no-intermediate/src/Main.java
copy to runtime/jvalue-inl.h
index 3b74d6f..b33686c 100644
--- a/test/562-no-intermediate/src/Main.java
+++ b/runtime/jvalue-inl.h
@@ -14,14 +14,19 @@
  * limitations under the License.
  */
 
-public class Main {
+#ifndef ART_RUNTIME_JVALUE_INL_H_
+#define ART_RUNTIME_JVALUE_INL_H_
 
-  /// CHECK-START-ARM64: int Main.main(String[]) register_allocator (after)
-  /// CHECK-NOT: IntermediateAddress
-  public static void main(String[] args) {
-    array[index] += Math.cos(42);
-  }
+#include "jvalue.h"
 
-  static int index = 0;
-  static double[] array = new double[2];
+#include "obj_ptr.h"
+
+namespace art {
+
+inline void JValue::SetL(ObjPtr<mirror::Object> new_l) {
+  l = new_l.Ptr();
 }
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_JVALUE_INL_H_
diff --git a/runtime/jvalue.h b/runtime/jvalue.h
index 7b91b0b..52a0f23 100644
--- a/runtime/jvalue.h
+++ b/runtime/jvalue.h
@@ -18,9 +18,12 @@
 #define ART_RUNTIME_JVALUE_H_
 
 #include "base/macros.h"
+#include "base/mutex.h"
 
 #include <stdint.h>
 
+#include "obj_ptr.h"
+
 namespace art {
 namespace mirror {
 class Object;
@@ -52,8 +55,10 @@
   int64_t GetJ() const { return j; }
   void SetJ(int64_t new_j) { j = new_j; }
 
-  mirror::Object* GetL() const { return l; }
-  void SetL(mirror::Object* new_l) { l = new_l; }
+  mirror::Object* GetL() const REQUIRES_SHARED(Locks::mutator_lock_) {
+    return l;
+  }
+  void SetL(ObjPtr<mirror::Object> new_l) REQUIRES_SHARED(Locks::mutator_lock_);
 
   int16_t GetS() const { return s; }
   void SetS(int16_t new_s) {
diff --git a/runtime/leb128.h b/runtime/leb128.h
index 74934ae..31459af 100644
--- a/runtime/leb128.h
+++ b/runtime/leb128.h
@@ -53,6 +53,49 @@
   return static_cast<uint32_t>(result);
 }
 
+static inline bool DecodeUnsignedLeb128Checked(const uint8_t** data,
+                                               const void* end,
+                                               uint32_t* out) {
+  const uint8_t* ptr = *data;
+  if (ptr >= end) {
+    return false;
+  }
+  int result = *(ptr++);
+  if (UNLIKELY(result > 0x7f)) {
+    if (ptr >= end) {
+      return false;
+    }
+    int cur = *(ptr++);
+    result = (result & 0x7f) | ((cur & 0x7f) << 7);
+    if (cur > 0x7f) {
+      if (ptr >= end) {
+        return false;
+      }
+      cur = *(ptr++);
+      result |= (cur & 0x7f) << 14;
+      if (cur > 0x7f) {
+        if (ptr >= end) {
+          return false;
+        }
+        cur = *(ptr++);
+        result |= (cur & 0x7f) << 21;
+        if (cur > 0x7f) {
+          if (ptr >= end) {
+            return false;
+          }
+          // Note: We don't check to see if cur is out of range here,
+          // meaning we tolerate garbage in the four high-order bits.
+          cur = *(ptr++);
+          result |= cur << 28;
+        }
+      }
+    }
+  }
+  *data = ptr;
+  *out = static_cast<uint32_t>(result);
+  return true;
+}
+
 // Reads an unsigned LEB128 + 1 value. updating the given pointer to point
 // just past the end of the read value. This function tolerates
 // non-zero high-order bits in the fifth encoded byte.
@@ -97,6 +140,57 @@
   return result;
 }
 
+static inline bool DecodeSignedLeb128Checked(const uint8_t** data,
+                                             const void* end,
+                                             int32_t* out) {
+  const uint8_t* ptr = *data;
+  if (ptr >= end) {
+    return false;
+  }
+  int32_t result = *(ptr++);
+  if (result <= 0x7f) {
+    result = (result << 25) >> 25;
+  } else {
+    if (ptr >= end) {
+      return false;
+    }
+    int cur = *(ptr++);
+    result = (result & 0x7f) | ((cur & 0x7f) << 7);
+    if (cur <= 0x7f) {
+      result = (result << 18) >> 18;
+    } else {
+      if (ptr >= end) {
+        return false;
+      }
+      cur = *(ptr++);
+      result |= (cur & 0x7f) << 14;
+      if (cur <= 0x7f) {
+        result = (result << 11) >> 11;
+      } else {
+        if (ptr >= end) {
+          return false;
+        }
+        cur = *(ptr++);
+        result |= (cur & 0x7f) << 21;
+        if (cur <= 0x7f) {
+          result = (result << 4) >> 4;
+        } else {
+          if (ptr >= end) {
+            return false;
+          }
+          // Note: We don't check to see if cur is out of range here,
+          // meaning we tolerate garbage in the four high-order bits.
+          cur = *(ptr++);
+          result |= cur << 28;
+        }
+      }
+    }
+  }
+  *data = ptr;
+  *out = static_cast<uint32_t>(result);
+  return true;
+}
+
 // Returns the number of bytes needed to encode the value in unsigned LEB128.
 static inline uint32_t UnsignedLeb128Size(uint32_t data) {
   // bits_to_encode = (data != 0) ? 32 - CLZ(x) : 1  // 32 - CLZ(data | 1)
diff --git a/runtime/lock_word.h b/runtime/lock_word.h
index 538b6eb..2f2565b 100644
--- a/runtime/lock_word.h
+++ b/runtime/lock_word.h
@@ -61,7 +61,7 @@
  */
 class LockWord {
  public:
-  enum SizeShiftsAndMasks {  // private marker to avoid generate-operator-out.py from processing.
+  enum SizeShiftsAndMasks : uint32_t {  // private marker to avoid generate-operator-out.py from processing.
     // Number of bits to encode the state, currently just fat or thin/unlocked or hash code.
     kStateSize = 2,
     kReadBarrierStateSize = 1,
@@ -91,6 +91,8 @@
     kStateFat = 1,
     kStateHash = 2,
     kStateForwardingAddress = 3,
+    kStateForwardingAddressShifted = kStateForwardingAddress << kStateShift,
+    kStateForwardingAddressOverflow = (1 + kStateMask - kStateForwardingAddress) << kStateShift,
 
     // Read barrier bit.
     kReadBarrierStateShift = kThinLockCountSize + kThinLockCountShift,
@@ -140,7 +142,7 @@
 
   static LockWord FromForwardingAddress(size_t target) {
     DCHECK_ALIGNED(target, (1 << kStateSize));
-    return LockWord((target >> kForwardingAddressShift) | (kStateForwardingAddress << kStateShift));
+    return LockWord((target >> kForwardingAddressShift) | kStateForwardingAddressShifted);
   }
 
   static LockWord FromHashCode(uint32_t hash_code, uint32_t gc_state) {
@@ -202,6 +204,8 @@
 
   void SetReadBarrierState(uint32_t rb_state) {
     DCHECK_EQ(rb_state & ~kReadBarrierStateMask, 0U);
+    DCHECK(rb_state == ReadBarrier::WhiteState() ||
+           rb_state == ReadBarrier::GrayState()) << rb_state;
     DCHECK_NE(static_cast<uint32_t>(GetState()), static_cast<uint32_t>(kForwardingAddress));
     // Clear and or the bits.
     value_ &= ~(kReadBarrierStateMask << kReadBarrierStateShift);
@@ -256,6 +260,14 @@
   LockWord();
 
   explicit LockWord(uint32_t val) : value_(val) {
+    // Make sure adding the overflow causes an overflow.
+    constexpr uint64_t overflow = static_cast<uint64_t>(kStateForwardingAddressShifted) +
+        static_cast<uint64_t>(kStateForwardingAddressOverflow);
+    constexpr bool is_larger = overflow > static_cast<uint64_t>(0xFFFFFFFF);
+    static_assert(is_larger, "should have overflowed");
+    static_assert(
+         (~kStateForwardingAddress & kStateMask) == 0,
+        "READ_BARRIER_MARK_REG relies on the forwarding address state being only one bits");
     CheckReadBarrierState();
   }
 
@@ -270,9 +282,8 @@
       if (!kUseReadBarrier) {
         DCHECK_EQ(rb_state, 0U);
       } else {
-        DCHECK(rb_state == ReadBarrier::white_ptr_ ||
-               rb_state == ReadBarrier::gray_ptr_ ||
-               rb_state == ReadBarrier::black_ptr_) << rb_state;
+        DCHECK(rb_state == ReadBarrier::WhiteState() ||
+               rb_state == ReadBarrier::GrayState()) << rb_state;
       }
     }
   }
diff --git a/runtime/mem_map.cc b/runtime/mem_map.cc
index bb07fcb..1ec59b3 100644
--- a/runtime/mem_map.cc
+++ b/runtime/mem_map.cc
@@ -318,11 +318,18 @@
     debug_friendly_name += name;
     fd.Reset(ashmem_create_region(debug_friendly_name.c_str(), page_aligned_byte_count),
              /* check_usage */ false);
+
     if (fd.Fd() == -1) {
-      *error_msg = StringPrintf("ashmem_create_region failed for '%s': %s", name, strerror(errno));
-      return nullptr;
+      // We failed to create the ashmem region. Print a warning, but continue
+      // anyway by creating a true anonymous mmap with an fd of -1. It is
+      // better to use an unlabelled anonymous map than to fail to create a
+      // map at all.
+      PLOG(WARNING) << "ashmem_create_region failed for '" << name << "'";
+    } else {
+      // We succeeded in creating the ashmem region. Use the created ashmem
+      // region as backing for the mmap.
+      flags &= ~MAP_ANONYMOUS;
     }
-    flags &= ~MAP_ANONYMOUS;
   }
 
   // We need to store and potentially set an error number for pretty printing of errors
@@ -354,7 +361,6 @@
     }
     return nullptr;
   }
-  std::ostringstream check_map_request_error_msg;
   if (!CheckMapRequest(expected_ptr, actual, page_aligned_byte_count, error_msg)) {
     return nullptr;
   }
@@ -441,7 +447,6 @@
     }
     return nullptr;
   }
-  std::ostringstream check_map_request_error_msg;
   if (!CheckMapRequest(expected_ptr, actual, page_aligned_byte_count, error_msg)) {
     return nullptr;
   }
@@ -918,4 +923,23 @@
   }
 }
 
+void ZeroAndReleasePages(void* address, size_t length) {
+  uint8_t* const mem_begin = reinterpret_cast<uint8_t*>(address);
+  uint8_t* const mem_end = mem_begin + length;
+  uint8_t* const page_begin = AlignUp(mem_begin, kPageSize);
+  uint8_t* const page_end = AlignDown(mem_end, kPageSize);
+  if (!kMadviseZeroes || page_begin >= page_end) {
+    // No possible area to madvise.
+    std::fill(mem_begin, mem_end, 0);
+  } else {
+    // Spans one or more pages.
+    DCHECK_LE(mem_begin, page_begin);
+    DCHECK_LE(page_begin, page_end);
+    DCHECK_LE(page_end, mem_end);
+    std::fill(mem_begin, page_begin, 0);
+    CHECK_NE(madvise(page_begin, page_end - page_begin, MADV_DONTNEED), -1) << "madvise failed";
+    std::fill(page_end, mem_end, 0);
+  }
+}
+
 }  // namespace art
diff --git a/runtime/mem_map.h b/runtime/mem_map.h
index 597f0d4..049ae12 100644
--- a/runtime/mem_map.h
+++ b/runtime/mem_map.h
@@ -241,9 +241,13 @@
 
   friend class MemMapTest;  // To allow access to base_begin_ and base_size_.
 };
+
 std::ostream& operator<<(std::ostream& os, const MemMap& mem_map);
 std::ostream& operator<<(std::ostream& os, const MemMap::Maps& mem_maps);
 
+// Zero and release pages if possible, no requirements on alignments.
+void ZeroAndReleasePages(void* address, size_t length);
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_MEM_MAP_H_
diff --git a/runtime/method_handles-inl.h b/runtime/method_handles-inl.h
new file mode 100644
index 0000000..1240792
--- /dev/null
+++ b/runtime/method_handles-inl.h
@@ -0,0 +1,169 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_METHOD_HANDLES_INL_H_
+#define ART_RUNTIME_METHOD_HANDLES_INL_H_
+
+#include "method_handles.h"
+
+#include "common_throws.h"
+#include "dex_instruction.h"
+#include "interpreter/interpreter_common.h"
+#include "jvalue.h"
+#include "mirror/class.h"
+#include "mirror/method_type.h"
+#include "mirror/object.h"
+#include "reflection.h"
+#include "stack.h"
+
+namespace art {
+
+inline bool ConvertArgumentValue(Handle<mirror::MethodType> callsite_type,
+                                 Handle<mirror::MethodType> callee_type,
+                                 int index,
+                                 JValue* value) REQUIRES_SHARED(Locks::mutator_lock_) {
+  ObjPtr<mirror::Class> from_class(callsite_type->GetPTypes()->GetWithoutChecks(index));
+  ObjPtr<mirror::Class> to_class(callee_type->GetPTypes()->GetWithoutChecks(index));
+  if (from_class == to_class) {
+    return true;
+  }
+
+  // |value| may contain a bare heap pointer which is generally
+  // |unsafe. ConvertJValueCommon() saves |value|, |from_class|, and
+  // |to_class| to Handles where necessary to avoid issues if the heap
+  // changes.
+  if (ConvertJValueCommon(callsite_type, callee_type, from_class, to_class, value)) {
+    DCHECK(!Thread::Current()->IsExceptionPending());
+    return true;
+  } else {
+    DCHECK(Thread::Current()->IsExceptionPending());
+    value->SetJ(0);
+    return false;
+  }
+}
+
+inline bool ConvertReturnValue(Handle<mirror::MethodType> callsite_type,
+                               Handle<mirror::MethodType> callee_type,
+                               JValue* value)  REQUIRES_SHARED(Locks::mutator_lock_) {
+  ObjPtr<mirror::Class> from_class(callee_type->GetRType());
+  ObjPtr<mirror::Class> to_class(callsite_type->GetRType());
+  if (to_class->GetPrimitiveType() == Primitive::kPrimVoid || from_class == to_class) {
+    return true;
+  }
+
+  // |value| may contain a bare heap pointer which is generally
+  // unsafe. ConvertJValueCommon() saves |value|, |from_class|, and
+  // |to_class| to Handles where necessary to avoid issues if the heap
+  // changes.
+  if (ConvertJValueCommon(callsite_type, callee_type, from_class, to_class, value)) {
+    DCHECK(!Thread::Current()->IsExceptionPending());
+    return true;
+  } else {
+    DCHECK(Thread::Current()->IsExceptionPending());
+    value->SetJ(0);
+    return false;
+  }
+}
+
+template <typename G, typename S>
+bool PerformConversions(Thread* self,
+                        Handle<mirror::MethodType> callsite_type,
+                        Handle<mirror::MethodType> callee_type,
+                        G* getter,
+                        S* setter,
+                        int32_t num_conversions) REQUIRES_SHARED(Locks::mutator_lock_) {
+  StackHandleScope<2> hs(self);
+  Handle<mirror::ObjectArray<mirror::Class>> from_types(hs.NewHandle(callsite_type->GetPTypes()));
+  Handle<mirror::ObjectArray<mirror::Class>> to_types(hs.NewHandle(callee_type->GetPTypes()));
+
+  for (int32_t i = 0; i < num_conversions; ++i) {
+    ObjPtr<mirror::Class> from(from_types->GetWithoutChecks(i));
+    ObjPtr<mirror::Class> to(to_types->GetWithoutChecks(i));
+    const Primitive::Type from_type = from_types->GetWithoutChecks(i)->GetPrimitiveType();
+    const Primitive::Type to_type = to_types->GetWithoutChecks(i)->GetPrimitiveType();
+    if (from == to) {
+      // Easy case - the types are identical. Nothing left to do except to pass
+      // the arguments along verbatim.
+      if (Primitive::Is64BitType(from_type)) {
+        setter->SetLong(getter->GetLong());
+      } else if (from_type == Primitive::kPrimNot) {
+        setter->SetReference(getter->GetReference());
+      } else {
+        setter->Set(getter->Get());
+      }
+    } else {
+      JValue value;
+
+      if (Primitive::Is64BitType(from_type)) {
+        value.SetJ(getter->GetLong());
+      } else if (from_type == Primitive::kPrimNot) {
+        value.SetL(getter->GetReference());
+      } else {
+        value.SetI(getter->Get());
+      }
+
+      // Caveat emptor - ObjPtr's not guaranteed valid after this call.
+      if (!ConvertArgumentValue(callsite_type, callee_type, i, &value)) {
+        DCHECK(self->IsExceptionPending());
+        return false;
+      }
+
+      if (Primitive::Is64BitType(to_type)) {
+        setter->SetLong(value.GetJ());
+      } else if (to_type == Primitive::kPrimNot) {
+        setter->SetReference(value.GetL());
+      } else {
+        setter->Set(value.GetI());
+      }
+    }
+  }
+
+  return true;
+}
+
+template <bool is_range>
+bool ConvertAndCopyArgumentsFromCallerFrame(Thread* self,
+                                            Handle<mirror::MethodType> callsite_type,
+                                            Handle<mirror::MethodType> callee_type,
+                                            const ShadowFrame& caller_frame,
+                                            uint32_t first_src_reg,
+                                            uint32_t first_dest_reg,
+                                            const uint32_t (&arg)[Instruction::kMaxVarArgRegs],
+                                            ShadowFrame* callee_frame)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  ObjPtr<mirror::ObjectArray<mirror::Class>> from_types(callsite_type->GetPTypes());
+  ObjPtr<mirror::ObjectArray<mirror::Class>> to_types(callee_type->GetPTypes());
+
+  const int32_t num_method_params = from_types->GetLength();
+  if (to_types->GetLength() != num_method_params) {
+    ThrowWrongMethodTypeException(callee_type.Get(), callsite_type.Get());
+    return false;
+  }
+
+  ShadowFrameGetter<is_range> getter(first_src_reg, arg, caller_frame);
+  ShadowFrameSetter setter(callee_frame, first_dest_reg);
+
+  return PerformConversions<ShadowFrameGetter<is_range>, ShadowFrameSetter>(self,
+                                                                            callsite_type,
+                                                                            callee_type,
+                                                                            &getter,
+                                                                            &setter,
+                                                                            num_method_params);
+}
+
+}  // namespace art
+
+#endif  // ART_RUNTIME_METHOD_HANDLES_INL_H_
diff --git a/runtime/method_handles.cc b/runtime/method_handles.cc
new file mode 100644
index 0000000..3c22d7f
--- /dev/null
+++ b/runtime/method_handles.cc
@@ -0,0 +1,282 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "method_handles.h"
+
+#include "method_handles-inl.h"
+#include "jvalue.h"
+#include "jvalue-inl.h"
+#include "reflection.h"
+#include "reflection-inl.h"
+#include "well_known_classes.h"
+
+namespace art {
+
+namespace {
+
+#define PRIMITIVES_LIST(V) \
+  V(Primitive::kPrimBoolean, Boolean, Boolean, Z) \
+  V(Primitive::kPrimByte, Byte, Byte, B)          \
+  V(Primitive::kPrimChar, Char, Character, C)     \
+  V(Primitive::kPrimShort, Short, Short, S)       \
+  V(Primitive::kPrimInt, Int, Integer, I)         \
+  V(Primitive::kPrimLong, Long, Long, J)          \
+  V(Primitive::kPrimFloat, Float, Float, F)       \
+  V(Primitive::kPrimDouble, Double, Double, D)
+
+// Assigns |type| to the primitive type associated with |klass|. Returns
+// true iff. |klass| was a boxed type (Integer, Long etc.), false otherwise.
+bool GetUnboxedPrimitiveType(ObjPtr<mirror::Class> klass, Primitive::Type* type)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  ScopedAssertNoThreadSuspension ants(__FUNCTION__);
+#define LOOKUP_PRIMITIVE(primitive, _, __, ___)                         \
+  if (klass->DescriptorEquals(Primitive::BoxedDescriptor(primitive))) { \
+    *type = primitive;                                                  \
+    return true;                                                        \
+  }
+
+  PRIMITIVES_LIST(LOOKUP_PRIMITIVE);
+#undef LOOKUP_PRIMITIVE
+  return false;
+}
+
+ObjPtr<mirror::Class> GetBoxedPrimitiveClass(Primitive::Type type)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  ScopedAssertNoThreadSuspension ants(__FUNCTION__);
+  jmethodID m = nullptr;
+  switch (type) {
+#define CASE_PRIMITIVE(primitive, _, java_name, __)              \
+    case primitive:                                              \
+      m = WellKnownClasses::java_lang_ ## java_name ## _valueOf; \
+      break;
+    PRIMITIVES_LIST(CASE_PRIMITIVE);
+#undef CASE_PRIMITIVE
+    case Primitive::Type::kPrimNot:
+    case Primitive::Type::kPrimVoid:
+      return nullptr;
+  }
+  return jni::DecodeArtMethod(m)->GetDeclaringClass();
+}
+
+bool GetUnboxedTypeAndValue(ObjPtr<mirror::Object> o, Primitive::Type* type, JValue* value)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  ScopedAssertNoThreadSuspension ants(__FUNCTION__);
+  ObjPtr<mirror::Class> klass = o->GetClass();
+  ArtField* primitive_field = &klass->GetIFieldsPtr()->At(0);
+#define CASE_PRIMITIVE(primitive, abbrev, _, shorthand)         \
+  if (klass == GetBoxedPrimitiveClass(primitive)) {             \
+    *type = primitive;                                          \
+    value->Set ## shorthand(primitive_field->Get ## abbrev(o)); \
+    return true;                                                \
+  }
+  PRIMITIVES_LIST(CASE_PRIMITIVE)
+#undef CASE_PRIMITIVE
+  return false;
+}
+
+inline bool IsReferenceType(Primitive::Type type) {
+  return type == Primitive::kPrimNot;
+}
+
+inline bool IsPrimitiveType(Primitive::Type type) {
+  return !IsReferenceType(type);
+}
+
+}  // namespace
+
+bool IsParameterTypeConvertible(ObjPtr<mirror::Class> from, ObjPtr<mirror::Class> to)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  // This function returns true if there's any conceivable conversion
+  // between |from| and |to|. It's expected this method will be used
+  // to determine if a WrongMethodTypeException should be raised. The
+  // decision logic follows the documentation for MethodType.asType().
+  if (from == to) {
+    return true;
+  }
+
+  Primitive::Type from_primitive = from->GetPrimitiveType();
+  Primitive::Type to_primitive = to->GetPrimitiveType();
+  DCHECK(from_primitive != Primitive::Type::kPrimVoid);
+  DCHECK(to_primitive != Primitive::Type::kPrimVoid);
+
+  // If |to| and |from| are references.
+  if (IsReferenceType(from_primitive) && IsReferenceType(to_primitive)) {
+    // Assignability is determined during parameter conversion when
+    // invoking the associated method handle.
+    return true;
+  }
+
+  // If |to| and |from| are primitives and a widening conversion exists.
+  if (Primitive::IsWidenable(from_primitive, to_primitive)) {
+    return true;
+  }
+
+  // If |to| is a reference and |from| is a primitive, then boxing conversion.
+  if (IsReferenceType(to_primitive) && IsPrimitiveType(from_primitive)) {
+    return to->IsAssignableFrom(GetBoxedPrimitiveClass(from_primitive));
+  }
+
+  // If |from| is a reference and |to| is a primitive, then unboxing conversion.
+  if (IsPrimitiveType(to_primitive) && IsReferenceType(from_primitive)) {
+    if (from->DescriptorEquals("Ljava/lang/Object;")) {
+      // Object might be converted into a primitive during unboxing.
+      return true;
+    } else if (Primitive::IsNumericType(to_primitive) &&
+               from->DescriptorEquals("Ljava/lang/Number;")) {
+      // Number might be unboxed into any of the number primitive types.
+      return true;
+    }
+    Primitive::Type unboxed_type;
+    if (GetUnboxedPrimitiveType(from, &unboxed_type)) {
+      if (unboxed_type == to_primitive) {
+        // Straightforward unboxing conversion such as Boolean => boolean.
+        return true;
+      } else {
+        // Check if widening operations for numeric primitives would work,
+        // such as Byte => byte => long.
+        return Primitive::IsWidenable(unboxed_type, to_primitive);
+      }
+    }
+  }
+
+  return false;
+}
+
+bool IsReturnTypeConvertible(ObjPtr<mirror::Class> from, ObjPtr<mirror::Class> to)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  if (to->GetPrimitiveType() == Primitive::Type::kPrimVoid) {
+    // Result will be ignored.
+    return true;
+  } else if (from->GetPrimitiveType() == Primitive::Type::kPrimVoid) {
+    // Returned value will be 0 / null.
+    return true;
+  } else {
+    // Otherwise apply usual parameter conversion rules.
+    return IsParameterTypeConvertible(from, to);
+  }
+}
+
+bool ConvertJValueCommon(
+    Handle<mirror::MethodType> callsite_type,
+    Handle<mirror::MethodType> callee_type,
+    ObjPtr<mirror::Class> from,
+    ObjPtr<mirror::Class> to,
+    JValue* value) {
+  // The reader maybe concerned about the safety of the heap object
+  // that may be in |value|. There is only one case where allocation
+  // is obviously needed and that's for boxing. However, in the case
+  // of boxing |value| contains a non-reference type.
+
+  const Primitive::Type from_type = from->GetPrimitiveType();
+  const Primitive::Type to_type = to->GetPrimitiveType();
+
+  // Put incoming value into |src_value| and set return value to 0.
+  // Errors and conversions from void require the return value to be 0.
+  const JValue src_value(*value);
+  value->SetJ(0);
+
+  // Conversion from void set result to zero.
+  if (from_type == Primitive::kPrimVoid) {
+    return true;
+  }
+
+  // This method must be called only when the types don't match.
+  DCHECK(from != to);
+
+  if (IsPrimitiveType(from_type) && IsPrimitiveType(to_type)) {
+    // The source and target types are both primitives.
+    if (UNLIKELY(!ConvertPrimitiveValueNoThrow(from_type, to_type, src_value, value))) {
+      ThrowWrongMethodTypeException(callee_type.Get(), callsite_type.Get());
+      return false;
+    }
+    return true;
+  } else if (IsReferenceType(from_type) && IsReferenceType(to_type)) {
+    // They're both reference types. If "from" is null, we can pass it
+    // through unchanged. If not, we must generate a cast exception if
+    // |to| is not assignable from the dynamic type of |ref|.
+    //
+    // Playing it safe with StackHandleScope here, not expecting any allocation
+    // in mirror::Class::IsAssignable().
+    StackHandleScope<2> hs(Thread::Current());
+    Handle<mirror::Class> h_to(hs.NewHandle(to));
+    Handle<mirror::Object> h_obj(hs.NewHandle(src_value.GetL()));
+    if (h_obj.Get() != nullptr && !to->IsAssignableFrom(h_obj->GetClass())) {
+      ThrowClassCastException(h_to.Get(), h_obj->GetClass());
+      return false;
+    }
+    value->SetL(h_obj.Get());
+    return true;
+  } else if (IsReferenceType(to_type)) {
+    DCHECK(IsPrimitiveType(from_type));
+    // The source type is a primitive and the target type is a reference, so we must box.
+    // The target type maybe a super class of the boxed source type, for example,
+    // if the source type is int, it's boxed type is java.lang.Integer, and the target
+    // type could be java.lang.Number.
+    Primitive::Type type;
+    if (!GetUnboxedPrimitiveType(to, &type)) {
+      ObjPtr<mirror::Class> boxed_from_class = GetBoxedPrimitiveClass(from_type);
+      if (boxed_from_class->IsSubClass(to)) {
+        type = from_type;
+      } else {
+        ThrowWrongMethodTypeException(callee_type.Get(), callsite_type.Get());
+        return false;
+      }
+    }
+
+    if (UNLIKELY(from_type != type)) {
+      ThrowWrongMethodTypeException(callee_type.Get(), callsite_type.Get());
+      return false;
+    }
+
+    if (!ConvertPrimitiveValueNoThrow(from_type, type, src_value, value)) {
+      ThrowWrongMethodTypeException(callee_type.Get(), callsite_type.Get());
+      return false;
+    }
+
+    // Then perform the actual boxing, and then set the reference.
+    ObjPtr<mirror::Object> boxed = BoxPrimitive(type, src_value);
+    value->SetL(boxed.Ptr());
+    return true;
+  } else {
+    // The source type is a reference and the target type is a primitive, so we must unbox.
+    DCHECK(IsReferenceType(from_type));
+    DCHECK(IsPrimitiveType(to_type));
+
+    ObjPtr<mirror::Object> from_obj(src_value.GetL());
+    if (UNLIKELY(from_obj == nullptr)) {
+      ThrowNullPointerException(
+          StringPrintf("Expected to unbox a '%s' primitive type but was returned null",
+                       from->PrettyDescriptor().c_str()).c_str());
+      return false;
+    }
+
+    Primitive::Type unboxed_type;
+    JValue unboxed_value;
+    if (UNLIKELY(!GetUnboxedTypeAndValue(from_obj, &unboxed_type, &unboxed_value))) {
+      ThrowWrongMethodTypeException(callee_type.Get(), callsite_type.Get());
+      return false;
+    }
+
+    if (UNLIKELY(!ConvertPrimitiveValueNoThrow(unboxed_type, to_type, unboxed_value, value))) {
+      ThrowClassCastException(from, to);
+      return false;
+    }
+
+    return true;
+  }
+}
+
+}  // namespace art
diff --git a/runtime/method_handles.h b/runtime/method_handles.h
index 5c68a8f..54c772a 100644
--- a/runtime/method_handles.h
+++ b/runtime/method_handles.h
@@ -19,8 +19,20 @@
 
 #include <ostream>
 
+#include "dex_instruction.h"
+#include "handle.h"
+#include "jvalue.h"
+#include "mirror/class.h"
+#include "mirror/method_type.h"
+
 namespace art {
 
+namespace mirror {
+  class MethodType;
+}
+
+class ShadowFrame;
+
 // Defines the behaviour of a given method handle. The behaviour
 // of a handle of a given kind is identical to the dex bytecode behaviour
 // of the equivalent instruction.
@@ -33,12 +45,13 @@
   kInvokeDirect,
   kInvokeStatic,
   kInvokeInterface,
+  kInvokeTransform,
   kInstanceGet,
   kInstancePut,
   kStaticGet,
   kStaticPut,
   kLastValidKind = kStaticPut,
-  kLastInvokeKind = kInvokeInterface
+  kLastInvokeKind = kInvokeTransform
 };
 
 // Whether the given method handle kind is some variant of an invoke.
@@ -46,6 +59,187 @@
   return handle_kind <= kLastInvokeKind;
 }
 
+// Returns true if there is a possible conversion from |from| to |to|
+// for a MethodHandle parameter.
+bool IsParameterTypeConvertible(ObjPtr<mirror::Class> from,
+                                ObjPtr<mirror::Class> to);
+
+// Returns true if there is a possible conversion from |from| to |to|
+// for the return type of a MethodHandle.
+bool IsReturnTypeConvertible(ObjPtr<mirror::Class> from,
+                             ObjPtr<mirror::Class> to);
+
+// Performs a conversion from type |from| to a distinct type |to| as
+// part of conversion of |caller_type| to |callee_type|. The value to
+// be converted is in |value|. Returns true on success and updates
+// |value| with the converted value, false otherwise.
+bool ConvertJValueCommon(Handle<mirror::MethodType> callsite_type,
+                         Handle<mirror::MethodType> callee_type,
+                         ObjPtr<mirror::Class> from,
+                         ObjPtr<mirror::Class> to,
+                         JValue* value)
+    REQUIRES_SHARED(Locks::mutator_lock_);
+
+// Converts the value of the argument at position |index| from type
+// expected by |callee_type| to type used by |callsite_type|. |value|
+// represents the value to be converted. Returns true on success and
+// updates |value|, false otherwise.
+ALWAYS_INLINE bool ConvertArgumentValue(Handle<mirror::MethodType> callsite_type,
+                                        Handle<mirror::MethodType> callee_type,
+                                        int index,
+                                        JValue* value)
+    REQUIRES_SHARED(Locks::mutator_lock_);
+
+// Converts the return value from return type yielded by
+// |callee_type| to the return type yielded by
+// |callsite_type|. |value| represents the value to be
+// converted. Returns true on success and updates |value|, false
+// otherwise.
+ALWAYS_INLINE bool ConvertReturnValue(Handle<mirror::MethodType> callsite_type,
+                                      Handle<mirror::MethodType> callee_type,
+                                      JValue* value)
+    REQUIRES_SHARED(Locks::mutator_lock_);
+
+// Perform argument conversions between |callsite_type| (the type of the
+// incoming arguments) and |callee_type| (the type of the method being
+// invoked). These include widening and narrowing conversions as well as
+// boxing and unboxing. Returns true on success, on false on failure. A
+// pending exception will always be set on failure.
+//
+// The values to be converted are read from an input source (of type G)
+// that provides three methods :
+//
+// class G {
+//   // Used to read the next boolean/short/int or float value from the
+//   // source.
+//   uint32_t Get();
+//
+//   // Used to the read the next reference value from the source.
+//   ObjPtr<mirror::Object> GetReference();
+//
+//   // Used to read the next double or long value from the source.
+//   int64_t GetLong();
+// }
+//
+// After conversion, the values are written to an output sink (of type S)
+// that provides three methods :
+//
+// class S {
+//   void Set(uint32_t);
+//   void SetReference(ObjPtr<mirror::Object>)
+//   void SetLong(int64_t);
+// }
+//
+// The semantics and usage of the Set methods are analagous to the getter
+// class.
+//
+// This method is instantiated in three different scenarions :
+// - <S = ShadowFrameSetter, G = ShadowFrameGetter> : copying from shadow
+//   frame to shadow frame, used in a regular polymorphic non-exact invoke.
+// - <S = EmulatedShadowFrameAccessor, G = ShadowFrameGetter> : entering into
+//   a transformer method from a polymorphic invoke.
+// - <S = ShadowFrameStter, G = EmulatedStackFrameAccessor> : entering into
+//   a regular poly morphic invoke from a transformer method.
+//
+// TODO(narayan): If we find that the instantiations of this function take
+// up too much space, we can make G / S abstract base classes that are
+// overridden by concrete classes.
+template <typename G, typename S>
+bool PerformConversions(Thread* self,
+                        Handle<mirror::MethodType> callsite_type,
+                        Handle<mirror::MethodType> callee_type,
+                        G* getter,
+                        S* setter,
+                        int32_t num_conversions) REQUIRES_SHARED(Locks::mutator_lock_);
+
+// A convenience wrapper around |PerformConversions|, for the case where
+// the setter and getter are both ShadowFrame based.
+template <bool is_range>
+bool ConvertAndCopyArgumentsFromCallerFrame(Thread* self,
+                                            Handle<mirror::MethodType> callsite_type,
+                                            Handle<mirror::MethodType> callee_type,
+                                            const ShadowFrame& caller_frame,
+                                            uint32_t first_src_reg,
+                                            uint32_t first_dest_reg,
+                                            const uint32_t (&arg)[Instruction::kMaxVarArgRegs],
+                                            ShadowFrame* callee_frame)
+    REQUIRES_SHARED(Locks::mutator_lock_);
+
+// A convenience class that allows for iteration through a list of
+// input argument registers |arg| for non-range invokes or a list of
+// consecutive registers starting with a given based for range
+// invokes.
+//
+// This is used to iterate over input arguments while performing standard
+// argument conversions.
+template <bool is_range> class ShadowFrameGetter {
+ public:
+  ShadowFrameGetter(size_t first_src_reg,
+                    const uint32_t (&arg)[Instruction::kMaxVarArgRegs],
+                    const ShadowFrame& shadow_frame) :
+      first_src_reg_(first_src_reg),
+      arg_(arg),
+      shadow_frame_(shadow_frame),
+      arg_index_(0) {
+  }
+
+  ALWAYS_INLINE uint32_t Get() REQUIRES_SHARED(Locks::mutator_lock_) {
+    const uint32_t next = (is_range ? first_src_reg_ + arg_index_ : arg_[arg_index_]);
+    ++arg_index_;
+
+    return shadow_frame_.GetVReg(next);
+  }
+
+  ALWAYS_INLINE int64_t GetLong() REQUIRES_SHARED(Locks::mutator_lock_) {
+    const uint32_t next = (is_range ? first_src_reg_ + arg_index_ : arg_[arg_index_]);
+    arg_index_ += 2;
+
+    return shadow_frame_.GetVRegLong(next);
+  }
+
+  ALWAYS_INLINE ObjPtr<mirror::Object> GetReference() REQUIRES_SHARED(Locks::mutator_lock_) {
+    const uint32_t next = (is_range ? first_src_reg_ + arg_index_ : arg_[arg_index_]);
+    ++arg_index_;
+
+    return shadow_frame_.GetVRegReference(next);
+  }
+
+ private:
+  const size_t first_src_reg_;
+  const uint32_t (&arg_)[Instruction::kMaxVarArgRegs];
+  const ShadowFrame& shadow_frame_;
+  size_t arg_index_;
+};
+
+// A convenience class that allows values to be written to a given shadow frame,
+// starting at location |first_dst_reg|.
+class ShadowFrameSetter {
+ public:
+  ShadowFrameSetter(ShadowFrame* shadow_frame,
+                    size_t first_dst_reg) :
+    shadow_frame_(shadow_frame),
+    arg_index_(first_dst_reg) {
+  }
+
+  ALWAYS_INLINE void Set(uint32_t value) REQUIRES_SHARED(Locks::mutator_lock_) {
+    shadow_frame_->SetVReg(arg_index_++, value);
+  }
+
+  ALWAYS_INLINE void SetReference(ObjPtr<mirror::Object> value)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    shadow_frame_->SetVRegReference(arg_index_++, value.Ptr());
+  }
+
+  ALWAYS_INLINE void SetLong(int64_t value) REQUIRES_SHARED(Locks::mutator_lock_) {
+    shadow_frame_->SetVRegLong(arg_index_, value);
+    arg_index_ += 2;
+  }
+
+ private:
+  ShadowFrame* shadow_frame_;
+  size_t arg_index_;
+};
+
 }  // namespace art
 
 #endif  // ART_RUNTIME_METHOD_HANDLES_H_
diff --git a/runtime/method_reference.h b/runtime/method_reference.h
index f4fe9b2..0b0afe6 100644
--- a/runtime/method_reference.h
+++ b/runtime/method_reference.h
@@ -18,15 +18,18 @@
 #define ART_RUNTIME_METHOD_REFERENCE_H_
 
 #include <stdint.h>
+#include <string>
+#include "dex_file.h"
 
 namespace art {
 
-class DexFile;
-
 // A method is uniquely located by its DexFile and the method_ids_ table index into that DexFile
 struct MethodReference {
   MethodReference(const DexFile* file, uint32_t index) : dex_file(file), dex_method_index(index) {
   }
+  std::string PrettyMethod(bool with_signature = true) {
+    return dex_file->PrettyMethod(dex_method_index, with_signature);
+  }
   const DexFile* dex_file;
   uint32_t dex_method_index;
 };
diff --git a/runtime/mirror/array-inl.h b/runtime/mirror/array-inl.h
index 9d7f98f..b11dad8 100644
--- a/runtime/mirror/array-inl.h
+++ b/runtime/mirror/array-inl.h
@@ -25,6 +25,7 @@
 #include "base/stringprintf.h"
 #include "class-inl.h"
 #include "gc/heap-inl.h"
+#include "obj_ptr-inl.h"
 #include "thread.h"
 
 namespace art {
@@ -100,10 +101,10 @@
   explicit SetLengthVisitor(int32_t length) : length_(length) {
   }
 
-  void operator()(Object* obj, size_t usable_size ATTRIBUTE_UNUSED) const
+  void operator()(ObjPtr<Object> obj, size_t usable_size ATTRIBUTE_UNUSED) const
       REQUIRES_SHARED(Locks::mutator_lock_) {
     // Avoid AsArray as object is not yet in live bitmap or allocation stack.
-    Array* array = down_cast<Array*>(obj);
+    ObjPtr<Array> array = ObjPtr<Array>::DownCast(obj);
     // DCHECK(array->IsArrayInstance());
     array->SetLength(length_);
   }
@@ -124,10 +125,10 @@
       component_size_shift_(component_size_shift) {
   }
 
-  void operator()(Object* obj, size_t usable_size) const
+  void operator()(ObjPtr<Object> obj, size_t usable_size) const
       REQUIRES_SHARED(Locks::mutator_lock_) {
     // Avoid AsArray as object is not yet in live bitmap or allocation stack.
-    Array* array = down_cast<Array*>(obj);
+    ObjPtr<Array> array = ObjPtr<Array>::DownCast(obj);
     // DCHECK(array->IsArrayInstance());
     int32_t length = (usable_size - header_size_) >> component_size_shift_;
     DCHECK_GE(length, minimum_length_);
@@ -149,8 +150,11 @@
 };
 
 template <bool kIsInstrumented, bool kFillUsable>
-inline Array* Array::Alloc(Thread* self, Class* array_class, int32_t component_count,
-                           size_t component_size_shift, gc::AllocatorType allocator_type) {
+inline Array* Array::Alloc(Thread* self,
+                           ObjPtr<Class> array_class,
+                           int32_t component_count,
+                           size_t component_size_shift,
+                           gc::AllocatorType allocator_type) {
   DCHECK(allocator_type != gc::kAllocatorTypeLOS);
   DCHECK(array_class != nullptr);
   DCHECK(array_class->IsArrayClass());
@@ -164,7 +168,7 @@
   // 32-bit.
   if (UNLIKELY(size == 0)) {
     self->ThrowOutOfMemoryError(StringPrintf("%s of length %d would overflow",
-                                             PrettyDescriptor(array_class).c_str(),
+                                             array_class->PrettyDescriptor().c_str(),
                                              component_count).c_str());
     return nullptr;
   }
@@ -203,7 +207,9 @@
 
 template<typename T>
 inline PrimitiveArray<T>* PrimitiveArray<T>::Alloc(Thread* self, size_t length) {
-  Array* raw_array = Array::Alloc<true>(self, GetArrayClass(), length,
+  Array* raw_array = Array::Alloc<true>(self,
+                                        GetArrayClass(),
+                                        length,
                                         ComponentSizeShiftWidth(sizeof(T)),
                                         Runtime::Current()->GetHeap()->GetCurrentAllocator());
   return down_cast<PrimitiveArray<T>*>(raw_array);
@@ -274,7 +280,9 @@
 }
 
 template<class T>
-inline void PrimitiveArray<T>::Memmove(int32_t dst_pos, PrimitiveArray<T>* src, int32_t src_pos,
+inline void PrimitiveArray<T>::Memmove(int32_t dst_pos,
+                                       ObjPtr<PrimitiveArray<T>> src,
+                                       int32_t src_pos,
                                        int32_t count) {
   if (UNLIKELY(count == 0)) {
     return;
@@ -334,7 +342,9 @@
 }
 
 template<class T>
-inline void PrimitiveArray<T>::Memcpy(int32_t dst_pos, PrimitiveArray<T>* src, int32_t src_pos,
+inline void PrimitiveArray<T>::Memcpy(int32_t dst_pos,
+                                      ObjPtr<PrimitiveArray<T>> src,
+                                      int32_t src_pos,
                                       int32_t count) {
   if (UNLIKELY(count == 0)) {
     return;
@@ -414,6 +424,13 @@
   }
 }
 
+template<typename T>
+inline void PrimitiveArray<T>::SetArrayClass(ObjPtr<Class> array_class) {
+  CHECK(array_class_.IsNull());
+  CHECK(array_class != nullptr);
+  array_class_ = GcRoot<Class>(array_class);
+}
+
 }  // namespace mirror
 }  // namespace art
 
diff --git a/runtime/mirror/array.cc b/runtime/mirror/array.cc
index aee48cc..8afa4aa 100644
--- a/runtime/mirror/array.cc
+++ b/runtime/mirror/array.cc
@@ -60,7 +60,7 @@
     for (int32_t i = 0; i < array_length; i++) {
       StackHandleScope<1> hs2(self);
       Handle<mirror::Class> h_component_type(hs2.NewHandle(array_class->GetComponentType()));
-      Array* sub_array = RecursiveCreateMultiArray(self, h_component_type,
+      ObjPtr<Array> sub_array = RecursiveCreateMultiArray(self, h_component_type,
                                                    current_dimension + 1, dimensions);
       if (UNLIKELY(sub_array == nullptr)) {
         CHECK(self->IsExceptionPending());
@@ -93,7 +93,7 @@
 
   // Find/generate the array class.
   ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
-  mirror::Class* element_class_ptr = element_class.Get();
+  ObjPtr<mirror::Class>  element_class_ptr = element_class.Get();
   StackHandleScope<1> hs(self);
   MutableHandle<mirror::Class> array_class(
       hs.NewHandle(class_linker->FindArrayClass(self, &element_class_ptr)));
@@ -102,7 +102,7 @@
     return nullptr;
   }
   for (int32_t i = 1; i < dimensions->GetLength(); ++i) {
-    mirror::Class* array_class_ptr = array_class.Get();
+    ObjPtr<mirror::Class> array_class_ptr = array_class.Get();
     array_class.Assign(class_linker->FindArrayClass(self, &array_class_ptr));
     if (UNLIKELY(array_class.Get() == nullptr)) {
       CHECK(self->IsExceptionPending());
@@ -110,18 +110,18 @@
     }
   }
   // Create the array.
-  Array* new_array = RecursiveCreateMultiArray(self, array_class, 0, dimensions);
+  ObjPtr<Array> new_array = RecursiveCreateMultiArray(self, array_class, 0, dimensions);
   if (UNLIKELY(new_array == nullptr)) {
     CHECK(self->IsExceptionPending());
   }
-  return new_array;
+  return new_array.Ptr();
 }
 
 void Array::ThrowArrayIndexOutOfBoundsException(int32_t index) {
   art::ThrowArrayIndexOutOfBoundsException(index, GetLength());
 }
 
-void Array::ThrowArrayStoreException(Object* object) {
+void Array::ThrowArrayStoreException(ObjPtr<Object> object) {
   art::ThrowArrayStoreException(object->GetClass(), this->GetClass());
 }
 
@@ -136,12 +136,13 @@
       heap->GetCurrentNonMovingAllocator();
   const auto component_size = GetClass()->GetComponentSize();
   const auto component_shift = GetClass()->GetComponentSizeShift();
-  Array* new_array = Alloc<true>(self, GetClass(), new_length, component_shift, allocator_type);
+  ObjPtr<Array> new_array = Alloc<true>(self, GetClass(), new_length, component_shift, allocator_type);
   if (LIKELY(new_array != nullptr)) {
-    memcpy(new_array->GetRawData(component_size, 0), h_this->GetRawData(component_size, 0),
+    memcpy(new_array->GetRawData(component_size, 0),
+           h_this->GetRawData(component_size, 0),
            std::min(h_this->GetLength(), new_length) << component_shift);
   }
-  return new_array;
+  return new_array.Ptr();
 }
 
 
diff --git a/runtime/mirror/array.h b/runtime/mirror/array.h
index 6c82eb9..994e9b2 100644
--- a/runtime/mirror/array.h
+++ b/runtime/mirror/array.h
@@ -20,6 +20,7 @@
 #include "base/enums.h"
 #include "gc_root.h"
 #include "gc/allocator_type.h"
+#include "obj_ptr.h"
 #include "object.h"
 #include "object_callbacks.h"
 
@@ -38,13 +39,19 @@
   // least component_count size, however, if there's usable space at the end of the allocation the
   // array will fill it.
   template <bool kIsInstrumented, bool kFillUsable = false>
-  ALWAYS_INLINE static Array* Alloc(Thread* self, Class* array_class, int32_t component_count,
-                                    size_t component_size_shift, gc::AllocatorType allocator_type)
-      REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
+  ALWAYS_INLINE static Array* Alloc(Thread* self,
+                                    ObjPtr<Class> array_class,
+                                    int32_t component_count,
+                                    size_t component_size_shift,
+                                    gc::AllocatorType allocator_type)
+      REQUIRES_SHARED(Locks::mutator_lock_)
+      REQUIRES(!Roles::uninterruptible_);
 
-  static Array* CreateMultiArray(Thread* self, Handle<Class> element_class,
+  static Array* CreateMultiArray(Thread* self,
+                                 Handle<Class> element_class,
                                  Handle<IntArray> dimensions)
-      REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
+      REQUIRES_SHARED(Locks::mutator_lock_)
+      REQUIRES(!Roles::uninterruptible_);
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
            ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
@@ -89,7 +96,7 @@
       REQUIRES(!Roles::uninterruptible_);
 
  protected:
-  void ThrowArrayStoreException(Object* object) REQUIRES_SHARED(Locks::mutator_lock_)
+  void ThrowArrayStoreException(ObjPtr<Object> object) REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!Roles::uninterruptible_);
 
  private:
@@ -146,7 +153,7 @@
    * smaller than element size copies). Arguments are assumed to be within the bounds of the array
    * and the arrays non-null.
    */
-  void Memmove(int32_t dst_pos, PrimitiveArray<T>* src, int32_t src_pos, int32_t count)
+  void Memmove(int32_t dst_pos, ObjPtr<PrimitiveArray<T>> src, int32_t src_pos, int32_t count)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   /*
@@ -154,14 +161,10 @@
    * smaller than element size copies). Arguments are assumed to be within the bounds of the array
    * and the arrays non-null.
    */
-  void Memcpy(int32_t dst_pos, PrimitiveArray<T>* src, int32_t src_pos, int32_t count)
+  void Memcpy(int32_t dst_pos, ObjPtr<PrimitiveArray<T>> src, int32_t src_pos, int32_t count)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  static void SetArrayClass(Class* array_class) {
-    CHECK(array_class_.IsNull());
-    CHECK(array_class != nullptr);
-    array_class_ = GcRoot<Class>(array_class);
-  }
+  static void SetArrayClass(ObjPtr<Class> array_class);
 
   template <ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   static Class* GetArrayClass() REQUIRES_SHARED(Locks::mutator_lock_) {
diff --git a/runtime/mirror/class-inl.h b/runtime/mirror/class-inl.h
index 1cfed74..9a6d60e 100644
--- a/runtime/mirror/class-inl.h
+++ b/runtime/mirror/class-inl.h
@@ -42,14 +42,14 @@
 template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline uint32_t Class::GetObjectSize() {
   // Note: Extra parentheses to avoid the comma being interpreted as macro parameter separator.
-  DCHECK((!IsVariableSize<kVerifyFlags, kReadBarrierOption>())) << "class=" << PrettyTypeOf(this);
+  DCHECK((!IsVariableSize<kVerifyFlags, kReadBarrierOption>())) << "class=" << PrettyTypeOf();
   return GetField32(ObjectSizeOffset());
 }
 
 template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline uint32_t Class::GetObjectSizeAllocFastPath() {
   // Note: Extra parentheses to avoid the comma being interpreted as macro parameter separator.
-  DCHECK((!IsVariableSize<kVerifyFlags, kReadBarrierOption>())) << "class=" << PrettyTypeOf(this);
+  DCHECK((!IsVariableSize<kVerifyFlags, kReadBarrierOption>())) << "class=" << PrettyTypeOf();
   return GetField32(ObjectSizeAllocFastPathOffset());
 }
 
@@ -218,7 +218,7 @@
 inline ArtMethod* Class::GetVirtualMethod(size_t i, PointerSize pointer_size) {
   CheckPointerSize(pointer_size);
   DCHECK(IsResolved<kVerifyFlags>() || IsErroneous<kVerifyFlags>())
-      << PrettyClass(this) << " status=" << GetStatus();
+      << Class::PrettyClass() << " status=" << GetStatus();
   return GetVirtualMethodUnchecked(i, pointer_size);
 }
 
@@ -306,14 +306,14 @@
   SetEmbeddedVTableEntryUnchecked(i, method, pointer_size);
 }
 
-inline bool Class::Implements(Class* klass) {
+inline bool Class::Implements(ObjPtr<Class> klass) {
   DCHECK(klass != nullptr);
-  DCHECK(klass->IsInterface()) << PrettyClass(this);
+  DCHECK(klass->IsInterface()) << PrettyClass();
   // All interfaces implemented directly and by our superclass, and
   // recursively all super-interfaces of those interfaces, are listed
   // in iftable_, so we can just do a linear scan through that.
   int32_t iftable_count = GetIfTableCount();
-  IfTable* iftable = GetIfTable();
+  ObjPtr<IfTable> iftable = GetIfTable();
   for (int32_t i = 0; i < iftable_count; i++) {
     if (iftable->GetInterface(i) == klass) {
       return true;
@@ -342,20 +342,20 @@
 //   Object[]         = int[] --> false
 //
 inline bool Class::IsArrayAssignableFromArray(ObjPtr<Class> src) {
-  DCHECK(IsArrayClass())  << PrettyClass(this);
-  DCHECK(src->IsArrayClass()) << PrettyClass(src);
+  DCHECK(IsArrayClass())  << PrettyClass();
+  DCHECK(src->IsArrayClass()) << src->PrettyClass();
   return GetComponentType()->IsAssignableFrom(src->GetComponentType());
 }
 
 inline bool Class::IsAssignableFromArray(ObjPtr<Class> src) {
-  DCHECK(!IsInterface()) << PrettyClass(this);  // handled first in IsAssignableFrom
-  DCHECK(src->IsArrayClass()) << PrettyClass(src);
+  DCHECK(!IsInterface()) << PrettyClass();  // handled first in IsAssignableFrom
+  DCHECK(src->IsArrayClass()) << src->PrettyClass();
   if (!IsArrayClass()) {
     // If "this" is not also an array, it must be Object.
     // src's super should be java_lang_Object, since it is an array.
-    Class* java_lang_Object = src->GetSuperClass();
-    DCHECK(java_lang_Object != nullptr) << PrettyClass(src);
-    DCHECK(java_lang_Object->GetSuperClass() == nullptr) << PrettyClass(src);
+    ObjPtr<Class> java_lang_Object = src->GetSuperClass();
+    DCHECK(java_lang_Object != nullptr) << src->PrettyClass();
+    DCHECK(java_lang_Object->GetSuperClass() == nullptr) << src->PrettyClass();
     return this == java_lang_Object;
   }
   return IsArrayAssignableFromArray(src);
@@ -384,7 +384,7 @@
     DCHECK(dex_access_to != nullptr);
     if (UNLIKELY(!this->CanAccess(dex_access_to))) {
       if (throw_on_failure) {
-        ThrowIllegalAccessErrorClass(this, dex_access_to.Ptr());
+        ThrowIllegalAccessErrorClass(this, dex_access_to);
       }
       return false;
     }
@@ -448,24 +448,29 @@
 inline bool Class::CheckResolvedFieldAccess(ObjPtr<Class> access_to,
                                             ArtField* field,
                                             uint32_t field_idx) {
-  return ResolvedFieldAccessTest<true, true>(access_to.Ptr(), field, field_idx, nullptr);
+  return ResolvedFieldAccessTest<true, true>(access_to, field, field_idx, nullptr);
 }
 
-inline bool Class::CanAccessResolvedMethod(Class* access_to, ArtMethod* method,
-                                           DexCache* dex_cache, uint32_t method_idx) {
+inline bool Class::CanAccessResolvedMethod(ObjPtr<Class> access_to,
+                                           ArtMethod* method,
+                                           ObjPtr<DexCache> dex_cache,
+                                           uint32_t method_idx) {
   return ResolvedMethodAccessTest<false, false, kStatic>(access_to, method, method_idx, dex_cache);
 }
 
 template <InvokeType throw_invoke_type>
-inline bool Class::CheckResolvedMethodAccess(Class* access_to, ArtMethod* method,
+inline bool Class::CheckResolvedMethodAccess(ObjPtr<Class> access_to,
+                                             ArtMethod* method,
                                              uint32_t method_idx) {
-  return ResolvedMethodAccessTest<true, true, throw_invoke_type>(access_to, method, method_idx,
+  return ResolvedMethodAccessTest<true, true, throw_invoke_type>(access_to,
+                                                                 method,
+                                                                 method_idx,
                                                                  nullptr);
 }
 
 inline bool Class::IsSubClass(ObjPtr<Class> klass) {
-  DCHECK(!IsInterface()) << PrettyClass(this);
-  DCHECK(!IsArrayClass()) << PrettyClass(this);
+  DCHECK(!IsInterface()) << PrettyClass();
+  DCHECK(!IsArrayClass()) << PrettyClass();
   ObjPtr<Class> current = this;
   do {
     if (current == klass) {
@@ -478,13 +483,13 @@
 
 inline ArtMethod* Class::FindVirtualMethodForInterface(ArtMethod* method,
                                                        PointerSize pointer_size) {
-  Class* declaring_class = method->GetDeclaringClass();
-  DCHECK(declaring_class != nullptr) << PrettyClass(this);
-  DCHECK(declaring_class->IsInterface()) << PrettyMethod(method);
+  ObjPtr<Class> declaring_class = method->GetDeclaringClass();
+  DCHECK(declaring_class != nullptr) << PrettyClass();
+  DCHECK(declaring_class->IsInterface()) << method->PrettyMethod();
   DCHECK(!method->IsCopied());
   // TODO cache to improve lookup speed
   const int32_t iftable_count = GetIfTableCount();
-  IfTable* iftable = GetIfTable();
+  ObjPtr<IfTable> iftable = GetIfTable();
   for (int32_t i = 0; i < iftable_count; i++) {
     if (iftable->GetInterface(i) == declaring_class) {
       return iftable->GetMethodArray(i)->GetElementPtrSize<ArtMethod*>(
@@ -521,20 +526,18 @@
 template<VerifyObjectFlags kVerifyFlags,
          ReadBarrierOption kReadBarrierOption>
 inline IfTable* Class::GetIfTable() {
-  return GetFieldObject<IfTable, kVerifyFlags, kReadBarrierOption>(
-      OFFSET_OF_OBJECT_MEMBER(Class, iftable_));
+  ObjPtr<IfTable> ret = GetFieldObject<IfTable, kVerifyFlags, kReadBarrierOption>(IfTableOffset());
+  DCHECK(ret != nullptr) << PrettyClass(this);
+  return ret.Ptr();
 }
 
 inline int32_t Class::GetIfTableCount() {
-  IfTable* iftable = GetIfTable();
-  if (iftable == nullptr) {
-    return 0;
-  }
-  return iftable->Count();
+  return GetIfTable()->Count();
 }
 
-inline void Class::SetIfTable(IfTable* new_iftable) {
-  SetFieldObject<false>(OFFSET_OF_OBJECT_MEMBER(Class, iftable_), new_iftable);
+inline void Class::SetIfTable(ObjPtr<IfTable> new_iftable) {
+  DCHECK(new_iftable != nullptr) << PrettyClass(this);
+  SetFieldObject<false>(IfTableOffset(), new_iftable);
 }
 
 inline LengthPrefixedArray<ArtField>* Class::GetIFieldsPtr() {
@@ -544,20 +547,20 @@
 
 template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline MemberOffset Class::GetFirstReferenceInstanceFieldOffset() {
-  Class* super_class = GetSuperClass<kVerifyFlags, kReadBarrierOption>();
+  ObjPtr<Class> super_class = GetSuperClass<kVerifyFlags, kReadBarrierOption>();
   return (super_class != nullptr)
       ? MemberOffset(RoundUp(super_class->GetObjectSize<kVerifyFlags, kReadBarrierOption>(),
-                             sizeof(mirror::HeapReference<mirror::Object>)))
+                             kHeapReferenceSize))
       : ClassOffset();
 }
 
 template <VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline MemberOffset Class::GetFirstReferenceStaticFieldOffset(PointerSize pointer_size) {
   DCHECK(IsResolved());
-  uint32_t base = sizeof(mirror::Class);  // Static fields come after the class.
+  uint32_t base = sizeof(Class);  // Static fields come after the class.
   if (ShouldHaveEmbeddedVTable<kVerifyFlags, kReadBarrierOption>()) {
     // Static fields come after the embedded tables.
-    base = mirror::Class::ComputeClassSize(
+    base = Class::ComputeClassSize(
         true, GetEmbeddedVTableLength(), 0, 0, 0, 0, 0, pointer_size);
   }
   return MemberOffset(base);
@@ -566,10 +569,10 @@
 inline MemberOffset Class::GetFirstReferenceStaticFieldOffsetDuringLinking(
     PointerSize pointer_size) {
   DCHECK(IsLoaded());
-  uint32_t base = sizeof(mirror::Class);  // Static fields come after the class.
+  uint32_t base = sizeof(Class);  // Static fields come after the class.
   if (ShouldHaveEmbeddedVTable()) {
     // Static fields come after the embedded tables.
-    base = mirror::Class::ComputeClassSize(true, GetVTableDuringLinking()->GetLength(),
+    base = Class::ComputeClassSize(true, GetVTableDuringLinking()->GetLength(),
                                            0, 0, 0, 0, 0, pointer_size);
   }
   return MemberOffset(base);
@@ -642,7 +645,7 @@
           IsErroneous<static_cast<VerifyObjectFlags>(kVerifyFlags & ~kVerifyThis)>()
       << " IsString=" << (this == String::GetJavaLangString())
       << " status= " << GetStatus<kVerifyFlags>()
-      << " descriptor=" << PrettyDescriptor(this);
+      << " descriptor=" << PrettyDescriptor();
   return GetField32<kVerifyFlags>(AccessFlagsOffset());
 }
 
@@ -650,7 +653,7 @@
   return GetFieldObject<String>(OFFSET_OF_OBJECT_MEMBER(Class, name_));
 }
 
-inline void Class::SetName(String* name) {
+inline void Class::SetName(ObjPtr<String> name) {
   if (Runtime::Current()->IsActiveTransaction()) {
     SetFieldObject<true>(OFFSET_OF_OBJECT_MEMBER(Class, name_), name);
   } else {
@@ -682,34 +685,38 @@
 
 inline void Class::CheckObjectAlloc() {
   DCHECK(!IsArrayClass())
-      << PrettyClass(this)
+      << PrettyClass()
       << "A array shouldn't be allocated through this "
       << "as it requires a pre-fence visitor that sets the class size.";
   DCHECK(!IsClassClass())
-      << PrettyClass(this)
+      << PrettyClass()
       << "A class object shouldn't be allocated through this "
       << "as it requires a pre-fence visitor that sets the class size.";
   DCHECK(!IsStringClass())
-      << PrettyClass(this)
+      << PrettyClass()
       << "A string shouldn't be allocated through this "
       << "as it requires a pre-fence visitor that sets the class size.";
-  DCHECK(IsInstantiable()) << PrettyClass(this);
+  DCHECK(IsInstantiable()) << PrettyClass();
   // TODO: decide whether we want this check. It currently fails during bootstrap.
-  // DCHECK(!Runtime::Current()->IsStarted() || IsInitializing()) << PrettyClass(this);
+  // DCHECK(!Runtime::Current()->IsStarted() || IsInitializing()) << PrettyClass();
   DCHECK_GE(this->object_size_, sizeof(Object));
 }
 
 template<bool kIsInstrumented, bool kCheckAddFinalizer>
-inline Object* Class::Alloc(Thread* self, gc::AllocatorType allocator_type) {
+inline ObjPtr<Object> Class::Alloc(Thread* self, gc::AllocatorType allocator_type) {
   CheckObjectAlloc();
   gc::Heap* heap = Runtime::Current()->GetHeap();
   const bool add_finalizer = kCheckAddFinalizer && IsFinalizable();
   if (!kCheckAddFinalizer) {
     DCHECK(!IsFinalizable());
   }
-  mirror::Object* obj =
-      heap->AllocObjectWithAllocator<kIsInstrumented, false>(self, this, this->object_size_,
-                                                             allocator_type, VoidFunctor());
+  // Note that the this pointer may be invalidated after the allocation.
+  ObjPtr<Object> obj =
+      heap->AllocObjectWithAllocator<kIsInstrumented, false>(self,
+                                                             this,
+                                                             this->object_size_,
+                                                             allocator_type,
+                                                             VoidFunctor());
   if (add_finalizer && LIKELY(obj != nullptr)) {
     heap->AddFinalizerReference(self, &obj);
     if (UNLIKELY(self->IsExceptionPending())) {
@@ -717,14 +724,14 @@
       obj = nullptr;
     }
   }
-  return obj;
+  return obj.Ptr();
 }
 
-inline Object* Class::AllocObject(Thread* self) {
+inline ObjPtr<Object> Class::AllocObject(Thread* self) {
   return Alloc<true>(self, Runtime::Current()->GetHeap()->GetCurrentAllocator());
 }
 
-inline Object* Class::AllocNonMovableObject(Thread* self) {
+inline ObjPtr<Object> Class::AllocNonMovableObject(Thread* self) {
   return Alloc<true>(self, Runtime::Current()->GetHeap()->GetCurrentNonMovingAllocator());
 }
 
@@ -746,7 +753,7 @@
   }
 
   // Space used by reference statics.
-  size += num_ref_static_fields * sizeof(HeapReference<Object>);
+  size += num_ref_static_fields * kHeapReferenceSize;
   if (!IsAligned<8>(size) && num_64bit_static_fields > 0) {
     uint32_t gap = 8 - (size & 0x7);
     size += gap;  // will be padded
@@ -777,8 +784,8 @@
           VerifyObjectFlags kVerifyFlags,
           ReadBarrierOption kReadBarrierOption,
           typename Visitor>
-inline void Class::VisitReferences(mirror::Class* klass, const Visitor& visitor) {
-  VisitInstanceFieldsReferences<kVerifyFlags, kReadBarrierOption>(klass, visitor);
+inline void Class::VisitReferences(ObjPtr<Class> klass, const Visitor& visitor) {
+  VisitInstanceFieldsReferences<kVerifyFlags, kReadBarrierOption>(klass.Ptr(), visitor);
   // Right after a class is allocated, but not yet loaded
   // (kStatusNotReady, see ClassLinker::LoadClass()), GC may find it
   // and scan it. IsTemp() may call Class::GetAccessFlags() but may
@@ -806,7 +813,7 @@
 
 template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline bool Class::IsClassClass() {
-  Class* java_lang_Class = GetClass<kVerifyFlags, kReadBarrierOption>()->
+  ObjPtr<Class> java_lang_Class = GetClass<kVerifyFlags, kReadBarrierOption>()->
       template GetClass<kVerifyFlags, kReadBarrierOption>();
   return this == java_lang_Class;
 }
@@ -831,8 +838,8 @@
 
 inline void Class::AssertInitializedOrInitializingInThread(Thread* self) {
   if (kIsDebugBuild && !IsInitialized()) {
-    CHECK(IsInitializing()) << PrettyClass(this) << " is not initializing: " << GetStatus();
-    CHECK_EQ(GetClinitThreadId(), self->GetTid()) << PrettyClass(this)
+    CHECK(IsInitializing()) << PrettyClass() << " is not initializing: " << GetStatus();
+    CHECK_EQ(GetClinitThreadId(), self->GetTid()) << PrettyClass()
                                                   << " is initializing in a different thread";
   }
 }
@@ -879,12 +886,11 @@
   SetFieldBoolean<false, false>(GetSlowPathFlagOffset(), enabled);
 }
 
-inline void Class::InitializeClassVisitor::operator()(
-    mirror::Object* obj, size_t usable_size) const {
+inline void Class::InitializeClassVisitor::operator()(ObjPtr<Object> obj,
+                                                      size_t usable_size) const {
   DCHECK_LE(class_size_, usable_size);
   // Avoid AsClass as object is not yet in live bitmap or allocation stack.
-  mirror::Class* klass = down_cast<mirror::Class*>(obj);
-  // DCHECK(klass->IsClass());
+  ObjPtr<Class> klass = ObjPtr<Class>::DownCast(obj);
   klass->SetClassSize(class_size_);
   klass->SetPrimitiveType(Primitive::kPrimNot);  // Default to not being primitive.
   klass->SetDexClassDefIndex(DexFile::kDexNoIndex16);  // Default to no valid class def index.
@@ -916,7 +922,7 @@
   } else if (IsArrayClass()) {
     return 2;
   } else if (IsProxyClass()) {
-    mirror::ObjectArray<mirror::Class>* interfaces = GetInterfaces();
+    ObjectArray<Class>* interfaces = GetInterfaces();
     return interfaces != nullptr ? interfaces->GetLength() : 0;
   } else {
     const DexFile::TypeList* interfaces = GetInterfaceTypeList();
@@ -937,7 +943,7 @@
 }
 
 template<ReadBarrierOption kReadBarrierOption, class Visitor>
-void mirror::Class::VisitNativeRoots(Visitor& visitor, PointerSize pointer_size) {
+void Class::VisitNativeRoots(Visitor& visitor, PointerSize pointer_size) {
   for (ArtField& field : GetSFieldsUnchecked()) {
     // Visit roots first in case the declaring class gets moved.
     field.VisitRoots(visitor);
@@ -1066,7 +1072,7 @@
 }
 
 template <VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption, typename Visitor>
-inline void Class::FixupNativePointers(mirror::Class* dest,
+inline void Class::FixupNativePointers(Class* dest,
                                        PointerSize pointer_size,
                                        const Visitor& visitor) {
   // Update the field arrays.
@@ -1135,6 +1141,14 @@
   return this->IsInSamePackage(access_to);
 }
 
+inline bool Class::CannotBeAssignedFromOtherTypes() {
+  if (!IsArrayClass()) {
+    return IsFinal();
+  }
+  ObjPtr<Class> component = GetComponentType();
+  return component->IsPrimitive() || component->CannotBeAssignedFromOtherTypes();
+}
+
 }  // namespace mirror
 }  // namespace art
 
diff --git a/runtime/mirror/class.cc b/runtime/mirror/class.cc
index 40742d2..db46027 100644
--- a/runtime/mirror/class.cc
+++ b/runtime/mirror/class.cc
@@ -18,6 +18,7 @@
 
 #include "art_field-inl.h"
 #include "art_method-inl.h"
+#include "class_ext.h"
 #include "class_linker-inl.h"
 #include "class_loader.h"
 #include "class-inl.h"
@@ -29,6 +30,7 @@
 #include "method.h"
 #include "object_array-inl.h"
 #include "object-inl.h"
+#include "object_lock.h"
 #include "runtime.h"
 #include "thread.h"
 #include "throwable.h"
@@ -40,12 +42,12 @@
 
 GcRoot<Class> Class::java_lang_Class_;
 
-void Class::SetClassClass(Class* java_lang_Class) {
+void Class::SetClassClass(ObjPtr<Class> java_lang_Class) {
   CHECK(java_lang_Class_.IsNull())
       << java_lang_Class_.Read()
       << " " << java_lang_Class;
   CHECK(java_lang_Class != nullptr);
-  java_lang_Class->SetClassFlags(mirror::kClassFlagClass);
+  java_lang_Class->SetClassFlags(kClassFlagClass);
   java_lang_Class_ = GcRoot<Class>(java_lang_Class);
 }
 
@@ -58,12 +60,49 @@
   java_lang_Class_.VisitRootIfNonNull(visitor, RootInfo(kRootStickyClass));
 }
 
-inline void Class::SetVerifyError(mirror::Object* error) {
-  CHECK(error != nullptr) << PrettyClass(this);
-  if (Runtime::Current()->IsActiveTransaction()) {
-    SetFieldObject<true>(OFFSET_OF_OBJECT_MEMBER(Class, verify_error_), error);
+ClassExt* Class::GetExtData() {
+  return GetFieldObject<ClassExt>(OFFSET_OF_OBJECT_MEMBER(Class, ext_data_));
+}
+
+ClassExt* Class::EnsureExtDataPresent(Thread* self) {
+  ObjPtr<ClassExt> existing(GetExtData());
+  if (!existing.IsNull()) {
+    return existing.Ptr();
+  }
+  StackHandleScope<3> hs(self);
+  // Handlerize 'this' since we are allocating here.
+  Handle<Class> h_this(hs.NewHandle(this));
+  // Clear exception so we can allocate.
+  Handle<Throwable> throwable(hs.NewHandle(self->GetException()));
+  self->ClearException();
+  // Allocate the ClassExt
+  Handle<ClassExt> new_ext(hs.NewHandle(ClassExt::Alloc(self)));
+  if (new_ext.Get() == nullptr) {
+    // OOM allocating the classExt.
+    // TODO Should we restore the suppressed exception?
+    self->AssertPendingOOMException();
+    return nullptr;
   } else {
-    SetFieldObject<false>(OFFSET_OF_OBJECT_MEMBER(Class, verify_error_), error);
+    MemberOffset ext_offset(OFFSET_OF_OBJECT_MEMBER(Class, ext_data_));
+    bool set;
+    // Set the ext_data_ field using CAS semantics.
+    if (Runtime::Current()->IsActiveTransaction()) {
+      set = h_this->CasFieldStrongSequentiallyConsistentObject<true>(ext_offset,
+                                                                     ObjPtr<ClassExt>(nullptr),
+                                                                     new_ext.Get());
+    } else {
+      set = h_this->CasFieldStrongSequentiallyConsistentObject<false>(ext_offset,
+                                                                      ObjPtr<ClassExt>(nullptr),
+                                                                      new_ext.Get());
+    }
+    ObjPtr<ClassExt> ret(set ? new_ext.Get() : h_this->GetExtData());
+    DCHECK(!set || h_this->GetExtData() == new_ext.Get());
+    CHECK(!ret.IsNull());
+    // Restore the exception if there was one.
+    if (throwable.Get() != nullptr) {
+      self->SetException(throwable.Get());
+    }
+    return ret.Ptr();
   }
 }
 
@@ -74,31 +113,37 @@
   if (LIKELY(class_linker_initialized)) {
     if (UNLIKELY(new_status <= old_status && new_status != kStatusError &&
                  new_status != kStatusRetired)) {
-      LOG(FATAL) << "Unexpected change back of class status for " << PrettyClass(h_this.Get())
+      LOG(FATAL) << "Unexpected change back of class status for " << h_this->PrettyClass()
                  << " " << old_status << " -> " << new_status;
     }
     if (new_status >= kStatusResolved || old_status >= kStatusResolved) {
       // When classes are being resolved the resolution code should hold the lock.
       CHECK_EQ(h_this->GetLockOwnerThreadId(), self->GetThreadId())
             << "Attempt to change status of class while not holding its lock: "
-            << PrettyClass(h_this.Get()) << " " << old_status << " -> " << new_status;
+            << h_this->PrettyClass() << " " << old_status << " -> " << new_status;
     }
   }
   if (UNLIKELY(new_status == kStatusError)) {
     CHECK_NE(h_this->GetStatus(), kStatusError)
         << "Attempt to set as erroneous an already erroneous class "
-        << PrettyClass(h_this.Get());
+        << h_this->PrettyClass();
     if (VLOG_IS_ON(class_linker)) {
-      LOG(ERROR) << "Setting " << PrettyDescriptor(h_this.Get()) << " to erroneous.";
+      LOG(ERROR) << "Setting " << h_this->PrettyDescriptor() << " to erroneous.";
       if (self->IsExceptionPending()) {
         LOG(ERROR) << "Exception: " << self->GetException()->Dump();
       }
     }
 
-    // Remember the current exception.
-    CHECK(self->GetException() != nullptr);
-    h_this->SetVerifyError(self->GetException());
+    ObjPtr<ClassExt> ext(h_this->EnsureExtDataPresent(self));
+    if (!ext.IsNull()) {
+      self->AssertPendingException();
+      ext->SetVerifyError(self->GetException());
+    } else {
+      self->AssertPendingOOMException();
+    }
+    self->AssertPendingException();
   }
+
   static_assert(sizeof(Status) == sizeof(uint32_t), "Size of status not equal to uint32");
   if (Runtime::Current()->IsActiveTransaction()) {
     h_this->SetField32Volatile<true>(StatusOffset(), new_status);
@@ -127,7 +172,7 @@
     if (h_this->IsTemp()) {
       // Class is a temporary one, ensure that waiters for resolution get notified of retirement
       // so that they can grab the new version of the class from the class linker's table.
-      CHECK_LT(new_status, kStatusResolved) << PrettyDescriptor(h_this.Get());
+      CHECK_LT(new_status, kStatusResolved) << h_this->PrettyDescriptor();
       if (new_status == kStatusRetired || new_status == kStatusError) {
         h_this->NotifyAll(self);
       }
@@ -140,7 +185,7 @@
   }
 }
 
-void Class::SetDexCache(DexCache* new_dex_cache) {
+void Class::SetDexCache(ObjPtr<DexCache> new_dex_cache) {
   SetFieldObject<false>(OFFSET_OF_OBJECT_MEMBER(Class, dex_cache_), new_dex_cache);
   SetDexCacheStrings(new_dex_cache != nullptr ? new_dex_cache->GetStrings() : nullptr);
 }
@@ -149,7 +194,7 @@
   if (kIsDebugBuild && new_class_size < GetClassSize()) {
     DumpClass(LOG_STREAM(FATAL_WITHOUT_ABORT), kDumpClassFullDetail);
     LOG(FATAL_WITHOUT_ABORT) << new_class_size << " vs " << GetClassSize();
-    LOG(FATAL) << "class=" << PrettyTypeOf(this);
+    LOG(FATAL) << "class=" << PrettyTypeOf();
   }
   // Not called within a transaction.
   SetField32<false>(OFFSET_OF_OBJECT_MEMBER(Class, class_size_), new_class_size);
@@ -196,7 +241,7 @@
 
 void Class::DumpClass(std::ostream& os, int flags) {
   if ((flags & kDumpClassFullDetail) == 0) {
-    os << PrettyClass(this);
+    os << PrettyClass();
     if ((flags & kDumpClassClassLoader) != 0) {
       os << ' ' << GetClassLoader();
     }
@@ -209,8 +254,8 @@
 
   Thread* const self = Thread::Current();
   StackHandleScope<2> hs(self);
-  Handle<mirror::Class> h_this(hs.NewHandle(this));
-  Handle<mirror::Class> h_super(hs.NewHandle(GetSuperClass()));
+  Handle<Class> h_this(hs.NewHandle(this));
+  Handle<Class> h_super(hs.NewHandle(GetSuperClass()));
   auto image_pointer_size = Runtime::Current()->GetClassLinker()->GetImagePointerSize();
 
   std::string temp;
@@ -221,7 +266,7 @@
   os << StringPrintf("  access=0x%04x.%04x\n",
       GetAccessFlags() >> 16, GetAccessFlags() & kAccJavaFlagsMask);
   if (h_super.Get() != nullptr) {
-    os << "  super='" << PrettyClass(h_super.Get()) << "' (cl=" << h_super->GetClassLoader()
+    os << "  super='" << h_super->PrettyClass() << "' (cl=" << h_super->GetClassLoader()
        << ")\n";
   }
   if (IsArrayClass()) {
@@ -231,12 +276,12 @@
   if (num_direct_interfaces > 0) {
     os << "  interfaces (" << num_direct_interfaces << "):\n";
     for (size_t i = 0; i < num_direct_interfaces; ++i) {
-      Class* interface = GetDirectInterface(self, h_this, i);
+      ObjPtr<Class> interface = GetDirectInterface(self, h_this, i);
       if (interface == nullptr) {
         os << StringPrintf("    %2zd: nullptr!\n", i);
       } else {
-        const ClassLoader* cl = interface->GetClassLoader();
-        os << StringPrintf("    %2zd: %s (cl=%p)\n", i, PrettyClass(interface).c_str(), cl);
+        ObjPtr<ClassLoader> cl = interface->GetClassLoader();
+        os << StringPrintf("    %2zd: %s (cl=%p)\n", i, PrettyClass(interface).c_str(), cl.Ptr());
       }
     }
   }
@@ -247,19 +292,20 @@
     os << "  vtable (" << h_this->NumVirtualMethods() << " entries, "
         << (h_super.Get() != nullptr ? h_super->NumVirtualMethods() : 0) << " in super):\n";
     for (size_t i = 0; i < NumVirtualMethods(); ++i) {
-      os << StringPrintf("    %2zd: %s\n", i, PrettyMethod(
+      os << StringPrintf("    %2zd: %s\n", i, ArtMethod::PrettyMethod(
           h_this->GetVirtualMethodDuringLinking(i, image_pointer_size)).c_str());
     }
     os << "  direct methods (" << h_this->NumDirectMethods() << " entries):\n";
     for (size_t i = 0; i < h_this->NumDirectMethods(); ++i) {
-      os << StringPrintf("    %2zd: %s\n", i, PrettyMethod(
+      os << StringPrintf("    %2zd: %s\n", i, ArtMethod::PrettyMethod(
           h_this->GetDirectMethod(i, image_pointer_size)).c_str());
     }
     if (h_this->NumStaticFields() > 0) {
       os << "  static fields (" << h_this->NumStaticFields() << " entries):\n";
       if (h_this->IsResolved() || h_this->IsErroneous()) {
         for (size_t i = 0; i < h_this->NumStaticFields(); ++i) {
-          os << StringPrintf("    %2zd: %s\n", i, PrettyField(h_this->GetStaticField(i)).c_str());
+          os << StringPrintf("    %2zd: %s\n", i,
+                             ArtField::PrettyField(h_this->GetStaticField(i)).c_str());
         }
       } else {
         os << "    <not yet available>";
@@ -269,7 +315,8 @@
       os << "  instance fields (" << h_this->NumInstanceFields() << " entries):\n";
       if (h_this->IsResolved() || h_this->IsErroneous()) {
         for (size_t i = 0; i < h_this->NumInstanceFields(); ++i) {
-          os << StringPrintf("    %2zd: %s\n", i, PrettyField(h_this->GetInstanceField(i)).c_str());
+          os << StringPrintf("    %2zd: %s\n", i,
+                             ArtField::PrettyField(h_this->GetInstanceField(i)).c_str());
         }
       } else {
         os << "    <not yet available>";
@@ -283,7 +330,7 @@
     // Sanity check that the number of bits set in the reference offset bitmap
     // agrees with the number of references
     uint32_t count = 0;
-    for (Class* c = this; c != nullptr; c = c->GetSuperClass()) {
+    for (ObjPtr<Class> c = this; c != nullptr; c = c->GetSuperClass()) {
       count += c->NumReferenceInstanceFieldsDuringLinking();
     }
     // +1 for the Class in Object.
@@ -338,7 +385,7 @@
   return WellKnownClasses::ToClass(WellKnownClasses::java_lang_Throwable)->IsAssignableFrom(this);
 }
 
-void Class::SetClassLoader(ClassLoader* new_class_loader) {
+void Class::SetClassLoader(ObjPtr<ClassLoader> new_class_loader) {
   if (Runtime::Current()->IsActiveTransaction()) {
     SetFieldObject<true>(OFFSET_OF_OBJECT_MEMBER(Class, class_loader_), new_class_loader);
   } else {
@@ -356,7 +403,7 @@
   }
 
   int32_t iftable_count = GetIfTableCount();
-  IfTable* iftable = GetIfTable();
+  ObjPtr<IfTable> iftable = GetIfTable();
   for (int32_t i = 0; i < iftable_count; ++i) {
     method = iftable->GetInterface(i)->FindDeclaredVirtualMethod(name, signature, pointer_size);
     if (method != nullptr) {
@@ -376,7 +423,7 @@
   }
 
   int32_t iftable_count = GetIfTableCount();
-  IfTable* iftable = GetIfTable();
+  ObjPtr<IfTable> iftable = GetIfTable();
   for (int32_t i = 0; i < iftable_count; ++i) {
     method = iftable->GetInterface(i)->FindDeclaredVirtualMethod(name, signature, pointer_size);
     if (method != nullptr) {
@@ -386,7 +433,7 @@
   return nullptr;
 }
 
-ArtMethod* Class::FindInterfaceMethod(const DexCache* dex_cache,
+ArtMethod* Class::FindInterfaceMethod(ObjPtr<DexCache> dex_cache,
                                       uint32_t dex_method_idx,
                                       PointerSize pointer_size) {
   // Check the current class before checking the interfaces.
@@ -396,7 +443,7 @@
   }
 
   int32_t iftable_count = GetIfTableCount();
-  IfTable* iftable = GetIfTable();
+  ObjPtr<IfTable> iftable = GetIfTable();
   for (int32_t i = 0; i < iftable_count; ++i) {
     method = iftable->GetInterface(i)->FindDeclaredVirtualMethod(
         dex_cache, dex_method_idx, pointer_size);
@@ -429,7 +476,7 @@
   return nullptr;
 }
 
-ArtMethod* Class::FindDeclaredDirectMethod(const DexCache* dex_cache,
+ArtMethod* Class::FindDeclaredDirectMethod(ObjPtr<DexCache> dex_cache,
                                            uint32_t dex_method_idx,
                                            PointerSize pointer_size) {
   if (GetDexCache() == dex_cache) {
@@ -445,7 +492,7 @@
 ArtMethod* Class::FindDirectMethod(const StringPiece& name,
                                    const StringPiece& signature,
                                    PointerSize pointer_size) {
-  for (Class* klass = this; klass != nullptr; klass = klass->GetSuperClass()) {
+  for (ObjPtr<Class> klass = this; klass != nullptr; klass = klass->GetSuperClass()) {
     ArtMethod* method = klass->FindDeclaredDirectMethod(name, signature, pointer_size);
     if (method != nullptr) {
       return method;
@@ -457,7 +504,7 @@
 ArtMethod* Class::FindDirectMethod(const StringPiece& name,
                                    const Signature& signature,
                                    PointerSize pointer_size) {
-  for (Class* klass = this; klass != nullptr; klass = klass->GetSuperClass()) {
+  for (ObjPtr<Class> klass = this; klass != nullptr; klass = klass->GetSuperClass()) {
     ArtMethod* method = klass->FindDeclaredDirectMethod(name, signature, pointer_size);
     if (method != nullptr) {
       return method;
@@ -466,9 +513,10 @@
   return nullptr;
 }
 
-ArtMethod* Class::FindDirectMethod(
-    const DexCache* dex_cache, uint32_t dex_method_idx, PointerSize pointer_size) {
-  for (Class* klass = this; klass != nullptr; klass = klass->GetSuperClass()) {
+ArtMethod* Class::FindDirectMethod(ObjPtr<DexCache> dex_cache,
+                                   uint32_t dex_method_idx,
+                                   PointerSize pointer_size) {
+  for (ObjPtr<Class> klass = this; klass != nullptr; klass = klass->GetSuperClass()) {
     ArtMethod* method = klass->FindDeclaredDirectMethod(dex_cache, dex_method_idx, pointer_size);
     if (method != nullptr) {
       return method;
@@ -516,7 +564,7 @@
   return nullptr;
 }
 
-ArtMethod* Class::FindDeclaredVirtualMethod(const DexCache* dex_cache,
+ArtMethod* Class::FindDeclaredVirtualMethod(ObjPtr<DexCache> dex_cache,
                                             uint32_t dex_method_idx,
                                             PointerSize pointer_size) {
   if (GetDexCache() == dex_cache) {
@@ -540,9 +588,10 @@
   return nullptr;
 }
 
-ArtMethod* Class::FindVirtualMethod(
-    const StringPiece& name, const StringPiece& signature, PointerSize pointer_size) {
-  for (Class* klass = this; klass != nullptr; klass = klass->GetSuperClass()) {
+ArtMethod* Class::FindVirtualMethod(const StringPiece& name,
+                                    const StringPiece& signature,
+                                    PointerSize pointer_size) {
+  for (ObjPtr<Class> klass = this; klass != nullptr; klass = klass->GetSuperClass()) {
     ArtMethod* method = klass->FindDeclaredVirtualMethod(name, signature, pointer_size);
     if (method != nullptr) {
       return method;
@@ -551,9 +600,10 @@
   return nullptr;
 }
 
-ArtMethod* Class::FindVirtualMethod(
-    const StringPiece& name, const Signature& signature, PointerSize pointer_size) {
-  for (Class* klass = this; klass != nullptr; klass = klass->GetSuperClass()) {
+ArtMethod* Class::FindVirtualMethod(const StringPiece& name,
+                                    const Signature& signature,
+                                    PointerSize pointer_size) {
+  for (ObjPtr<Class> klass = this; klass != nullptr; klass = klass->GetSuperClass()) {
     ArtMethod* method = klass->FindDeclaredVirtualMethod(name, signature, pointer_size);
     if (method != nullptr) {
       return method;
@@ -562,9 +612,10 @@
   return nullptr;
 }
 
-ArtMethod* Class::FindVirtualMethod(
-    const DexCache* dex_cache, uint32_t dex_method_idx, PointerSize pointer_size) {
-  for (Class* klass = this; klass != nullptr; klass = klass->GetSuperClass()) {
+ArtMethod* Class::FindVirtualMethod(ObjPtr<DexCache> dex_cache,
+                                    uint32_t dex_method_idx,
+                                    PointerSize pointer_size) {
+  for (ObjPtr<Class> klass = this; klass != nullptr; klass = klass->GetSuperClass()) {
     ArtMethod* method = klass->FindDeclaredVirtualMethod(dex_cache, dex_method_idx, pointer_size);
     if (method != nullptr) {
       return method;
@@ -591,8 +642,8 @@
 
   Thread* self = Thread::Current();
   StackHandleScope<2> hs(self);
-  MutableHandle<mirror::IfTable> iftable(hs.NewHandle(GetIfTable()));
-  MutableHandle<mirror::Class> iface(hs.NewHandle<mirror::Class>(nullptr));
+  MutableHandle<IfTable> iftable(hs.NewHandle(GetIfTable()));
+  MutableHandle<Class> iface(hs.NewHandle<Class>(nullptr));
   size_t iftable_count = GetIfTableCount();
   // Find the method. We don't need to check for conflicts because they would have been in the
   // copied virtuals of this interface.  Order matters, traverse in reverse topological order; most
@@ -686,7 +737,7 @@
         break;
       }
     }
-    CHECK_EQ(found, ret) << "Found " << PrettyField(found) << " vs  " << PrettyField(ret);
+    CHECK_EQ(found, ret) << "Found " << found->PrettyField() << " vs  " << ret->PrettyField();
   }
   return ret;
 }
@@ -696,7 +747,7 @@
   return FindFieldByNameAndType(GetIFieldsPtr(), name, type);
 }
 
-ArtField* Class::FindDeclaredInstanceField(const DexCache* dex_cache, uint32_t dex_field_idx) {
+ArtField* Class::FindDeclaredInstanceField(ObjPtr<DexCache> dex_cache, uint32_t dex_field_idx) {
   if (GetDexCache() == dex_cache) {
     for (ArtField& field : GetIFields()) {
       if (field.GetDexFieldIndex() == dex_field_idx) {
@@ -710,7 +761,7 @@
 ArtField* Class::FindInstanceField(const StringPiece& name, const StringPiece& type) {
   // Is the field in this class, or any of its superclasses?
   // Interfaces are not relevant because they can't contain instance fields.
-  for (Class* c = this; c != nullptr; c = c->GetSuperClass()) {
+  for (ObjPtr<Class> c = this; c != nullptr; c = c->GetSuperClass()) {
     ArtField* f = c->FindDeclaredInstanceField(name, type);
     if (f != nullptr) {
       return f;
@@ -719,10 +770,10 @@
   return nullptr;
 }
 
-ArtField* Class::FindInstanceField(const DexCache* dex_cache, uint32_t dex_field_idx) {
+ArtField* Class::FindInstanceField(ObjPtr<DexCache> dex_cache, uint32_t dex_field_idx) {
   // Is the field in this class, or any of its superclasses?
   // Interfaces are not relevant because they can't contain instance fields.
-  for (Class* c = this; c != nullptr; c = c->GetSuperClass()) {
+  for (ObjPtr<Class> c = this; c != nullptr; c = c->GetSuperClass()) {
     ArtField* f = c->FindDeclaredInstanceField(dex_cache, dex_field_idx);
     if (f != nullptr) {
       return f;
@@ -736,7 +787,7 @@
   return FindFieldByNameAndType(GetSFieldsPtr(), name, type);
 }
 
-ArtField* Class::FindDeclaredStaticField(const DexCache* dex_cache, uint32_t dex_field_idx) {
+ArtField* Class::FindDeclaredStaticField(ObjPtr<DexCache> dex_cache, uint32_t dex_field_idx) {
   if (dex_cache == GetDexCache()) {
     for (ArtField& field : GetSFields()) {
       if (field.GetDexFieldIndex() == dex_field_idx) {
@@ -747,11 +798,13 @@
   return nullptr;
 }
 
-ArtField* Class::FindStaticField(Thread* self, Handle<Class> klass, const StringPiece& name,
+ArtField* Class::FindStaticField(Thread* self,
+                                 Handle<Class> klass,
+                                 const StringPiece& name,
                                  const StringPiece& type) {
   // Is the field in this class (or its interfaces), or any of its
   // superclasses (or their interfaces)?
-  for (Class* k = klass.Get(); k != nullptr; k = k->GetSuperClass()) {
+  for (ObjPtr<Class> k = klass.Get(); k != nullptr; k = k->GetSuperClass()) {
     // Is the field in this class?
     ArtField* f = k->FindDeclaredStaticField(name, type);
     if (f != nullptr) {
@@ -759,11 +812,11 @@
     }
     // Wrap k incase it moves during GetDirectInterface.
     StackHandleScope<1> hs(self);
-    HandleWrapper<mirror::Class> h_k(hs.NewHandleWrapper(&k));
+    HandleWrapperObjPtr<Class> h_k(hs.NewHandleWrapper(&k));
     // Is this field in any of this class' interfaces?
     for (uint32_t i = 0; i < h_k->NumDirectInterfaces(); ++i) {
       StackHandleScope<1> hs2(self);
-      Handle<mirror::Class> interface(hs2.NewHandle(GetDirectInterface(self, h_k, i)));
+      Handle<Class> interface(hs2.NewHandle(GetDirectInterface(self, h_k, i)));
       f = FindStaticField(self, interface, name, type);
       if (f != nullptr) {
         return f;
@@ -774,10 +827,10 @@
 }
 
 ArtField* Class::FindStaticField(Thread* self,
-                                 Class* klass,
-                                 const DexCache* dex_cache,
+                                 ObjPtr<Class> klass,
+                                 ObjPtr<DexCache> dex_cache,
                                  uint32_t dex_field_idx) {
-  for (Class* k = klass; k != nullptr; k = k->GetSuperClass()) {
+  for (ObjPtr<Class> k = klass; k != nullptr; k = k->GetSuperClass()) {
     // Is the field in this class?
     ArtField* f = k->FindDeclaredStaticField(dex_cache, dex_field_idx);
     if (f != nullptr) {
@@ -787,10 +840,10 @@
     // from here, it takes a Handle as an argument, so we need to wrap `k`.
     ScopedAssertNoThreadSuspension ants(__FUNCTION__);
     StackHandleScope<1> hs(self);
-    Handle<mirror::Class> h_k(hs.NewHandle(k));
+    Handle<Class> h_k(hs.NewHandle(k));
     // Is this field in any of this class' interfaces?
     for (uint32_t i = 0; i < h_k->NumDirectInterfaces(); ++i) {
-      mirror::Class* interface = GetDirectInterface(self, h_k, i);
+      ObjPtr<Class> interface = GetDirectInterface(self, h_k, i);
       f = FindStaticField(self, interface, dex_cache, dex_field_idx);
       if (f != nullptr) {
         return f;
@@ -800,10 +853,12 @@
   return nullptr;
 }
 
-ArtField* Class::FindField(Thread* self, Handle<Class> klass, const StringPiece& name,
+ArtField* Class::FindField(Thread* self,
+                           Handle<Class> klass,
+                           const StringPiece& name,
                            const StringPiece& type) {
   // Find a field using the JLS field resolution order
-  for (Class* k = klass.Get(); k != nullptr; k = k->GetSuperClass()) {
+  for (ObjPtr<Class> k = klass.Get(); k != nullptr; k = k->GetSuperClass()) {
     // Is the field in this class?
     ArtField* f = k->FindDeclaredInstanceField(name, type);
     if (f != nullptr) {
@@ -815,10 +870,10 @@
     }
     // Is this field in any of this class' interfaces?
     StackHandleScope<1> hs(self);
-    HandleWrapper<mirror::Class> h_k(hs.NewHandleWrapper(&k));
+    HandleWrapperObjPtr<Class> h_k(hs.NewHandleWrapper(&k));
     for (uint32_t i = 0; i < h_k->NumDirectInterfaces(); ++i) {
       StackHandleScope<1> hs2(self);
-      Handle<mirror::Class> interface(hs2.NewHandle(GetDirectInterface(self, h_k, i)));
+      Handle<Class> interface(hs2.NewHandle(GetDirectInterface(self, h_k, i)));
       f = interface->FindStaticField(self, interface, name, type);
       if (f != nullptr) {
         return f;
@@ -874,8 +929,9 @@
   return GetInterfaceTypeList()->GetTypeItem(idx).type_idx_;
 }
 
-mirror::Class* Class::GetDirectInterface(Thread* self, Handle<mirror::Class> klass,
-                                         uint32_t idx) {
+ObjPtr<Class> Class::GetDirectInterface(Thread* self,
+                                        Handle<Class> klass,
+                                        uint32_t idx) {
   DCHECK(klass.Get() != nullptr);
   DCHECK(!klass->IsPrimitive());
   if (klass->IsArrayClass()) {
@@ -887,12 +943,12 @@
       return class_linker->FindSystemClass(self, "Ljava/io/Serializable;");
     }
   } else if (klass->IsProxyClass()) {
-    mirror::ObjectArray<mirror::Class>* interfaces = klass.Get()->GetInterfaces();
+    ObjPtr<ObjectArray<Class>> interfaces = klass.Get()->GetInterfaces();
     DCHECK(interfaces != nullptr);
     return interfaces->Get(idx);
   } else {
     uint16_t type_idx = klass->GetDirectInterfaceTypeIdx(idx);
-    mirror::Class* interface = klass->GetDexCache()->GetResolvedType(type_idx);
+    ObjPtr<Class> interface = klass->GetDexCache()->GetResolvedType(type_idx);
     if (interface == nullptr) {
       interface = Runtime::Current()->GetClassLinker()->ResolveType(klass->GetDexFile(), type_idx,
                                                                     klass.Get());
@@ -902,15 +958,15 @@
   }
 }
 
-mirror::Class* Class::GetCommonSuperClass(Handle<Class> klass) {
+ObjPtr<Class> Class::GetCommonSuperClass(Handle<Class> klass) {
   DCHECK(klass.Get() != nullptr);
   DCHECK(!klass->IsInterface());
   DCHECK(!IsInterface());
-  mirror::Class* common_super_class = this;
+  ObjPtr<Class> common_super_class = this;
   while (!common_super_class->IsAssignableFrom(klass.Get())) {
-    mirror::Class* old_common = common_super_class;
+    ObjPtr<Class> old_common = common_super_class;
     common_super_class = old_common->GetSuperClass();
-    DCHECK(common_super_class != nullptr) << PrettyClass(old_common);
+    DCHECK(common_super_class != nullptr) << old_common->PrettyClass();
   }
   return common_super_class;
 }
@@ -926,7 +982,7 @@
 }
 
 std::string Class::GetLocation() {
-  mirror::DexCache* dex_cache = GetDexCache();
+  ObjPtr<DexCache> dex_cache = GetDexCache();
   if (dex_cache != nullptr && !IsProxyClass()) {
     return dex_cache->GetLocation()->ToModifiedUtf8();
   }
@@ -944,7 +1000,7 @@
 
 void Class::PopulateEmbeddedVTable(PointerSize pointer_size) {
   PointerArray* table = GetVTableDuringLinking();
-  CHECK(table != nullptr) << PrettyClass(this);
+  CHECK(table != nullptr) << PrettyClass();
   const size_t table_length = table->GetLength();
   SetEmbeddedVTableLength(table_length);
   for (size_t i = 0; i < table_length; i++) {
@@ -959,28 +1015,28 @@
 
 class ReadBarrierOnNativeRootsVisitor {
  public:
-  void operator()(mirror::Object* obj ATTRIBUTE_UNUSED,
+  void operator()(ObjPtr<Object> obj ATTRIBUTE_UNUSED,
                   MemberOffset offset ATTRIBUTE_UNUSED,
                   bool is_static ATTRIBUTE_UNUSED) const {}
 
-  void VisitRootIfNonNull(mirror::CompressedReference<mirror::Object>* root) const
+  void VisitRootIfNonNull(CompressedReference<Object>* root) const
       REQUIRES_SHARED(Locks::mutator_lock_) {
     if (!root->IsNull()) {
       VisitRoot(root);
     }
   }
 
-  void VisitRoot(mirror::CompressedReference<mirror::Object>* root) const
+  void VisitRoot(CompressedReference<Object>* root) const
       REQUIRES_SHARED(Locks::mutator_lock_) {
-    mirror::Object* old_ref = root->AsMirrorPtr();
-    mirror::Object* new_ref = ReadBarrier::BarrierForRoot(root);
+    ObjPtr<Object> old_ref = root->AsMirrorPtr();
+    ObjPtr<Object> new_ref = ReadBarrier::BarrierForRoot(root);
     if (old_ref != new_ref) {
       // Update the field atomically. This may fail if mutator updates before us, but it's ok.
       auto* atomic_root =
-          reinterpret_cast<Atomic<mirror::CompressedReference<mirror::Object>>*>(root);
+          reinterpret_cast<Atomic<CompressedReference<Object>>*>(root);
       atomic_root->CompareExchangeStrongSequentiallyConsistent(
-          mirror::CompressedReference<mirror::Object>::FromMirrorPtr(old_ref),
-          mirror::CompressedReference<mirror::Object>::FromMirrorPtr(new_ref));
+          CompressedReference<Object>::FromMirrorPtr(old_ref.Ptr()),
+          CompressedReference<Object>::FromMirrorPtr(new_ref.Ptr()));
     }
   }
 };
@@ -989,7 +1045,7 @@
 class CopyClassVisitor {
  public:
   CopyClassVisitor(Thread* self,
-                   Handle<mirror::Class>* orig,
+                   Handle<Class>* orig,
                    size_t new_length,
                    size_t copy_bytes,
                    ImTable* imt,
@@ -998,24 +1054,24 @@
         copy_bytes_(copy_bytes), imt_(imt), pointer_size_(pointer_size) {
   }
 
-  void operator()(mirror::Object* obj, size_t usable_size ATTRIBUTE_UNUSED) const
+  void operator()(ObjPtr<Object> obj, size_t usable_size ATTRIBUTE_UNUSED) const
       REQUIRES_SHARED(Locks::mutator_lock_) {
     StackHandleScope<1> hs(self_);
     Handle<mirror::Class> h_new_class_obj(hs.NewHandle(obj->AsClass()));
-    mirror::Object::CopyObject(self_, h_new_class_obj.Get(), orig_->Get(), copy_bytes_);
-    mirror::Class::SetStatus(h_new_class_obj, Class::kStatusResolving, self_);
+    Object::CopyObject(h_new_class_obj.Get(), orig_->Get(), copy_bytes_);
+    Class::SetStatus(h_new_class_obj, Class::kStatusResolving, self_);
     h_new_class_obj->PopulateEmbeddedVTable(pointer_size_);
     h_new_class_obj->SetImt(imt_, pointer_size_);
     h_new_class_obj->SetClassSize(new_length_);
     // Visit all of the references to make sure there is no from space references in the native
     // roots.
-    static_cast<mirror::Object*>(h_new_class_obj.Get())->VisitReferences(
+    ObjPtr<Object>(h_new_class_obj.Get())->VisitReferences(
         ReadBarrierOnNativeRootsVisitor(), VoidFunctor());
   }
 
  private:
   Thread* const self_;
-  Handle<mirror::Class>* const orig_;
+  Handle<Class>* const orig_;
   const size_t new_length_;
   const size_t copy_bytes_;
   ImTable* imt_;
@@ -1027,12 +1083,12 @@
   DCHECK_GE(new_length, static_cast<int32_t>(sizeof(Class)));
   // We may get copied by a compacting GC.
   StackHandleScope<1> hs(self);
-  Handle<mirror::Class> h_this(hs.NewHandle(this));
+  Handle<Class> h_this(hs.NewHandle(this));
   gc::Heap* heap = Runtime::Current()->GetHeap();
   // The num_bytes (3rd param) is sizeof(Class) as opposed to SizeOf()
   // to skip copying the tail part that we will overwrite here.
   CopyClassVisitor visitor(self, &h_this, new_length, sizeof(Class), imt, pointer_size);
-  mirror::Object* new_class = kMovingClasses ?
+  ObjPtr<Object> new_class = kMovingClasses ?
       heap->AllocObject<true>(self, java_lang_Class_.Read(), new_length, visitor) :
       heap->AllocNonMovableObject<true>(self, java_lang_Class_.Read(), new_length, visitor);
   if (UNLIKELY(new_class == nullptr)) {
@@ -1049,7 +1105,7 @@
 
 // TODO: Move this to java_lang_Class.cc?
 ArtMethod* Class::GetDeclaredConstructor(
-    Thread* self, Handle<mirror::ObjectArray<mirror::Class>> args, PointerSize pointer_size) {
+    Thread* self, Handle<ObjectArray<Class>> args, PointerSize pointer_size) {
   for (auto& m : GetDirectMethods(pointer_size)) {
     // Skip <clinit> which is a static constructor, as well as non constructors.
     if (m.IsStatic() || !m.IsConstructor()) {
@@ -1068,7 +1124,7 @@
 
 uint32_t Class::Depth() {
   uint32_t depth = 0;
-  for (Class* klass = this; klass->GetSuperClass() != nullptr; klass = klass->GetSuperClass()) {
+  for (ObjPtr<Class> klass = this; klass->GetSuperClass() != nullptr; klass = klass->GetSuperClass()) {
     depth++;
   }
   return depth;
@@ -1081,10 +1137,11 @@
 }
 
 template <PointerSize kPointerSize, bool kTransactionActive>
-mirror::Method* Class::GetDeclaredMethodInternal(Thread* self,
-                                                 mirror::Class* klass,
-                                                 mirror::String* name,
-                                                 mirror::ObjectArray<mirror::Class>* args) {
+ObjPtr<Method> Class::GetDeclaredMethodInternal(
+    Thread* self,
+    ObjPtr<Class> klass,
+    ObjPtr<String> name,
+    ObjPtr<ObjectArray<Class>> args) {
   // Covariant return types permit the class to define multiple
   // methods with the same name and parameter types. Prefer to
   // return a non-synthetic method in such situations. We may
@@ -1099,12 +1156,12 @@
     return nullptr;
   }
   auto h_args = hs.NewHandle(args);
-  Handle<mirror::Class> h_klass = hs.NewHandle(klass);
+  Handle<Class> h_klass = hs.NewHandle(klass);
   ArtMethod* result = nullptr;
   for (auto& m : h_klass->GetDeclaredVirtualMethods(kPointerSize)) {
     auto* np_method = m.GetInterfaceMethodIfProxy(kPointerSize);
     // May cause thread suspension.
-    mirror::String* np_name = np_method->GetNameAsString(self);
+    ObjPtr<String> np_name = np_method->GetNameAsString(self);
     if (!np_name->Equals(h_method_name.Get()) || !np_method->EqualParameters(h_args)) {
       if (UNLIKELY(self->IsExceptionPending())) {
         return nullptr;
@@ -1113,7 +1170,7 @@
     }
     auto modifiers = m.GetAccessFlags();
     if ((modifiers & kSkipModifiers) == 0) {
-      return mirror::Method::CreateFromArtMethod<kPointerSize, kTransactionActive>(self, &m);
+      return Method::CreateFromArtMethod<kPointerSize, kTransactionActive>(self, &m);
     }
     if ((modifiers & kAccMiranda) == 0) {
       result = &m;  // Remember as potential result if it's not a miranda method.
@@ -1127,7 +1184,7 @@
       }
       auto* np_method = m.GetInterfaceMethodIfProxy(kPointerSize);
       // May cause thread suspension.
-      mirror::String* np_name = np_method->GetNameAsString(self);
+      ObjPtr<String> np_name = np_method->GetNameAsString(self);
       if (np_name == nullptr) {
         self->AssertPendingException();
         return nullptr;
@@ -1139,76 +1196,76 @@
         continue;
       }
       if ((modifiers & kSkipModifiers) == 0) {
-        return mirror::Method::CreateFromArtMethod<kPointerSize, kTransactionActive>(self, &m);
+        return Method::CreateFromArtMethod<kPointerSize, kTransactionActive>(self, &m);
       }
       // Direct methods cannot be miranda methods, so this potential result must be synthetic.
       result = &m;
     }
   }
   return result != nullptr
-      ? mirror::Method::CreateFromArtMethod<kPointerSize, kTransactionActive>(self, result)
+      ? Method::CreateFromArtMethod<kPointerSize, kTransactionActive>(self, result)
       : nullptr;
 }
 
 template
-mirror::Method* Class::GetDeclaredMethodInternal<PointerSize::k32, false>(
+ObjPtr<Method> Class::GetDeclaredMethodInternal<PointerSize::k32, false>(
     Thread* self,
-    mirror::Class* klass,
-    mirror::String* name,
-    mirror::ObjectArray<mirror::Class>* args);
+    ObjPtr<Class> klass,
+    ObjPtr<String> name,
+    ObjPtr<ObjectArray<Class>> args);
 template
-mirror::Method* Class::GetDeclaredMethodInternal<PointerSize::k32, true>(
+ObjPtr<Method> Class::GetDeclaredMethodInternal<PointerSize::k32, true>(
     Thread* self,
-    mirror::Class* klass,
-    mirror::String* name,
-    mirror::ObjectArray<mirror::Class>* args);
+    ObjPtr<Class> klass,
+    ObjPtr<String> name,
+    ObjPtr<ObjectArray<Class>> args);
 template
-mirror::Method* Class::GetDeclaredMethodInternal<PointerSize::k64, false>(
+ObjPtr<Method> Class::GetDeclaredMethodInternal<PointerSize::k64, false>(
     Thread* self,
-    mirror::Class* klass,
-    mirror::String* name,
-    mirror::ObjectArray<mirror::Class>* args);
+    ObjPtr<Class> klass,
+    ObjPtr<String> name,
+    ObjPtr<ObjectArray<Class>> args);
 template
-mirror::Method* Class::GetDeclaredMethodInternal<PointerSize::k64, true>(
+ObjPtr<Method> Class::GetDeclaredMethodInternal<PointerSize::k64, true>(
     Thread* self,
-    mirror::Class* klass,
-    mirror::String* name,
-    mirror::ObjectArray<mirror::Class>* args);
+    ObjPtr<Class> klass,
+    ObjPtr<String> name,
+    ObjPtr<ObjectArray<Class>> args);
 
 template <PointerSize kPointerSize, bool kTransactionActive>
-mirror::Constructor* Class::GetDeclaredConstructorInternal(
+ObjPtr<Constructor> Class::GetDeclaredConstructorInternal(
     Thread* self,
-    mirror::Class* klass,
-    mirror::ObjectArray<mirror::Class>* args) {
+    ObjPtr<Class> klass,
+    ObjPtr<ObjectArray<Class>> args) {
   StackHandleScope<1> hs(self);
   ArtMethod* result = klass->GetDeclaredConstructor(self, hs.NewHandle(args), kPointerSize);
   return result != nullptr
-      ? mirror::Constructor::CreateFromArtMethod<kPointerSize, kTransactionActive>(self, result)
+      ? Constructor::CreateFromArtMethod<kPointerSize, kTransactionActive>(self, result)
       : nullptr;
 }
 
-// mirror::Constructor::CreateFromArtMethod<kTransactionActive>(self, result)
+// Constructor::CreateFromArtMethod<kTransactionActive>(self, result)
 
 template
-mirror::Constructor* Class::GetDeclaredConstructorInternal<PointerSize::k32, false>(
+ObjPtr<Constructor> Class::GetDeclaredConstructorInternal<PointerSize::k32, false>(
     Thread* self,
-    mirror::Class* klass,
-    mirror::ObjectArray<mirror::Class>* args);
+    ObjPtr<Class> klass,
+    ObjPtr<ObjectArray<Class>> args);
 template
-mirror::Constructor* Class::GetDeclaredConstructorInternal<PointerSize::k32, true>(
+ObjPtr<Constructor> Class::GetDeclaredConstructorInternal<PointerSize::k32, true>(
     Thread* self,
-    mirror::Class* klass,
-    mirror::ObjectArray<mirror::Class>* args);
+    ObjPtr<Class> klass,
+    ObjPtr<ObjectArray<Class>> args);
 template
-mirror::Constructor* Class::GetDeclaredConstructorInternal<PointerSize::k64, false>(
+ObjPtr<Constructor> Class::GetDeclaredConstructorInternal<PointerSize::k64, false>(
     Thread* self,
-    mirror::Class* klass,
-    mirror::ObjectArray<mirror::Class>* args);
+    ObjPtr<Class> klass,
+    ObjPtr<ObjectArray<Class>> args);
 template
-mirror::Constructor* Class::GetDeclaredConstructorInternal<PointerSize::k64, true>(
+ObjPtr<Constructor> Class::GetDeclaredConstructorInternal<PointerSize::k64, true>(
     Thread* self,
-    mirror::Class* klass,
-    mirror::ObjectArray<mirror::Class>* args);
+    ObjPtr<Class> klass,
+    ObjPtr<ObjectArray<Class>> args);
 
 int32_t Class::GetInnerClassFlags(Handle<Class> h_this, int32_t default_value) {
   if (h_this->IsProxyClass() || h_this->GetDexCache() == nullptr) {
@@ -1229,5 +1286,50 @@
   }
 }
 
+std::string Class::PrettyDescriptor(ObjPtr<mirror::Class> klass) {
+  if (klass == nullptr) {
+    return "null";
+  }
+  return klass->PrettyDescriptor();
+}
+
+std::string Class::PrettyDescriptor() {
+  std::string temp;
+  return art::PrettyDescriptor(GetDescriptor(&temp));
+}
+
+std::string Class::PrettyClass(ObjPtr<mirror::Class> c) {
+  if (c == nullptr) {
+    return "null";
+  }
+  return c->PrettyClass();
+}
+
+std::string Class::PrettyClass() {
+  std::string result;
+  result += "java.lang.Class<";
+  result += PrettyDescriptor();
+  result += ">";
+  return result;
+}
+
+std::string Class::PrettyClassAndClassLoader(ObjPtr<mirror::Class> c) {
+  if (c == nullptr) {
+    return "null";
+  }
+  return c->PrettyClassAndClassLoader();
+}
+
+std::string Class::PrettyClassAndClassLoader() {
+  std::string result;
+  result += "java.lang.Class<";
+  result += PrettyDescriptor();
+  result += ",";
+  result += mirror::Object::PrettyTypeOf(GetClassLoader());
+  // TODO: add an identifying hash value for the loader
+  result += ">";
+  return result;
+}
+
 }  // namespace mirror
 }  // namespace art
diff --git a/runtime/mirror/class.h b/runtime/mirror/class.h
index a0d6f37..711914d 100644
--- a/runtime/mirror/class.h
+++ b/runtime/mirror/class.h
@@ -49,6 +49,7 @@
 
 namespace mirror {
 
+class ClassExt;
 class ClassLoader;
 class Constructor;
 class DexCache;
@@ -56,7 +57,7 @@
 class Method;
 template <typename T> struct PACKED(8) DexCachePair;
 
-using StringDexCachePair = DexCachePair<mirror::String>;
+using StringDexCachePair = DexCachePair<String>;
 using StringDexCacheType = std::atomic<StringDexCachePair>;
 
 // C++ mirror of java.lang.Class
@@ -337,18 +338,7 @@
   // For array classes, where all the classes are final due to there being no sub-classes, an
   // Object[] may be assigned to by a String[] but a String[] may not be assigned to by other
   // types as the component is final.
-  bool CannotBeAssignedFromOtherTypes() REQUIRES_SHARED(Locks::mutator_lock_) {
-    if (!IsArrayClass()) {
-      return IsFinal();
-    } else {
-      Class* component = GetComponentType();
-      if (component->IsPrimitive()) {
-        return true;
-      } else {
-        return component->CannotBeAssignedFromOtherTypes();
-      }
-    }
-  }
+  bool CannotBeAssignedFromOtherTypes() REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Returns true if this class is the placeholder and should retire and
   // be replaced with a class with the right size for embedded imt/vtable.
@@ -358,7 +348,7 @@
   }
 
   String* GetName() REQUIRES_SHARED(Locks::mutator_lock_);  // Returns the cached name.
-  void SetName(String* name) REQUIRES_SHARED(Locks::mutator_lock_);  // Sets the cached name.
+  void SetName(ObjPtr<String> name) REQUIRES_SHARED(Locks::mutator_lock_);  // Sets the cached name.
   // Computes the name, then sets the cached value.
   static String* ComputeName(Handle<Class> h_this) REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!Roles::uninterruptible_);
@@ -473,7 +463,7 @@
            ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   Class* GetComponentType() REQUIRES_SHARED(Locks::mutator_lock_);
 
-  void SetComponentType(Class* new_component_type) REQUIRES_SHARED(Locks::mutator_lock_) {
+  void SetComponentType(ObjPtr<Class> new_component_type) REQUIRES_SHARED(Locks::mutator_lock_) {
     DCHECK(GetComponentType() == nullptr);
     DCHECK(new_component_type != nullptr);
     // Component type is invariant: use non-transactional mode without check.
@@ -508,7 +498,7 @@
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
            ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   bool IsObjectArrayClass() REQUIRES_SHARED(Locks::mutator_lock_) {
-    mirror::Class* const component_type = GetComponentType<kVerifyFlags, kReadBarrierOption>();
+    ObjPtr<Class> const component_type = GetComponentType<kVerifyFlags, kReadBarrierOption>();
     return component_type != nullptr && !component_type->IsPrimitive();
   }
 
@@ -528,12 +518,12 @@
 
   // Creates a raw object instance but does not invoke the default constructor.
   template<bool kIsInstrumented, bool kCheckAddFinalizer = true>
-  ALWAYS_INLINE Object* Alloc(Thread* self, gc::AllocatorType allocator_type)
+  ALWAYS_INLINE ObjPtr<Object> Alloc(Thread* self, gc::AllocatorType allocator_type)
       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
 
-  Object* AllocObject(Thread* self)
+  ObjPtr<Object> AllocObject(Thread* self)
       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
-  Object* AllocNonMovableObject(Thread* self)
+  ObjPtr<Object> AllocNonMovableObject(Thread* self)
       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
@@ -572,7 +562,7 @@
   // The size of java.lang.Class.class.
   static uint32_t ClassClassSize(PointerSize pointer_size) {
     // The number of vtable entries in java.lang.Class.
-    uint32_t vtable_entries = Object::kVTableLength + 72;
+    uint32_t vtable_entries = Object::kVTableLength + 73;
     return ComputeClassSize(true, vtable_entries, 0, 0, 4, 1, 0, pointer_size);
   }
 
@@ -638,11 +628,14 @@
   // Can this class access a resolved method?
   // Note that access to methods's class is checked and this may require looking up the class
   // referenced by the MethodId in the DexFile in case the declaring class is inaccessible.
-  bool CanAccessResolvedMethod(Class* access_to, ArtMethod* resolved_method,
-                               DexCache* dex_cache, uint32_t method_idx)
+  bool CanAccessResolvedMethod(ObjPtr<Class> access_to,
+                               ArtMethod* resolved_method,
+                               ObjPtr<DexCache> dex_cache,
+                               uint32_t method_idx)
       REQUIRES_SHARED(Locks::mutator_lock_);
   template <InvokeType throw_invoke_type>
-  bool CheckResolvedMethodAccess(Class* access_to, ArtMethod* resolved_method,
+  bool CheckResolvedMethodAccess(ObjPtr<Class> access_to,
+                                 ArtMethod* resolved_method,
                                  uint32_t method_idx)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
@@ -661,11 +654,12 @@
 
   // Get first common super class. It will never return null.
   // `This` and `klass` must be classes.
-  Class* GetCommonSuperClass(Handle<Class> klass) REQUIRES_SHARED(Locks::mutator_lock_);
+  ObjPtr<Class> GetCommonSuperClass(Handle<Class> klass) REQUIRES_SHARED(Locks::mutator_lock_);
 
-  void SetSuperClass(Class* new_super_class) REQUIRES_SHARED(Locks::mutator_lock_) {
+  void SetSuperClass(ObjPtr<Class> new_super_class) REQUIRES_SHARED(Locks::mutator_lock_) {
     // Super class is assigned once, except during class linker initialization.
-    Class* old_super_class = GetFieldObject<Class>(OFFSET_OF_OBJECT_MEMBER(Class, super_class_));
+    ObjPtr<Class> old_super_class =
+        GetFieldObject<Class>(OFFSET_OF_OBJECT_MEMBER(Class, super_class_));
     DCHECK(old_super_class == nullptr || old_super_class == new_super_class);
     DCHECK(new_super_class != nullptr);
     SetFieldObject<false>(OFFSET_OF_OBJECT_MEMBER(Class, super_class_), new_super_class);
@@ -681,12 +675,16 @@
 
   ClassLoader* GetClassLoader() ALWAYS_INLINE REQUIRES_SHARED(Locks::mutator_lock_);
 
-  void SetClassLoader(ClassLoader* new_cl) REQUIRES_SHARED(Locks::mutator_lock_);
+  void SetClassLoader(ObjPtr<ClassLoader> new_cl) REQUIRES_SHARED(Locks::mutator_lock_);
 
   static MemberOffset DexCacheOffset() {
     return MemberOffset(OFFSETOF_MEMBER(Class, dex_cache_));
   }
 
+  static MemberOffset IfTableOffset() {
+    return MemberOffset(OFFSETOF_MEMBER(Class, iftable_));
+  }
+
   enum {
     kDumpClassFullDetail = 1,
     kDumpClassClassLoader = (1 << 1),
@@ -699,7 +697,7 @@
   DexCache* GetDexCache() REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Also updates the dex_cache_strings_ variable from new_dex_cache.
-  void SetDexCache(DexCache* new_dex_cache) REQUIRES_SHARED(Locks::mutator_lock_);
+  void SetDexCache(ObjPtr<DexCache> new_dex_cache) REQUIRES_SHARED(Locks::mutator_lock_);
 
   ALWAYS_INLINE IterationRange<StrideIterator<ArtMethod>> GetDirectMethods(PointerSize pointer_size)
       REQUIRES_SHARED(Locks::mutator_lock_);
@@ -753,15 +751,16 @@
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   template <PointerSize kPointerSize, bool kTransactionActive>
-  static Method* GetDeclaredMethodInternal(Thread* self,
-                                           mirror::Class* klass,
-                                           mirror::String* name,
-                                           mirror::ObjectArray<mirror::Class>* args)
+  static ObjPtr<Method> GetDeclaredMethodInternal(Thread* self,
+                                                  ObjPtr<Class> klass,
+                                                  ObjPtr<String> name,
+                                                  ObjPtr<ObjectArray<Class>> args)
       REQUIRES_SHARED(Locks::mutator_lock_);
+
   template <PointerSize kPointerSize, bool kTransactionActive>
-  static Constructor* GetDeclaredConstructorInternal(Thread* self,
-                                                     mirror::Class* klass,
-                                                     mirror::ObjectArray<mirror::Class>* args)
+  static ObjPtr<Constructor> GetDeclaredConstructorInternal(Thread* self,
+                                                            ObjPtr<Class> klass,
+                                                            ObjPtr<ObjectArray<Class>> args)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
@@ -894,69 +893,86 @@
   ArtMethod* FindVirtualMethodForVirtualOrInterface(ArtMethod* method, PointerSize pointer_size)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  ArtMethod* FindInterfaceMethod(const StringPiece& name, const StringPiece& signature,
+  ArtMethod* FindInterfaceMethod(const StringPiece& name,
+                                 const StringPiece& signature,
                                  PointerSize pointer_size)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  ArtMethod* FindInterfaceMethod(const StringPiece& name, const Signature& signature,
+  ArtMethod* FindInterfaceMethod(const StringPiece& name,
+                                 const Signature& signature,
                                  PointerSize pointer_size)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  ArtMethod* FindInterfaceMethod(const DexCache* dex_cache, uint32_t dex_method_idx,
+  ArtMethod* FindInterfaceMethod(ObjPtr<DexCache> dex_cache,
+                                 uint32_t dex_method_idx,
                                  PointerSize pointer_size)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  ArtMethod* FindDeclaredDirectMethod(const StringPiece& name, const StringPiece& signature,
+  ArtMethod* FindDeclaredDirectMethod(const StringPiece& name,
+                                      const StringPiece& signature,
                                       PointerSize pointer_size)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  ArtMethod* FindDeclaredDirectMethod(const StringPiece& name, const Signature& signature,
+  ArtMethod* FindDeclaredDirectMethod(const StringPiece& name,
+                                      const Signature& signature,
                                       PointerSize pointer_size)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  ArtMethod* FindDeclaredDirectMethod(const DexCache* dex_cache, uint32_t dex_method_idx,
+  ArtMethod* FindDeclaredDirectMethod(ObjPtr<DexCache> dex_cache,
+                                      uint32_t dex_method_idx,
                                       PointerSize pointer_size)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  ArtMethod* FindDirectMethod(const StringPiece& name, const StringPiece& signature,
+  ArtMethod* FindDirectMethod(const StringPiece& name,
+                              const StringPiece& signature,
                               PointerSize pointer_size)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  ArtMethod* FindDirectMethod(const StringPiece& name, const Signature& signature,
+  ArtMethod* FindDirectMethod(const StringPiece& name,
+                              const Signature& signature,
                               PointerSize pointer_size)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  ArtMethod* FindDirectMethod(const DexCache* dex_cache, uint32_t dex_method_idx,
+  ArtMethod* FindDirectMethod(ObjPtr<DexCache> dex_cache,
+                              uint32_t dex_method_idx,
                               PointerSize pointer_size)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  ArtMethod* FindDeclaredVirtualMethod(const StringPiece& name, const StringPiece& signature,
+  ArtMethod* FindDeclaredVirtualMethod(const StringPiece& name,
+                                       const StringPiece& signature,
                                        PointerSize pointer_size)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  ArtMethod* FindDeclaredVirtualMethod(const StringPiece& name, const Signature& signature,
+  ArtMethod* FindDeclaredVirtualMethod(const StringPiece& name,
+                                       const Signature& signature,
                                        PointerSize pointer_size)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  ArtMethod* FindDeclaredVirtualMethod(const DexCache* dex_cache, uint32_t dex_method_idx,
+  ArtMethod* FindDeclaredVirtualMethod(ObjPtr<DexCache> dex_cache,
+                                       uint32_t dex_method_idx,
                                        PointerSize pointer_size)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  ArtMethod* FindDeclaredVirtualMethodByName(const StringPiece& name, PointerSize pointer_size)
+  ArtMethod* FindDeclaredVirtualMethodByName(const StringPiece& name,
+                                             PointerSize pointer_size)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  ArtMethod* FindDeclaredDirectMethodByName(const StringPiece& name, PointerSize pointer_size)
+  ArtMethod* FindDeclaredDirectMethodByName(const StringPiece& name,
+                                            PointerSize pointer_size)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  ArtMethod* FindVirtualMethod(const StringPiece& name, const StringPiece& signature,
+  ArtMethod* FindVirtualMethod(const StringPiece& name,
+                               const StringPiece& signature,
                                PointerSize pointer_size)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  ArtMethod* FindVirtualMethod(const StringPiece& name, const Signature& signature,
+  ArtMethod* FindVirtualMethod(const StringPiece& name,
+                               const Signature& signature,
                                PointerSize pointer_size)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  ArtMethod* FindVirtualMethod(const DexCache* dex_cache, uint32_t dex_method_idx,
+  ArtMethod* FindVirtualMethod(ObjPtr<DexCache> dex_cache,
+                               uint32_t dex_method_idx,
                                PointerSize pointer_size)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
@@ -976,7 +992,8 @@
            ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   ALWAYS_INLINE IfTable* GetIfTable() REQUIRES_SHARED(Locks::mutator_lock_);
 
-  ALWAYS_INLINE void SetIfTable(IfTable* new_iftable) REQUIRES_SHARED(Locks::mutator_lock_);
+  ALWAYS_INLINE void SetIfTable(ObjPtr<IfTable> new_iftable)
+      REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Get instance fields of the class (See also GetSFields).
   LengthPrefixedArray<ArtField>* GetIFieldsPtr() REQUIRES_SHARED(Locks::mutator_lock_);
@@ -1077,45 +1094,50 @@
 
   // Finds the given instance field in this class or a superclass, only searches classes that
   // have the same dex cache.
-  ArtField* FindInstanceField(const DexCache* dex_cache, uint32_t dex_field_idx)
+  ArtField* FindInstanceField(ObjPtr<DexCache> dex_cache, uint32_t dex_field_idx)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   ArtField* FindDeclaredInstanceField(const StringPiece& name, const StringPiece& type)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  ArtField* FindDeclaredInstanceField(const DexCache* dex_cache, uint32_t dex_field_idx)
+  ArtField* FindDeclaredInstanceField(ObjPtr<DexCache> dex_cache, uint32_t dex_field_idx)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Finds the given static field in this class or a superclass.
-  static ArtField* FindStaticField(Thread* self, Handle<Class> klass, const StringPiece& name,
+  static ArtField* FindStaticField(Thread* self,
+                                   Handle<Class> klass,
+                                   const StringPiece& name,
                                    const StringPiece& type)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Finds the given static field in this class or superclass, only searches classes that
   // have the same dex cache.
   static ArtField* FindStaticField(Thread* self,
-                                   Class* klass,
-                                   const DexCache* dex_cache,
+                                   ObjPtr<Class> klass,
+                                   ObjPtr<DexCache> dex_cache,
                                    uint32_t dex_field_idx)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   ArtField* FindDeclaredStaticField(const StringPiece& name, const StringPiece& type)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  ArtField* FindDeclaredStaticField(const DexCache* dex_cache, uint32_t dex_field_idx)
+  ArtField* FindDeclaredStaticField(ObjPtr<DexCache> dex_cache, uint32_t dex_field_idx)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   pid_t GetClinitThreadId() REQUIRES_SHARED(Locks::mutator_lock_) {
-    DCHECK(IsIdxLoaded() || IsErroneous()) << PrettyClass(this);
+    DCHECK(IsIdxLoaded() || IsErroneous()) << PrettyClass();
     return GetField32(OFFSET_OF_OBJECT_MEMBER(Class, clinit_thread_id_));
   }
 
   void SetClinitThreadId(pid_t new_clinit_thread_id) REQUIRES_SHARED(Locks::mutator_lock_);
 
-  Object* GetVerifyError() REQUIRES_SHARED(Locks::mutator_lock_) {
-    // DCHECK(IsErroneous());
-    return GetFieldObject<Class>(OFFSET_OF_OBJECT_MEMBER(Class, verify_error_));
-  }
+  ClassExt* GetExtData() REQUIRES_SHARED(Locks::mutator_lock_);
+
+  // Returns the ExtData for this class, allocating one if necessary. This should be the only way
+  // to force ext_data_ to be set. No functions are available for changing an already set ext_data_
+  // since doing so is not allowed.
+  ClassExt* EnsureExtDataPresent(Thread* self)
+      REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
 
   uint16_t GetDexClassDefIndex() REQUIRES_SHARED(Locks::mutator_lock_) {
     return GetField32(OFFSET_OF_OBJECT_MEMBER(Class, dex_class_def_idx_));
@@ -1148,7 +1170,7 @@
   }
 
   // Can't call this SetClass or else gets called instead of Object::SetClass in places.
-  static void SetClassClass(Class* java_lang_Class) REQUIRES_SHARED(Locks::mutator_lock_);
+  static void SetClassClass(ObjPtr<Class> java_lang_Class) REQUIRES_SHARED(Locks::mutator_lock_);
   static void ResetClass();
   static void VisitRoots(RootVisitor* visitor)
       REQUIRES_SHARED(Locks::mutator_lock_);
@@ -1178,8 +1200,9 @@
 
   uint16_t GetDirectInterfaceTypeIdx(uint32_t idx) REQUIRES_SHARED(Locks::mutator_lock_);
 
-  static mirror::Class* GetDirectInterface(Thread* self, Handle<mirror::Class> klass,
-                                           uint32_t idx)
+  static ObjPtr<Class> GetDirectInterface(Thread* self,
+                                          Handle<Class> klass,
+                                          uint32_t idx)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   const char* GetSourceFile() REQUIRES_SHARED(Locks::mutator_lock_);
@@ -1194,7 +1217,9 @@
   void AssertInitializedOrInitializingInThread(Thread* self)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  Class* CopyOf(Thread* self, int32_t new_length, ImTable* imt,
+  Class* CopyOf(Thread* self,
+                int32_t new_length,
+                ImTable* imt,
                 PointerSize pointer_size)
       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
 
@@ -1218,8 +1243,9 @@
   }
 
   // May cause thread suspension due to EqualParameters.
-  ArtMethod* GetDeclaredConstructor(
-      Thread* self, Handle<mirror::ObjectArray<mirror::Class>> args, PointerSize pointer_size)
+  ArtMethod* GetDeclaredConstructor(Thread* self,
+                                    Handle<ObjectArray<Class>> args,
+                                    PointerSize pointer_size)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   static int32_t GetInnerClassFlags(Handle<Class> h_this, int32_t default_value)
@@ -1232,7 +1258,7 @@
     explicit InitializeClassVisitor(uint32_t class_size) : class_size_(class_size) {
     }
 
-    void operator()(mirror::Object* obj, size_t usable_size) const
+    void operator()(ObjPtr<Object> obj, size_t usable_size) const
         REQUIRES_SHARED(Locks::mutator_lock_);
 
    private:
@@ -1270,6 +1296,22 @@
   ALWAYS_INLINE ArraySlice<ArtMethod> GetCopiedMethodsSliceUnchecked(PointerSize pointer_size)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
+  static std::string PrettyDescriptor(ObjPtr<mirror::Class> klass)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+  std::string PrettyDescriptor()
+      REQUIRES_SHARED(Locks::mutator_lock_);
+  // Returns a human-readable form of the name of the given class.
+  // Given String.class, the output would be "java.lang.Class<java.lang.String>".
+  static std::string PrettyClass(ObjPtr<mirror::Class> c)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+  std::string PrettyClass()
+      REQUIRES_SHARED(Locks::mutator_lock_);
+  // Returns a human-readable form of the name of the given class with its class loader.
+  static std::string PrettyClassAndClassLoader(ObjPtr<mirror::Class> c)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+  std::string PrettyClassAndClassLoader()
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
   // Fix up all of the native pointers in the class by running them through the visitor. Only sets
   // the corresponding entry in dest if visitor(obj) != obj to prevent dirty memory. Dest should be
   // initialized to a copy of *this to prevent issues. Does not visit the ArtMethod and ArtField
@@ -1277,15 +1319,13 @@
   template <VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
             ReadBarrierOption kReadBarrierOption = kWithReadBarrier,
             typename Visitor>
-  void FixupNativePointers(mirror::Class* dest, PointerSize pointer_size, const Visitor& visitor)
+  void FixupNativePointers(Class* dest, PointerSize pointer_size, const Visitor& visitor)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
  private:
   ALWAYS_INLINE void SetMethodsPtrInternal(LengthPrefixedArray<ArtMethod>* new_methods)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  void SetVerifyError(Object* klass) REQUIRES_SHARED(Locks::mutator_lock_);
-
   template <bool throw_on_failure, bool use_referrers_cache>
   bool ResolvedFieldAccessTest(ObjPtr<Class> access_to,
                                ArtField* field,
@@ -1300,7 +1340,7 @@
                                 ObjPtr<DexCache> dex_cache)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  bool Implements(Class* klass) REQUIRES_SHARED(Locks::mutator_lock_);
+  bool Implements(ObjPtr<Class> klass) REQUIRES_SHARED(Locks::mutator_lock_);
   bool IsArrayAssignableFromArray(ObjPtr<Class> klass) REQUIRES_SHARED(Locks::mutator_lock_);
   bool IsAssignableFromArray(ObjPtr<Class> klass) REQUIRES_SHARED(Locks::mutator_lock_);
 
@@ -1333,7 +1373,7 @@
             VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
             ReadBarrierOption kReadBarrierOption = kWithReadBarrier,
             typename Visitor>
-  void VisitReferences(mirror::Class* klass, const Visitor& visitor)
+  void VisitReferences(ObjPtr<Class> klass, const Visitor& visitor)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // 'Class' Object Fields
@@ -1350,6 +1390,12 @@
   // runtime such as arrays and primitive classes).
   HeapReference<DexCache> dex_cache_;
 
+  // Extraneous class data that is not always needed. This field is allocated lazily and may
+  // only be set with 'this' locked. This is synchronized on 'this'.
+  // TODO(allight) We should probably synchronize it on something external or handle allocation in
+  // some other (safe) way to prevent possible deadlocks.
+  HeapReference<ClassExt> ext_data_;
+
   // The interface table (iftable_) contains pairs of a interface class and an array of the
   // interface methods. There is one pair per interface supported by this class.  That means one
   // pair for each interface we support directly, indirectly via superclass, or indirectly via a
@@ -1374,10 +1420,6 @@
   // check for interfaces and return null.
   HeapReference<Class> super_class_;
 
-  // If class verify fails, we must return same error on subsequent tries. We may store either
-  // the class of the error, or an actual instance of Throwable here.
-  HeapReference<Object> verify_error_;
-
   // Virtual method table (vtable), for use by "invoke-virtual".  The vtable from the superclass is
   // copied in, and virtual methods from our class either replace those from the super or are
   // appended. For abstract classes, methods may be created in the vtable that aren't in
diff --git a/runtime/mirror/class_ext.cc b/runtime/mirror/class_ext.cc
new file mode 100644
index 0000000..cc208e4
--- /dev/null
+++ b/runtime/mirror/class_ext.cc
@@ -0,0 +1,65 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "class_ext.h"
+
+#include "art_method-inl.h"
+#include "base/casts.h"
+#include "base/enums.h"
+#include "class-inl.h"
+#include "dex_file-inl.h"
+#include "gc/accounting/card_table-inl.h"
+#include "object-inl.h"
+#include "object_array.h"
+#include "object_array-inl.h"
+#include "stack_trace_element.h"
+#include "utils.h"
+#include "well_known_classes.h"
+
+namespace art {
+namespace mirror {
+
+GcRoot<Class> ClassExt::dalvik_system_ClassExt_;
+
+ClassExt* ClassExt::Alloc(Thread* self) {
+  DCHECK(dalvik_system_ClassExt_.Read() != nullptr);
+  return down_cast<ClassExt*>(dalvik_system_ClassExt_.Read()->AllocObject(self).Ptr());
+}
+
+void ClassExt::SetVerifyError(ObjPtr<Object> err) {
+  if (Runtime::Current()->IsActiveTransaction()) {
+    SetFieldObject<true>(OFFSET_OF_OBJECT_MEMBER(ClassExt, verify_error_), err);
+  } else {
+    SetFieldObject<false>(OFFSET_OF_OBJECT_MEMBER(ClassExt, verify_error_), err);
+  }
+}
+
+void ClassExt::SetClass(ObjPtr<Class> dalvik_system_ClassExt) {
+  CHECK(dalvik_system_ClassExt != nullptr);
+  dalvik_system_ClassExt_ = GcRoot<Class>(dalvik_system_ClassExt);
+}
+
+void ClassExt::ResetClass() {
+  CHECK(!dalvik_system_ClassExt_.IsNull());
+  dalvik_system_ClassExt_ = GcRoot<Class>(nullptr);
+}
+
+void ClassExt::VisitRoots(RootVisitor* visitor) {
+  dalvik_system_ClassExt_.VisitRootIfNonNull(visitor, RootInfo(kRootStickyClass));
+}
+
+}  // namespace mirror
+}  // namespace art
diff --git a/runtime/mirror/class_ext.h b/runtime/mirror/class_ext.h
new file mode 100644
index 0000000..35eaae1
--- /dev/null
+++ b/runtime/mirror/class_ext.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_MIRROR_CLASS_EXT_H_
+#define ART_RUNTIME_MIRROR_CLASS_EXT_H_
+
+#include "class-inl.h"
+
+#include "gc_root.h"
+#include "object.h"
+#include "object_callbacks.h"
+#include "string.h"
+
+namespace art {
+
+struct ClassExtOffsets;
+
+namespace mirror {
+
+// C++ mirror of dalvik.system.ClassExt
+class MANAGED ClassExt : public Object {
+ public:
+  static uint32_t ClassSize(PointerSize pointer_size) {
+    uint32_t vtable_entries = Object::kVTableLength;
+    return Class::ComputeClassSize(true, vtable_entries, 0, 0, 0, 0, 0, pointer_size);
+  }
+
+  // Size of an instance of dalvik.system.ClassExt.
+  static constexpr uint32_t InstanceSize() {
+    return sizeof(ClassExt);
+  }
+
+  void SetVerifyError(ObjPtr<Object> obj) REQUIRES_SHARED(Locks::mutator_lock_);
+
+  Object* GetVerifyError() REQUIRES_SHARED(Locks::mutator_lock_) {
+    return GetFieldObject<ClassExt>(OFFSET_OF_OBJECT_MEMBER(ClassExt, verify_error_));
+  }
+
+  static void SetClass(ObjPtr<Class> dalvik_system_ClassExt);
+  static void ResetClass();
+  static void VisitRoots(RootVisitor* visitor) REQUIRES_SHARED(Locks::mutator_lock_);
+
+  static ClassExt* Alloc(Thread* self) REQUIRES_SHARED(Locks::mutator_lock_);
+
+ private:
+  // Field order required by test "ValidateFieldOrderOfJavaCppUnionClasses".
+  HeapReference<Object> verify_error_;
+
+  static GcRoot<Class> dalvik_system_ClassExt_;
+
+  friend struct art::ClassExtOffsets;  // for verifying offset information
+  DISALLOW_IMPLICIT_CONSTRUCTORS(ClassExt);
+};
+
+}  // namespace mirror
+}  // namespace art
+
+#endif  // ART_RUNTIME_MIRROR_CLASS_EXT_H_
diff --git a/runtime/mirror/class_loader-inl.h b/runtime/mirror/class_loader-inl.h
index cc910b0..f5ecdae 100644
--- a/runtime/mirror/class_loader-inl.h
+++ b/runtime/mirror/class_loader-inl.h
@@ -21,6 +21,7 @@
 
 #include "base/mutex-inl.h"
 #include "class_table-inl.h"
+#include "obj_ptr-inl.h"
 
 namespace art {
 namespace mirror {
@@ -29,7 +30,7 @@
           VerifyObjectFlags kVerifyFlags,
           ReadBarrierOption kReadBarrierOption,
           typename Visitor>
-inline void ClassLoader::VisitReferences(mirror::Class* klass, const Visitor& visitor) {
+inline void ClassLoader::VisitReferences(ObjPtr<mirror::Class> klass, const Visitor& visitor) {
   // Visit instance fields first.
   VisitInstanceFieldsReferences<kVerifyFlags, kReadBarrierOption>(klass, visitor);
   if (kVisitClasses) {
diff --git a/runtime/mirror/class_loader.h b/runtime/mirror/class_loader.h
index 407678a..a62a460 100644
--- a/runtime/mirror/class_loader.h
+++ b/runtime/mirror/class_loader.h
@@ -67,7 +67,7 @@
             VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
             ReadBarrierOption kReadBarrierOption = kWithReadBarrier,
             typename Visitor>
-  void VisitReferences(mirror::Class* klass, const Visitor& visitor)
+  void VisitReferences(ObjPtr<Class> klass, const Visitor& visitor)
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!Locks::classlinker_classes_lock_);
 
diff --git a/runtime/mirror/dex_cache-inl.h b/runtime/mirror/dex_cache-inl.h
index b388f65..c7a123b 100644
--- a/runtime/mirror/dex_cache-inl.h
+++ b/runtime/mirror/dex_cache-inl.h
@@ -24,9 +24,11 @@
 #include "base/casts.h"
 #include "base/enums.h"
 #include "base/logging.h"
+#include "gc_root.h"
 #include "mirror/class.h"
 #include "mirror/method_type.h"
 #include "runtime.h"
+#include "obj_ptr.h"
 
 #include <atomic>
 
@@ -43,8 +45,8 @@
   return StringDexCachePair::Lookup(GetStrings(), string_idx, NumStrings()).Read();
 }
 
-inline void DexCache::SetResolvedString(uint32_t string_idx, mirror::String* resolved) {
-  StringDexCachePair::Assign(GetStrings(), string_idx, resolved, NumStrings());
+inline void DexCache::SetResolvedString(uint32_t string_idx, ObjPtr<mirror::String> resolved) {
+  StringDexCachePair::Assign(GetStrings(), string_idx, resolved.Ptr(), NumStrings());
   Runtime* const runtime = Runtime::Current();
   if (UNLIKELY(runtime->IsActiveTransaction())) {
     DCHECK(runtime->IsAotCompiler());
@@ -72,7 +74,7 @@
   return GetResolvedTypes()[type_idx].Read();
 }
 
-inline void DexCache::SetResolvedType(uint32_t type_idx, Class* resolved) {
+inline void DexCache::SetResolvedType(uint32_t type_idx, ObjPtr<Class> resolved) {
   DCHECK_LT(type_idx, NumResolvedTypes());  // NOTE: Unchecked, i.e. not throwing AIOOB.
   // TODO default transaction support.
   GetResolvedTypes()[type_idx] = GcRoot<Class>(resolved);
@@ -158,30 +160,47 @@
   }
 }
 
+template <typename T,
+          ReadBarrierOption kReadBarrierOption,
+          typename Visitor>
+inline void VisitDexCachePairs(std::atomic<DexCachePair<T>>* pairs,
+                               size_t num_pairs,
+                               const Visitor& visitor)
+    REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_) {
+  for (size_t i = 0; i < num_pairs; ++i) {
+    DexCachePair<T> source = pairs[i].load(std::memory_order_relaxed);
+    // NOTE: We need the "template" keyword here to avoid a compilation
+    // failure. GcRoot<T> is a template argument-dependent type and we need to
+    // tell the compiler to treat "Read" as a template rather than a field or
+    // function. Otherwise, on encountering the "<" token, the compiler would
+    // treat "Read" as a field.
+    T* const before = source.object.template Read<kReadBarrierOption>();
+    visitor.VisitRootIfNonNull(source.object.AddressWithoutBarrier());
+    if (source.object.template Read<kReadBarrierOption>() != before) {
+      pairs[i].store(source, std::memory_order_relaxed);
+    }
+  }
+}
+
 template <bool kVisitNativeRoots,
           VerifyObjectFlags kVerifyFlags,
           ReadBarrierOption kReadBarrierOption,
           typename Visitor>
-inline void DexCache::VisitReferences(mirror::Class* klass, const Visitor& visitor) {
+inline void DexCache::VisitReferences(ObjPtr<Class> klass, const Visitor& visitor) {
   // Visit instance fields first.
   VisitInstanceFieldsReferences<kVerifyFlags, kReadBarrierOption>(klass, visitor);
   // Visit arrays after.
   if (kVisitNativeRoots) {
-    mirror::StringDexCacheType* strings = GetStrings();
-    for (size_t i = 0, num_strings = NumStrings(); i != num_strings; ++i) {
-      StringDexCachePair source = strings[i].load(std::memory_order_relaxed);
-      mirror::String* before = source.object.Read<kReadBarrierOption>();
-      GcRoot<mirror::String> root(before);
-      visitor.VisitRootIfNonNull(root.AddressWithoutBarrier());
-      if (root.Read() != before) {
-        source.object = GcRoot<String>(root.Read());
-        strings[i].store(source, std::memory_order_relaxed);
-      }
-    }
+    VisitDexCachePairs<mirror::String, kReadBarrierOption, Visitor>(
+        GetStrings(), NumStrings(), visitor);
+
     GcRoot<mirror::Class>* resolved_types = GetResolvedTypes();
     for (size_t i = 0, num_types = NumResolvedTypes(); i != num_types; ++i) {
       visitor.VisitRootIfNonNull(resolved_types[i].AddressWithoutBarrier());
     }
+
+    VisitDexCachePairs<mirror::MethodType, kReadBarrierOption, Visitor>(
+        GetResolvedMethodTypes(), NumResolvedMethodTypes(), visitor);
   }
 }
 
@@ -207,6 +226,19 @@
   }
 }
 
+template <ReadBarrierOption kReadBarrierOption, typename Visitor>
+inline void DexCache::FixupResolvedMethodTypes(mirror::MethodTypeDexCacheType* dest,
+                                               const Visitor& visitor) {
+  mirror::MethodTypeDexCacheType* src = GetResolvedMethodTypes();
+  for (size_t i = 0, count = NumResolvedMethodTypes(); i < count; ++i) {
+    MethodTypeDexCachePair source = src[i].load(std::memory_order_relaxed);
+    mirror::MethodType* ptr = source.object.Read<kReadBarrierOption>();
+    mirror::MethodType* new_source = visitor(ptr);
+    source.object = GcRoot<MethodType>(new_source);
+    dest[i].store(source, std::memory_order_relaxed);
+  }
+}
+
 }  // namespace mirror
 }  // namespace art
 
diff --git a/runtime/mirror/dex_cache.cc b/runtime/mirror/dex_cache.cc
index 66f858c..a32d51f 100644
--- a/runtime/mirror/dex_cache.cc
+++ b/runtime/mirror/dex_cache.cc
@@ -32,7 +32,7 @@
 namespace mirror {
 
 void DexCache::Init(const DexFile* dex_file,
-                    String* location,
+                    ObjPtr<String> location,
                     StringDexCacheType* strings,
                     uint32_t num_strings,
                     GcRoot<Class>* resolved_types,
@@ -84,7 +84,7 @@
   }
 }
 
-void DexCache::SetLocation(mirror::String* location) {
+void DexCache::SetLocation(ObjPtr<mirror::String> location) {
   SetFieldObject<false>(OFFSET_OF_OBJECT_MEMBER(DexCache, location_), location);
 }
 
diff --git a/runtime/mirror/dex_cache.h b/runtime/mirror/dex_cache.h
index 2fcabb5..1ae694d 100644
--- a/runtime/mirror/dex_cache.h
+++ b/runtime/mirror/dex_cache.h
@@ -74,6 +74,7 @@
   static GcRoot<T> Lookup(std::atomic<DexCachePair<T>>* dex_cache,
                           uint32_t idx,
                           uint32_t cache_size) {
+    DCHECK_NE(cache_size, 0u);
     DexCachePair<T> element = dex_cache[idx % cache_size].load(std::memory_order_relaxed);
     if (idx != element.index) {
       return GcRoot<T>(nullptr);
@@ -136,7 +137,7 @@
   }
 
   void Init(const DexFile* dex_file,
-            String* location,
+            ObjPtr<String> location,
             StringDexCacheType* strings,
             uint32_t num_strings,
             GcRoot<Class>* resolved_types,
@@ -160,6 +161,10 @@
   void FixupResolvedTypes(GcRoot<mirror::Class>* dest, const Visitor& visitor)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
+  template <ReadBarrierOption kReadBarrierOption = kWithReadBarrier, typename Visitor>
+  void FixupResolvedMethodTypes(MethodTypeDexCacheType* dest, const Visitor& visitor)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
   String* GetLocation() REQUIRES_SHARED(Locks::mutator_lock_) {
     return GetFieldObject<String>(OFFSET_OF_OBJECT_MEMBER(DexCache, location_));
   }
@@ -211,7 +216,7 @@
   mirror::String* GetResolvedString(uint32_t string_idx) ALWAYS_INLINE
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  void SetResolvedString(uint32_t string_idx, mirror::String* resolved) ALWAYS_INLINE
+  void SetResolvedString(uint32_t string_idx, ObjPtr<mirror::String> resolved) ALWAYS_INLINE
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Clear a string for a string_idx, used to undo string intern transactions to make sure
@@ -220,7 +225,8 @@
 
   Class* GetResolvedType(uint32_t type_idx) REQUIRES_SHARED(Locks::mutator_lock_);
 
-  void SetResolvedType(uint32_t type_idx, Class* resolved) REQUIRES_SHARED(Locks::mutator_lock_);
+  void SetResolvedType(uint32_t type_idx, ObjPtr<Class> resolved)
+      REQUIRES_SHARED(Locks::mutator_lock_);
 
   ALWAYS_INLINE ArtMethod* GetResolvedMethod(uint32_t method_idx, PointerSize ptr_size)
       REQUIRES_SHARED(Locks::mutator_lock_);
@@ -282,7 +288,7 @@
 
   MethodTypeDexCacheType* GetResolvedMethodTypes()
       ALWAYS_INLINE REQUIRES_SHARED(Locks::mutator_lock_) {
-    return GetFieldPtr<MethodTypeDexCacheType*>(ResolvedMethodTypesOffset());
+    return GetFieldPtr64<MethodTypeDexCacheType*>(ResolvedMethodTypesOffset());
   }
 
   void SetResolvedMethodTypes(MethodTypeDexCacheType* resolved_method_types)
@@ -319,7 +325,7 @@
     SetFieldPtr<false>(OFFSET_OF_OBJECT_MEMBER(DexCache, dex_file_), dex_file);
   }
 
-  void SetLocation(mirror::String* location) REQUIRES_SHARED(Locks::mutator_lock_);
+  void SetLocation(ObjPtr<mirror::String> location) REQUIRES_SHARED(Locks::mutator_lock_);
 
   // NOTE: Get/SetElementPtrSize() are intended for working with ArtMethod** and ArtField**
   // provided by GetResolvedMethods/Fields() and ArtMethod::GetDexCacheResolvedMethods(),
@@ -337,7 +343,7 @@
             VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
             ReadBarrierOption kReadBarrierOption = kWithReadBarrier,
             typename Visitor>
-  void VisitReferences(mirror::Class* klass, const Visitor& visitor)
+  void VisitReferences(ObjPtr<mirror::Class> klass, const Visitor& visitor)
       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(Locks::heap_bitmap_lock_);
 
   HeapReference<Object> dex_;
diff --git a/runtime/mirror/dex_cache_test.cc b/runtime/mirror/dex_cache_test.cc
index e95ca21..916f1cf 100644
--- a/runtime/mirror/dex_cache_test.cc
+++ b/runtime/mirror/dex_cache_test.cc
@@ -55,9 +55,8 @@
   EXPECT_EQ(java_lang_dex_file_->NumTypeIds(),   dex_cache->NumResolvedTypes());
   EXPECT_EQ(java_lang_dex_file_->NumMethodIds(), dex_cache->NumResolvedMethods());
   EXPECT_EQ(java_lang_dex_file_->NumFieldIds(),  dex_cache->NumResolvedFields());
-  // This should always be zero because the -Xexperimental:method-handles isn't
-  // set.
-  EXPECT_EQ(0u, dex_cache->NumResolvedMethodTypes());
+  EXPECT_TRUE(dex_cache->StaticMethodTypeSize() == dex_cache->NumResolvedMethodTypes()
+      || java_lang_dex_file_->NumProtoIds() == dex_cache->NumResolvedMethodTypes());
 }
 
 TEST_F(DexCacheMethodHandlesTest, Open) {
diff --git a/runtime/mirror/emulated_stack_frame.cc b/runtime/mirror/emulated_stack_frame.cc
new file mode 100644
index 0000000..d607040
--- /dev/null
+++ b/runtime/mirror/emulated_stack_frame.cc
@@ -0,0 +1,308 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "emulated_stack_frame.h"
+
+#include "class-inl.h"
+#include "gc_root-inl.h"
+#include "jvalue-inl.h"
+#include "method_handles.h"
+#include "method_handles-inl.h"
+#include "reflection-inl.h"
+
+namespace art {
+namespace mirror {
+
+GcRoot<mirror::Class> EmulatedStackFrame::static_class_;
+
+// Calculates the size of a stack frame based on the size of its argument
+// types and return types.
+static void CalculateFrameAndReferencesSize(ObjPtr<mirror::ObjectArray<mirror::Class>> p_types,
+                                            ObjPtr<mirror::Class> r_type,
+                                            size_t* frame_size_out,
+                                            size_t* references_size_out)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  const size_t length = p_types->GetLength();
+  size_t frame_size = 0;
+  size_t references_size = 0;
+  for (size_t i = 0; i < length; ++i) {
+    ObjPtr<mirror::Class> type = p_types->GetWithoutChecks(i);
+    const Primitive::Type primitive_type = type->GetPrimitiveType();
+    if (primitive_type == Primitive::kPrimNot) {
+      references_size++;
+    } else if (Primitive::Is64BitType(primitive_type)) {
+      frame_size += 8;
+    } else {
+      frame_size += 4;
+    }
+  }
+
+  const Primitive::Type return_type = r_type->GetPrimitiveType();
+  if (return_type == Primitive::kPrimNot) {
+    references_size++;
+  } else if (Primitive::Is64BitType(return_type)) {
+    frame_size += 8;
+  } else {
+    frame_size += 4;
+  }
+
+  (*frame_size_out) = frame_size;
+  (*references_size_out) = references_size;
+}
+
+// Allows for read or write access to an emulated stack frame. Each
+// accessor index has an associated index into the references / stack frame
+// arrays which is incremented on every read or write to the frame.
+//
+// This class is used in conjunction with PerformConversions, either as a setter
+// or as a getter.
+class EmulatedStackFrameAccessor {
+ public:
+  EmulatedStackFrameAccessor(Handle<mirror::ObjectArray<mirror::Object>> references,
+                             Handle<mirror::ByteArray> stack_frame,
+                             size_t stack_frame_size) :
+    references_(references),
+    stack_frame_(stack_frame),
+    stack_frame_size_(stack_frame_size),
+    reference_idx_(0u),
+    stack_frame_idx_(0u) {
+  }
+
+  ALWAYS_INLINE void SetReference(ObjPtr<mirror::Object> reference)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    references_->Set(reference_idx_++, reference);
+  }
+
+  ALWAYS_INLINE void Set(const uint32_t value) REQUIRES_SHARED(Locks::mutator_lock_) {
+    int8_t* array = stack_frame_->GetData();
+
+    CHECK_LE((stack_frame_idx_ + 4u), stack_frame_size_);
+    memcpy(array + stack_frame_idx_, &value, sizeof(uint32_t));
+    stack_frame_idx_ += 4u;
+  }
+
+  ALWAYS_INLINE void SetLong(const int64_t value) REQUIRES_SHARED(Locks::mutator_lock_) {
+    int8_t* array = stack_frame_->GetData();
+
+    CHECK_LE((stack_frame_idx_ + 8u), stack_frame_size_);
+    memcpy(array + stack_frame_idx_, &value, sizeof(int64_t));
+    stack_frame_idx_ += 8u;
+  }
+
+  ALWAYS_INLINE ObjPtr<mirror::Object> GetReference() REQUIRES_SHARED(Locks::mutator_lock_) {
+    return ObjPtr<mirror::Object>(references_->Get(reference_idx_++));
+  }
+
+  ALWAYS_INLINE uint32_t Get() REQUIRES_SHARED(Locks::mutator_lock_) {
+    const int8_t* array = stack_frame_->GetData();
+
+    CHECK_LE((stack_frame_idx_ + 4u), stack_frame_size_);
+    uint32_t val = 0;
+
+    memcpy(&val, array + stack_frame_idx_, sizeof(uint32_t));
+    stack_frame_idx_ += 4u;
+    return val;
+  }
+
+  ALWAYS_INLINE int64_t GetLong() REQUIRES_SHARED(Locks::mutator_lock_) {
+    const int8_t* array = stack_frame_->GetData();
+
+    CHECK_LE((stack_frame_idx_ + 8u), stack_frame_size_);
+    int64_t val = 0;
+
+    memcpy(&val, array + stack_frame_idx_, sizeof(int64_t));
+    stack_frame_idx_ += 8u;
+    return val;
+  }
+
+ private:
+  Handle<mirror::ObjectArray<mirror::Object>> references_;
+  Handle<mirror::ByteArray> stack_frame_;
+  const size_t stack_frame_size_;
+
+  size_t reference_idx_;
+  size_t stack_frame_idx_;
+
+  DISALLOW_COPY_AND_ASSIGN(EmulatedStackFrameAccessor);
+};
+
+template <bool is_range>
+mirror::EmulatedStackFrame* EmulatedStackFrame::CreateFromShadowFrameAndArgs(
+    Thread* self,
+    Handle<mirror::MethodType> caller_type,
+    Handle<mirror::MethodType> callee_type,
+    const ShadowFrame& caller_frame,
+    const uint32_t first_src_reg,
+    const uint32_t (&arg)[Instruction::kMaxVarArgRegs]) {
+  StackHandleScope<6> hs(self);
+
+  // Step 1: We must throw a WrongMethodTypeException if there's a mismatch in the
+  // number of arguments between the caller and the callsite.
+  Handle<mirror::ObjectArray<mirror::Class>> from_types(hs.NewHandle(caller_type->GetPTypes()));
+  Handle<mirror::ObjectArray<mirror::Class>> to_types(hs.NewHandle(callee_type->GetPTypes()));
+
+  const int32_t num_method_params = from_types->GetLength();
+  if (to_types->GetLength() != num_method_params) {
+    ThrowWrongMethodTypeException(callee_type.Get(), caller_type.Get());
+    return nullptr;
+  }
+
+  // Step 2: Calculate the size of the reference / byte arrays in the emulated
+  // stack frame.
+  size_t frame_size = 0;
+  size_t refs_size = 0;
+  Handle<mirror::Class> r_type(hs.NewHandle(callee_type->GetRType()));
+  CalculateFrameAndReferencesSize(to_types.Get(), r_type.Get(), &frame_size, &refs_size);
+
+  // Step 3 : Allocate the arrays.
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  ObjPtr<mirror::Class> array_class(class_linker->GetClassRoot(ClassLinker::kObjectArrayClass));
+
+  Handle<mirror::ObjectArray<mirror::Object>> references(hs.NewHandle(
+      mirror::ObjectArray<mirror::Object>::Alloc(self, array_class, refs_size)));
+  if (references.Get() == nullptr) {
+    DCHECK(self->IsExceptionPending());
+    return nullptr;
+  }
+
+  Handle<ByteArray> stack_frame(hs.NewHandle(ByteArray::Alloc(self, frame_size)));
+  if (stack_frame.Get() == nullptr) {
+    DCHECK(self->IsExceptionPending());
+    return nullptr;
+  }
+
+  // Step 4 : Perform argument conversions (if required).
+  ShadowFrameGetter<is_range> getter(first_src_reg, arg, caller_frame);
+  EmulatedStackFrameAccessor setter(references, stack_frame, stack_frame->GetLength());
+  if (!PerformConversions<ShadowFrameGetter<is_range>, EmulatedStackFrameAccessor>(
+          self, caller_type, callee_type, &getter, &setter, num_method_params)) {
+    return nullptr;
+  }
+
+  // Step 5: Construct the EmulatedStackFrame object.
+  Handle<EmulatedStackFrame> sf(hs.NewHandle(
+      ObjPtr<EmulatedStackFrame>::DownCast(StaticClass()->AllocObject(self))));
+  sf->SetFieldObject<false>(TypeOffset(), callee_type.Get());
+  sf->SetFieldObject<false>(ReferencesOffset(), references.Get());
+  sf->SetFieldObject<false>(StackFrameOffset(), stack_frame.Get());
+
+  return sf.Get();
+}
+
+bool EmulatedStackFrame::WriteToShadowFrame(Thread* self,
+                                            Handle<mirror::MethodType> callee_type,
+                                            const uint32_t first_dest_reg,
+                                            ShadowFrame* callee_frame) {
+  ObjPtr<mirror::ObjectArray<mirror::Class>> from_types(GetType()->GetPTypes());
+  ObjPtr<mirror::ObjectArray<mirror::Class>> to_types(callee_type->GetPTypes());
+
+  const int32_t num_method_params = from_types->GetLength();
+  if (to_types->GetLength() != num_method_params) {
+    ThrowWrongMethodTypeException(callee_type.Get(), GetType());
+    return false;
+  }
+
+  StackHandleScope<3> hs(self);
+  Handle<mirror::MethodType> frame_callsite_type(hs.NewHandle(GetType()));
+  Handle<mirror::ObjectArray<mirror::Object>> references(hs.NewHandle(GetReferences()));
+  Handle<ByteArray> stack_frame(hs.NewHandle(GetStackFrame()));
+
+  EmulatedStackFrameAccessor getter(references, stack_frame, stack_frame->GetLength());
+  ShadowFrameSetter setter(callee_frame, first_dest_reg);
+
+  return PerformConversions<EmulatedStackFrameAccessor, ShadowFrameSetter>(
+      self, frame_callsite_type, callee_type, &getter, &setter, num_method_params);
+}
+
+void EmulatedStackFrame::GetReturnValue(Thread* self, JValue* value) {
+  StackHandleScope<2> hs(self);
+  Handle<mirror::Class> r_type(hs.NewHandle(GetType()->GetRType()));
+
+  const Primitive::Type type = r_type->GetPrimitiveType();
+  if (type == Primitive::kPrimNot) {
+    Handle<mirror::ObjectArray<mirror::Object>> references(hs.NewHandle(GetReferences()));
+    value->SetL(references->GetWithoutChecks(references->GetLength() - 1));
+  } else {
+    Handle<ByteArray> stack_frame(hs.NewHandle(GetStackFrame()));
+    const int8_t* array = stack_frame->GetData();
+    const size_t length = stack_frame->GetLength();
+    if (Primitive::Is64BitType(type)) {
+      int64_t primitive = 0;
+      memcpy(&primitive, array + length - sizeof(int64_t), sizeof(int64_t));
+      value->SetJ(primitive);
+    } else {
+      uint32_t primitive = 0;
+      memcpy(&primitive, array + length - sizeof(uint32_t), sizeof(uint32_t));
+      value->SetI(primitive);
+    }
+  }
+}
+
+void EmulatedStackFrame::SetReturnValue(Thread* self, const JValue& value) {
+  StackHandleScope<2> hs(self);
+  Handle<mirror::Class> r_type(hs.NewHandle(GetType()->GetRType()));
+
+  const Primitive::Type type = r_type->GetPrimitiveType();
+  if (type == Primitive::kPrimNot) {
+    Handle<mirror::ObjectArray<mirror::Object>> references(hs.NewHandle(GetReferences()));
+    references->SetWithoutChecks<false>(references->GetLength() - 1, value.GetL());
+  } else {
+    Handle<ByteArray> stack_frame(hs.NewHandle(GetStackFrame()));
+    int8_t* array = stack_frame->GetData();
+    const size_t length = stack_frame->GetLength();
+    if (Primitive::Is64BitType(type)) {
+      const int64_t primitive = value.GetJ();
+      memcpy(array + length - sizeof(int64_t), &primitive, sizeof(int64_t));
+    } else {
+      const uint32_t primitive = value.GetI();
+      memcpy(array + length - sizeof(uint32_t), &primitive, sizeof(uint32_t));
+    }
+  }
+}
+
+void EmulatedStackFrame::SetClass(Class* klass) {
+  CHECK(static_class_.IsNull()) << static_class_.Read() << " " << klass;
+  CHECK(klass != nullptr);
+  static_class_ = GcRoot<Class>(klass);
+}
+
+void EmulatedStackFrame::ResetClass() {
+  CHECK(!static_class_.IsNull());
+  static_class_ = GcRoot<Class>(nullptr);
+}
+
+void EmulatedStackFrame::VisitRoots(RootVisitor* visitor) {
+  static_class_.VisitRootIfNonNull(visitor, RootInfo(kRootStickyClass));
+}
+
+// Explicit DoInvokePolymorphic template function declarations.
+#define EXPLICIT_CREATE_FROM_SHADOW_FRAME_AND_ARGS_DECL(_is_range)                         \
+  template REQUIRES_SHARED(Locks::mutator_lock_)                                           \
+  mirror::EmulatedStackFrame* EmulatedStackFrame::CreateFromShadowFrameAndArgs<_is_range>( \
+    Thread* self,                                                                          \
+    Handle<mirror::MethodType> caller_type,                                                \
+    Handle<mirror::MethodType> callee_type,                                                \
+    const ShadowFrame& caller_frame,                                                       \
+    const uint32_t first_src_reg,                                                          \
+    const uint32_t (&arg)[Instruction::kMaxVarArgRegs])                                    \
+
+EXPLICIT_CREATE_FROM_SHADOW_FRAME_AND_ARGS_DECL(true);
+EXPLICIT_CREATE_FROM_SHADOW_FRAME_AND_ARGS_DECL(false);
+#undef EXPLICIT_CREATE_FROM_SHADOW_FRAME_AND_ARGS_DECL
+
+
+}  // namespace mirror
+}  // namespace art
diff --git a/runtime/mirror/emulated_stack_frame.h b/runtime/mirror/emulated_stack_frame.h
new file mode 100644
index 0000000..9fa06b7
--- /dev/null
+++ b/runtime/mirror/emulated_stack_frame.h
@@ -0,0 +1,109 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_MIRROR_EMULATED_STACK_FRAME_H_
+#define ART_RUNTIME_MIRROR_EMULATED_STACK_FRAME_H_
+
+#include "dex_instruction.h"
+#include "method_type.h"
+#include "object.h"
+#include "stack.h"
+#include "string.h"
+#include "utils.h"
+
+namespace art {
+
+struct EmulatedStackFrameOffsets;
+
+namespace mirror {
+
+// C++ mirror of dalvik.system.EmulatedStackFrame
+class MANAGED EmulatedStackFrame : public Object {
+ public:
+  // Creates an emulated stack frame whose type is |frame_type| from
+  // a shadow frame.
+  template <bool is_range>
+  static mirror::EmulatedStackFrame* CreateFromShadowFrameAndArgs(
+      Thread* self,
+      Handle<mirror::MethodType> args_type,
+      Handle<mirror::MethodType> frame_type,
+      const ShadowFrame& caller_frame,
+      const uint32_t first_src_reg,
+      const uint32_t (&arg)[Instruction::kMaxVarArgRegs]) REQUIRES_SHARED(Locks::mutator_lock_);
+
+  // Writes the contents of this emulated stack frame to the |callee_frame|
+  // whose type is |callee_type|, starting at |first_dest_reg|.
+  bool WriteToShadowFrame(
+      Thread* self,
+      Handle<mirror::MethodType> callee_type,
+      const uint32_t first_dest_reg,
+      ShadowFrame* callee_frame) REQUIRES_SHARED(Locks::mutator_lock_);
+
+  // Sets |value| to the return value written to this emulated stack frame (if any).
+  void GetReturnValue(Thread* self, JValue* value) REQUIRES_SHARED(Locks::mutator_lock_);
+
+  // Sets the return value slot of this emulated stack frame to |value|.
+  void SetReturnValue(Thread* self, const JValue& value) REQUIRES_SHARED(Locks::mutator_lock_);
+
+  static void SetClass(Class* klass) REQUIRES_SHARED(Locks::mutator_lock_);
+  static void ResetClass() REQUIRES_SHARED(Locks::mutator_lock_);
+  static void VisitRoots(RootVisitor* visitor) REQUIRES_SHARED(Locks::mutator_lock_);
+
+ private:
+  static mirror::Class* StaticClass() REQUIRES_SHARED(Locks::mutator_lock_) {
+    return static_class_.Read();
+  }
+
+  mirror::MethodType* GetType() REQUIRES_SHARED(Locks::mutator_lock_) {
+    return GetFieldObject<MethodType>(OFFSET_OF_OBJECT_MEMBER(EmulatedStackFrame, type_));
+  }
+
+  mirror::ObjectArray<mirror::Object>* GetReferences() REQUIRES_SHARED(Locks::mutator_lock_) {
+    return GetFieldObject<mirror::ObjectArray<mirror::Object>>(
+        OFFSET_OF_OBJECT_MEMBER(EmulatedStackFrame, references_));
+  }
+
+  mirror::ByteArray* GetStackFrame() REQUIRES_SHARED(Locks::mutator_lock_) {
+    return GetFieldObject<mirror::ByteArray>(
+        OFFSET_OF_OBJECT_MEMBER(EmulatedStackFrame, stack_frame_));
+  }
+
+  static MemberOffset TypeOffset() {
+    return MemberOffset(OFFSETOF_MEMBER(EmulatedStackFrame, type_));
+  }
+
+  static MemberOffset ReferencesOffset() {
+    return MemberOffset(OFFSETOF_MEMBER(EmulatedStackFrame, references_));
+  }
+
+  static MemberOffset StackFrameOffset() {
+    return MemberOffset(OFFSETOF_MEMBER(EmulatedStackFrame, stack_frame_));
+  }
+
+  HeapReference<mirror::ObjectArray<mirror::Object>> references_;
+  HeapReference<mirror::ByteArray> stack_frame_;
+  HeapReference<mirror::MethodType> type_;
+
+  static GcRoot<mirror::Class> static_class_;  // dalvik.system.EmulatedStackFrame.class
+
+  friend struct art::EmulatedStackFrameOffsets;  // for verifying offset information
+  DISALLOW_IMPLICIT_CONSTRUCTORS(EmulatedStackFrame);
+};
+
+}  // namespace mirror
+}  // namespace art
+
+#endif  // ART_RUNTIME_MIRROR_EMULATED_STACK_FRAME_H_
diff --git a/runtime/mirror/executable.cc b/runtime/mirror/executable.cc
index 33ebd81..17c16a2 100644
--- a/runtime/mirror/executable.cc
+++ b/runtime/mirror/executable.cc
@@ -32,14 +32,10 @@
   return true;
 }
 
-template bool Executable::CreateFromArtMethod<PointerSize::k32, false>(
-    ArtMethod* method);
-template bool Executable::CreateFromArtMethod<PointerSize::k32, true>(
-    ArtMethod* method);
-template bool Executable::CreateFromArtMethod<PointerSize::k64, false>(
-    ArtMethod* method);
-template bool Executable::CreateFromArtMethod<PointerSize::k64, true>(
-    ArtMethod* method);
+template bool Executable::CreateFromArtMethod<PointerSize::k32, false>(ArtMethod* method);
+template bool Executable::CreateFromArtMethod<PointerSize::k32, true>(ArtMethod* method);
+template bool Executable::CreateFromArtMethod<PointerSize::k64, false>(ArtMethod* method);
+template bool Executable::CreateFromArtMethod<PointerSize::k64, true>(ArtMethod* method);
 
 ArtMethod* Executable::GetArtMethod() {
   return reinterpret_cast<ArtMethod*>(GetField64(ArtMethodOffset()));
diff --git a/runtime/mirror/field-inl.h b/runtime/mirror/field-inl.h
index 445f23f..c03f20a 100644
--- a/runtime/mirror/field-inl.h
+++ b/runtime/mirror/field-inl.h
@@ -48,7 +48,7 @@
       self->ClearException();
     }
   }
-  auto ret = hs.NewHandle(static_cast<Field*>(StaticClass()->AllocObject(self)));
+  auto ret = hs.NewHandle(ObjPtr<Field>::DownCast(StaticClass()->AllocObject(self)));
   if (UNLIKELY(ret.Get() == nullptr)) {
     self->AssertPendingOOMException();
     return nullptr;
@@ -79,8 +79,13 @@
 }
 
 template<bool kTransactionActive>
-void Field::SetDeclaringClass(ObjPtr<mirror::Class> c) {
-  SetFieldObject<kTransactionActive>(OFFSET_OF_OBJECT_MEMBER(Field, declaring_class_), c.Ptr());
+inline void Field::SetDeclaringClass(ObjPtr<mirror::Class> c) {
+  SetFieldObject<kTransactionActive>(OFFSET_OF_OBJECT_MEMBER(Field, declaring_class_), c);
+}
+
+template<bool kTransactionActive>
+inline void Field::SetType(ObjPtr<mirror::Class> type) {
+  SetFieldObject<kTransactionActive>(OFFSET_OF_OBJECT_MEMBER(Field, type_), type);
 }
 
 }  // namespace mirror
diff --git a/runtime/mirror/field.cc b/runtime/mirror/field.cc
index 65f6b16..f6b6489 100644
--- a/runtime/mirror/field.cc
+++ b/runtime/mirror/field.cc
@@ -27,7 +27,7 @@
 GcRoot<Class> Field::static_class_;
 GcRoot<Class> Field::array_class_;
 
-void Field::SetClass(Class* klass) {
+void Field::SetClass(ObjPtr<Class> klass) {
   CHECK(static_class_.IsNull()) << static_class_.Read() << " " << klass;
   CHECK(klass != nullptr);
   static_class_ = GcRoot<Class>(klass);
@@ -38,7 +38,7 @@
   static_class_ = GcRoot<Class>(nullptr);
 }
 
-void Field::SetArrayClass(Class* klass) {
+void Field::SetArrayClass(ObjPtr<Class> klass) {
   CHECK(array_class_.IsNull()) << array_class_.Read() << " " << klass;
   CHECK(klass != nullptr);
   array_class_ = GcRoot<Class>(klass);
diff --git a/runtime/mirror/field.h b/runtime/mirror/field.h
index c5357c9..222d709 100644
--- a/runtime/mirror/field.h
+++ b/runtime/mirror/field.h
@@ -83,10 +83,10 @@
     return GetField32(OFFSET_OF_OBJECT_MEMBER(Field, offset_));
   }
 
-  static void SetClass(Class* klass) REQUIRES_SHARED(Locks::mutator_lock_);
+  static void SetClass(ObjPtr<Class> klass) REQUIRES_SHARED(Locks::mutator_lock_);
   static void ResetClass() REQUIRES_SHARED(Locks::mutator_lock_);
 
-  static void SetArrayClass(Class* klass) REQUIRES_SHARED(Locks::mutator_lock_);
+  static void SetArrayClass(ObjPtr<Class> klass) REQUIRES_SHARED(Locks::mutator_lock_);
   static void ResetArrayClass() REQUIRES_SHARED(Locks::mutator_lock_);
 
   static void VisitRoots(RootVisitor* visitor) REQUIRES_SHARED(Locks::mutator_lock_);
@@ -113,9 +113,7 @@
   void SetDeclaringClass(ObjPtr<mirror::Class> c) REQUIRES_SHARED(Locks::mutator_lock_);
 
   template<bool kTransactionActive>
-  void SetType(mirror::Class* type) REQUIRES_SHARED(Locks::mutator_lock_) {
-    SetFieldObject<kTransactionActive>(OFFSET_OF_OBJECT_MEMBER(Field, type_), type);
-  }
+  void SetType(ObjPtr<mirror::Class> type) REQUIRES_SHARED(Locks::mutator_lock_);
 
   template<bool kTransactionActive>
   void SetAccessFlags(uint32_t flags) REQUIRES_SHARED(Locks::mutator_lock_) {
diff --git a/runtime/mirror/iftable-inl.h b/runtime/mirror/iftable-inl.h
index b465d07..d6191c2 100644
--- a/runtime/mirror/iftable-inl.h
+++ b/runtime/mirror/iftable-inl.h
@@ -18,11 +18,12 @@
 #define ART_RUNTIME_MIRROR_IFTABLE_INL_H_
 
 #include "iftable.h"
+#include "obj_ptr-inl.h"
 
 namespace art {
 namespace mirror {
 
-inline void IfTable::SetInterface(int32_t i, Class* interface) {
+inline void IfTable::SetInterface(int32_t i, ObjPtr<Class> interface) {
   DCHECK(interface != nullptr);
   DCHECK(interface->IsInterface());
   const size_t idx = i * kMax + kInterface;
@@ -30,6 +31,13 @@
   SetWithoutChecks<false>(idx, interface);
 }
 
+inline void IfTable::SetMethodArray(int32_t i, ObjPtr<PointerArray> arr) {
+  DCHECK(arr != nullptr);
+  auto idx = i * kMax + kMethodArray;
+  DCHECK(Get(idx) == nullptr);
+  Set<false>(idx, arr);
+}
+
 }  // namespace mirror
 }  // namespace art
 
diff --git a/runtime/mirror/iftable.h b/runtime/mirror/iftable.h
index a1a2f98..296c163 100644
--- a/runtime/mirror/iftable.h
+++ b/runtime/mirror/iftable.h
@@ -31,7 +31,7 @@
     return interface;
   }
 
-  ALWAYS_INLINE void SetInterface(int32_t i, Class* interface)
+  ALWAYS_INLINE void SetInterface(int32_t i, ObjPtr<Class> interface)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
@@ -51,12 +51,7 @@
     return method_array == nullptr ? 0u : method_array->GetLength();
   }
 
-  void SetMethodArray(int32_t i, PointerArray* arr) REQUIRES_SHARED(Locks::mutator_lock_) {
-    DCHECK(arr != nullptr);
-    auto idx = i * kMax + kMethodArray;
-    DCHECK(Get(idx) == nullptr);
-    Set<false>(idx, arr);
-  }
+  void SetMethodArray(int32_t i, ObjPtr<PointerArray> arr) REQUIRES_SHARED(Locks::mutator_lock_);
 
   size_t Count() REQUIRES_SHARED(Locks::mutator_lock_) {
     return GetLength() / kMax;
diff --git a/runtime/mirror/method.cc b/runtime/mirror/method.cc
index 71bac7e..25cbdc1 100644
--- a/runtime/mirror/method.cc
+++ b/runtime/mirror/method.cc
@@ -53,13 +53,13 @@
 
 template <PointerSize kPointerSize, bool kTransactionActive>
 Method* Method::CreateFromArtMethod(Thread* self, ArtMethod* method) {
-  DCHECK(!method->IsConstructor()) << PrettyMethod(method);
-  auto* ret = down_cast<Method*>(StaticClass()->AllocObject(self));
+  DCHECK(!method->IsConstructor()) << method->PrettyMethod();
+  ObjPtr<Method> ret = ObjPtr<Method>::DownCast(StaticClass()->AllocObject(self));
   if (LIKELY(ret != nullptr)) {
-    static_cast<Executable*>(ret)->
+    ObjPtr<Executable>(ret)->
         CreateFromArtMethod<kPointerSize, kTransactionActive>(method);
   }
-  return ret;
+  return ret.Ptr();
 }
 
 template Method* Method::CreateFromArtMethod<PointerSize::k32, false>(Thread* self,
@@ -105,13 +105,13 @@
 
 template <PointerSize kPointerSize, bool kTransactionActive>
 Constructor* Constructor::CreateFromArtMethod(Thread* self, ArtMethod* method) {
-  DCHECK(method->IsConstructor()) << PrettyMethod(method);
-  auto* ret = down_cast<Constructor*>(StaticClass()->AllocObject(self));
+  DCHECK(method->IsConstructor()) << method->PrettyMethod();
+  ObjPtr<Constructor> ret = ObjPtr<Constructor>::DownCast(StaticClass()->AllocObject(self));
   if (LIKELY(ret != nullptr)) {
-    static_cast<Executable*>(ret)->
+    ObjPtr<Executable>(ret)->
         CreateFromArtMethod<kPointerSize, kTransactionActive>(method);
   }
-  return ret;
+  return ret.Ptr();
 }
 
 template Constructor* Constructor::CreateFromArtMethod<PointerSize::k32, false>(
diff --git a/runtime/mirror/method_handle_impl.h b/runtime/mirror/method_handle_impl.h
index 40716ad..9054216 100644
--- a/runtime/mirror/method_handle_impl.h
+++ b/runtime/mirror/method_handle_impl.h
@@ -36,6 +36,15 @@
     return GetFieldObject<mirror::MethodType>(OFFSET_OF_OBJECT_MEMBER(MethodHandle, method_type_));
   }
 
+  mirror::MethodType* GetNominalType() REQUIRES_SHARED(Locks::mutator_lock_) {
+    return GetFieldObject<mirror::MethodType>(OFFSET_OF_OBJECT_MEMBER(MethodHandle, nominal_type_));
+  }
+
+  ArtField* GetTargetField() REQUIRES_SHARED(Locks::mutator_lock_) {
+    return reinterpret_cast<ArtField*>(
+        GetField64(OFFSET_OF_OBJECT_MEMBER(MethodHandle, art_field_or_method_)));
+  }
+
   ArtMethod* GetTargetMethod() REQUIRES_SHARED(Locks::mutator_lock_) {
     return reinterpret_cast<ArtMethod*>(
         GetField64(OFFSET_OF_OBJECT_MEMBER(MethodHandle, art_field_or_method_)));
@@ -49,14 +58,14 @@
   }
 
  private:
-  HeapReference<mirror::Object> as_type_cache_;
+  HeapReference<mirror::MethodType> nominal_type_;
   HeapReference<mirror::MethodType> method_type_;
   uint64_t art_field_or_method_;
   uint32_t handle_kind_;
 
  private:
-  static MemberOffset AsTypeCacheOffset() {
-    return MemberOffset(OFFSETOF_MEMBER(MethodHandle, as_type_cache_));
+  static MemberOffset NominalTypeOffset() {
+    return MemberOffset(OFFSETOF_MEMBER(MethodHandle, nominal_type_));
   }
   static MemberOffset MethodTypeOffset() {
     return MemberOffset(OFFSETOF_MEMBER(MethodHandle, method_type_));
diff --git a/runtime/mirror/method_type.cc b/runtime/mirror/method_type.cc
index ba6ea5e..5d77a16 100644
--- a/runtime/mirror/method_type.cc
+++ b/runtime/mirror/method_type.cc
@@ -18,6 +18,7 @@
 
 #include "class-inl.h"
 #include "gc_root-inl.h"
+#include "method_handles.h"
 
 namespace art {
 namespace mirror {
@@ -29,7 +30,7 @@
                                        Handle<ObjectArray<Class>> param_types) {
   StackHandleScope<1> hs(self);
   Handle<mirror::MethodType> mt(
-      hs.NewHandle(static_cast<MethodType*>(StaticClass()->AllocObject(self))));
+      hs.NewHandle(ObjPtr<MethodType>::DownCast(StaticClass()->AllocObject(self))));
 
   // TODO: Do we ever create a MethodType during a transaction ? There doesn't
   // seem like a good reason to do a polymorphic invoke that results in the
@@ -43,28 +44,66 @@
   return mt.Get();
 }
 
-bool MethodType::IsExactMatch(mirror::MethodType* other) REQUIRES_SHARED(Locks::mutator_lock_) {
-  if (GetRType() != other->GetRType()) {
-    return false;
-  }
-
+bool MethodType::IsExactMatch(mirror::MethodType* target) REQUIRES_SHARED(Locks::mutator_lock_) {
   mirror::ObjectArray<Class>* const p_types = GetPTypes();
   const int32_t params_length = p_types->GetLength();
 
-  mirror::ObjectArray<Class>* const other_p_types = other->GetPTypes();
-  if (params_length != other_p_types->GetLength()) {
+  mirror::ObjectArray<Class>* const target_p_types = target->GetPTypes();
+  if (params_length != target_p_types->GetLength()) {
+    return false;
+  }
+  for (int32_t i = 0; i < params_length; ++i) {
+    if (p_types->GetWithoutChecks(i) != target_p_types->GetWithoutChecks(i)) {
+      return false;
+    }
+  }
+  return GetRType() == target->GetRType();
+}
+
+bool MethodType::IsConvertible(mirror::MethodType* target) REQUIRES_SHARED(Locks::mutator_lock_) {
+  mirror::ObjectArray<Class>* const p_types = GetPTypes();
+  const int32_t params_length = p_types->GetLength();
+
+  mirror::ObjectArray<Class>* const target_p_types = target->GetPTypes();
+  if (params_length != target_p_types->GetLength()) {
+    return false;
+  }
+
+  // Perform return check before invoking method handle otherwise side
+  // effects from the invocation may be observable before
+  // WrongMethodTypeException is raised.
+  if (!IsReturnTypeConvertible(target->GetRType(), GetRType())) {
     return false;
   }
 
   for (int32_t i = 0; i < params_length; ++i) {
-    if (p_types->GetWithoutChecks(i) != other_p_types->GetWithoutChecks(i)) {
+    if (!IsParameterTypeConvertible(p_types->GetWithoutChecks(i),
+                                    target_p_types->GetWithoutChecks(i))) {
       return false;
     }
   }
-
   return true;
 }
 
+std::string MethodType::PrettyDescriptor() REQUIRES_SHARED(Locks::mutator_lock_) {
+  std::ostringstream ss;
+  ss << "(";
+
+  mirror::ObjectArray<Class>* const p_types = GetPTypes();
+  const int32_t params_length = p_types->GetLength();
+  for (int32_t i = 0; i < params_length; ++i) {
+    ss << p_types->GetWithoutChecks(i)->PrettyDescriptor();
+    if (i != (params_length - 1)) {
+      ss << ", ";
+    }
+  }
+
+  ss << ")";
+  ss << GetRType()->PrettyDescriptor();
+
+  return ss.str();
+}
+
 void MethodType::SetClass(Class* klass) {
   CHECK(static_class_.IsNull()) << static_class_.Read() << " " << klass;
   CHECK(klass != nullptr);
diff --git a/runtime/mirror/method_type.h b/runtime/mirror/method_type.h
index 5b50409..9a98143 100644
--- a/runtime/mirror/method_type.h
+++ b/runtime/mirror/method_type.h
@@ -52,9 +52,17 @@
   static void ResetClass() REQUIRES_SHARED(Locks::mutator_lock_);
   static void VisitRoots(RootVisitor* visitor) REQUIRES_SHARED(Locks::mutator_lock_);
 
-  // Returns true iff. |other| is an exact match for this method type, i.e
+  // Returns true iff. |this| is an exact match for method type |target|, i.e
   // iff. they have the same return types and parameter types.
-  bool IsExactMatch(mirror::MethodType* other) REQUIRES_SHARED(Locks::mutator_lock_);
+  bool IsExactMatch(mirror::MethodType* target) REQUIRES_SHARED(Locks::mutator_lock_);
+
+  // Returns true iff. |this| can be converted to match |target| method type, i.e
+  // iff. they have convertible return types and parameter types.
+  bool IsConvertible(mirror::MethodType* target) REQUIRES_SHARED(Locks::mutator_lock_);
+
+  // Returns the pretty descriptor for this method type, suitable for display in
+  // exception messages and the like.
+  std::string PrettyDescriptor() REQUIRES_SHARED(Locks::mutator_lock_);
 
  private:
   static MemberOffset FormOffset() {
diff --git a/runtime/mirror/method_type_test.cc b/runtime/mirror/method_type_test.cc
index a968bff..03ab930 100644
--- a/runtime/mirror/method_type_test.cc
+++ b/runtime/mirror/method_type_test.cc
@@ -52,7 +52,7 @@
           soa.Self(), FullyQualifiedType(return_type).c_str(), boot_class_loader));
   CHECK(return_clazz.Get() != nullptr);
 
-  mirror::Class* class_type = mirror::Class::GetJavaLangClass();
+  ObjPtr<mirror::Class> class_type = mirror::Class::GetJavaLangClass();
   mirror::Class* class_array_type = class_linker->FindArrayClass(self, &class_type);
   Handle<mirror::ObjectArray<mirror::Class>> param_classes = hs.NewHandle(
       mirror::ObjectArray<mirror::Class>::Alloc(self, class_array_type, param_types.size()));
diff --git a/runtime/mirror/object-inl.h b/runtime/mirror/object-inl.h
index ad7558c..6d29ed3 100644
--- a/runtime/mirror/object-inl.h
+++ b/runtime/mirror/object-inl.h
@@ -31,6 +31,7 @@
 #include "lock_word-inl.h"
 #include "monitor.h"
 #include "object_array-inl.h"
+#include "object_reference-inl.h"
 #include "obj_ptr-inl.h"
 #include "read_barrier-inl.h"
 #include "reference.h"
@@ -53,7 +54,7 @@
 }
 
 template<VerifyObjectFlags kVerifyFlags>
-inline void Object::SetClass(Class* new_klass) {
+inline void Object::SetClass(ObjPtr<Class> new_klass) {
   // new_klass may be null prior to class linker initialization.
   // We don't mark the card as this occurs as part of object allocation. Not all objects have
   // backing cards, such as large objects.
@@ -134,32 +135,88 @@
   Monitor::Wait(self, this, ms, ns, true, kTimedWaiting);
 }
 
-inline Object* Object::GetReadBarrierPointer() {
+inline uint32_t Object::GetReadBarrierState(uintptr_t* fake_address_dependency) {
+#ifdef USE_BAKER_READ_BARRIER
+  CHECK(kUseBakerReadBarrier);
+#if defined(__arm__)
+  uintptr_t obj = reinterpret_cast<uintptr_t>(this);
+  uintptr_t result;
+  DCHECK_EQ(OFFSETOF_MEMBER(Object, monitor_), 4U);
+  // Use inline assembly to prevent the compiler from optimizing away the false dependency.
+  __asm__ __volatile__(
+      "ldr %[result], [%[obj], #4]\n\t"
+      // This instruction is enough to "fool the compiler and the CPU" by having `fad` always be
+      // null, without them being able to assume that fact.
+      "eor %[fad], %[result], %[result]\n\t"
+      : [result] "+r" (result), [fad] "=r" (*fake_address_dependency)
+      : [obj] "r" (obj));
+  DCHECK_EQ(*fake_address_dependency, 0U);
+  LockWord lw(static_cast<uint32_t>(result));
+  uint32_t rb_state = lw.ReadBarrierState();
+  return rb_state;
+#elif defined(__aarch64__)
+  uintptr_t obj = reinterpret_cast<uintptr_t>(this);
+  uintptr_t result;
+  DCHECK_EQ(OFFSETOF_MEMBER(Object, monitor_), 4U);
+  // Use inline assembly to prevent the compiler from optimizing away the false dependency.
+  __asm__ __volatile__(
+      "ldr %w[result], [%[obj], #4]\n\t"
+      // This instruction is enough to "fool the compiler and the CPU" by having `fad` always be
+      // null, without them being able to assume that fact.
+      "eor %[fad], %[result], %[result]\n\t"
+      : [result] "+r" (result), [fad] "=r" (*fake_address_dependency)
+      : [obj] "r" (obj));
+  DCHECK_EQ(*fake_address_dependency, 0U);
+  LockWord lw(static_cast<uint32_t>(result));
+  uint32_t rb_state = lw.ReadBarrierState();
+  return rb_state;
+#elif defined(__i386__) || defined(__x86_64__)
+  LockWord lw = GetLockWord(false);
+  // i386/x86_64 don't need fake address dependency. Use a compiler fence to avoid compiler
+  // reordering.
+  *fake_address_dependency = 0;
+  std::atomic_signal_fence(std::memory_order_acquire);
+  uint32_t rb_state = lw.ReadBarrierState();
+  return rb_state;
+#else
+  // mips/mips64
+  LOG(FATAL) << "Unreachable";
+  UNREACHABLE();
+  UNUSED(fake_address_dependency);
+#endif
+#else  // !USE_BAKER_READ_BARRIER
+  LOG(FATAL) << "Unreachable";
+  UNREACHABLE();
+  UNUSED(fake_address_dependency);
+#endif
+}
+
+inline uint32_t Object::GetReadBarrierState() {
 #ifdef USE_BAKER_READ_BARRIER
   DCHECK(kUseBakerReadBarrier);
-  return reinterpret_cast<Object*>(GetLockWord(false).ReadBarrierState());
-#elif USE_BROOKS_READ_BARRIER
-  DCHECK(kUseBrooksReadBarrier);
-  return GetFieldObject<Object, kVerifyNone, kWithoutReadBarrier>(
-      OFFSET_OF_OBJECT_MEMBER(Object, x_rb_ptr_));
+  LockWord lw(GetField<uint32_t, /*kIsVolatile*/false>(OFFSET_OF_OBJECT_MEMBER(Object, monitor_)));
+  uint32_t rb_state = lw.ReadBarrierState();
+  DCHECK(ReadBarrier::IsValidReadBarrierState(rb_state)) << rb_state;
+  return rb_state;
 #else
   LOG(FATAL) << "Unreachable";
   UNREACHABLE();
 #endif
 }
 
-inline Object* Object::GetReadBarrierPointerAcquire() {
+inline uint32_t Object::GetReadBarrierStateAcquire() {
 #ifdef USE_BAKER_READ_BARRIER
   DCHECK(kUseBakerReadBarrier);
   LockWord lw(GetFieldAcquire<uint32_t>(OFFSET_OF_OBJECT_MEMBER(Object, monitor_)));
-  return reinterpret_cast<Object*>(lw.ReadBarrierState());
+  uint32_t rb_state = lw.ReadBarrierState();
+  DCHECK(ReadBarrier::IsValidReadBarrierState(rb_state)) << rb_state;
+  return rb_state;
 #else
   LOG(FATAL) << "Unreachable";
   UNREACHABLE();
 #endif
 }
 
-
 inline uint32_t Object::GetMarkBit() {
 #ifdef USE_READ_BARRIER
   return GetLockWord(false).MarkBitState();
@@ -169,48 +226,38 @@
 #endif
 }
 
-inline void Object::SetReadBarrierPointer(Object* rb_ptr) {
+inline void Object::SetReadBarrierState(uint32_t rb_state) {
 #ifdef USE_BAKER_READ_BARRIER
   DCHECK(kUseBakerReadBarrier);
-  DCHECK_EQ(reinterpret_cast<uint64_t>(rb_ptr) >> 32, 0U);
-  DCHECK_NE(rb_ptr, ReadBarrier::BlackPtr()) << "Setting to black is not supported";
+  DCHECK(ReadBarrier::IsValidReadBarrierState(rb_state)) << rb_state;
   LockWord lw = GetLockWord(false);
-  lw.SetReadBarrierState(static_cast<uint32_t>(reinterpret_cast<uintptr_t>(rb_ptr)));
+  lw.SetReadBarrierState(rb_state);
   SetLockWord(lw, false);
-#elif USE_BROOKS_READ_BARRIER
-  DCHECK(kUseBrooksReadBarrier);
-  // We don't mark the card as this occurs as part of object allocation. Not all objects have
-  // backing cards, such as large objects.
-  SetFieldObjectWithoutWriteBarrier<false, false, kVerifyNone>(
-      OFFSET_OF_OBJECT_MEMBER(Object, x_rb_ptr_), rb_ptr);
 #else
   LOG(FATAL) << "Unreachable";
   UNREACHABLE();
-  UNUSED(rb_ptr);
+  UNUSED(rb_state);
 #endif
 }
 
 template<bool kCasRelease>
-inline bool Object::AtomicSetReadBarrierPointer(Object* expected_rb_ptr, Object* rb_ptr) {
+inline bool Object::AtomicSetReadBarrierState(uint32_t expected_rb_state, uint32_t rb_state) {
 #ifdef USE_BAKER_READ_BARRIER
   DCHECK(kUseBakerReadBarrier);
-  DCHECK_EQ(reinterpret_cast<uint64_t>(expected_rb_ptr) >> 32, 0U);
-  DCHECK_EQ(reinterpret_cast<uint64_t>(rb_ptr) >> 32, 0U);
-  DCHECK_NE(expected_rb_ptr, ReadBarrier::BlackPtr()) << "Setting to black is not supported";
-  DCHECK_NE(rb_ptr, ReadBarrier::BlackPtr()) << "Setting to black is not supported";
+  DCHECK(ReadBarrier::IsValidReadBarrierState(expected_rb_state)) << expected_rb_state;
+  DCHECK(ReadBarrier::IsValidReadBarrierState(rb_state)) << rb_state;
   LockWord expected_lw;
   LockWord new_lw;
   do {
     LockWord lw = GetLockWord(false);
-    if (UNLIKELY(reinterpret_cast<Object*>(lw.ReadBarrierState()) != expected_rb_ptr)) {
+    if (UNLIKELY(lw.ReadBarrierState() != expected_rb_state)) {
       // Lost the race.
       return false;
     }
     expected_lw = lw;
-    expected_lw.SetReadBarrierState(
-        static_cast<uint32_t>(reinterpret_cast<uintptr_t>(expected_rb_ptr)));
+    expected_lw.SetReadBarrierState(expected_rb_state);
     new_lw = lw;
-    new_lw.SetReadBarrierState(static_cast<uint32_t>(reinterpret_cast<uintptr_t>(rb_ptr)));
+    new_lw.SetReadBarrierState(rb_state);
     // ConcurrentCopying::ProcessMarkStackRef uses this with kCasRelease == true.
     // If kCasRelease == true, use a CAS release so that when GC updates all the fields of
     // an object and then changes the object from gray to black, the field updates (stores) will be
@@ -219,23 +266,8 @@
              CasLockWordWeakRelease(expected_lw, new_lw) :
              CasLockWordWeakRelaxed(expected_lw, new_lw)));
   return true;
-#elif USE_BROOKS_READ_BARRIER
-  DCHECK(kUseBrooksReadBarrier);
-  MemberOffset offset = OFFSET_OF_OBJECT_MEMBER(Object, x_rb_ptr_);
-  uint8_t* raw_addr = reinterpret_cast<uint8_t*>(this) + offset.SizeValue();
-  Atomic<uint32_t>* atomic_rb_ptr = reinterpret_cast<Atomic<uint32_t>*>(raw_addr);
-  HeapReference<Object> expected_ref(HeapReference<Object>::FromMirrorPtr(expected_rb_ptr));
-  HeapReference<Object> new_ref(HeapReference<Object>::FromMirrorPtr(rb_ptr));
-  do {
-    if (UNLIKELY(atomic_rb_ptr->LoadRelaxed() != expected_ref.reference_)) {
-      // Lost the race.
-      return false;
-    }
-  } while (!atomic_rb_ptr->CompareExchangeWeakSequentiallyConsistent(expected_ref.reference_,
-                                                                     new_ref.reference_));
-  return true;
 #else
-  UNUSED(expected_rb_ptr, rb_ptr);
+  UNUSED(expected_rb_state, rb_state);
   LOG(FATAL) << "Unreachable";
   UNREACHABLE();
 #endif
@@ -259,23 +291,16 @@
 }
 
 
-inline void Object::AssertReadBarrierPointer() const {
-  if (kUseBakerReadBarrier) {
-    Object* obj = const_cast<Object*>(this);
-    DCHECK(obj->GetReadBarrierPointer() == nullptr)
-        << "Bad Baker pointer: obj=" << reinterpret_cast<void*>(obj)
-        << " ptr=" << reinterpret_cast<void*>(obj->GetReadBarrierPointer());
-  } else {
-    CHECK(kUseBrooksReadBarrier);
-    Object* obj = const_cast<Object*>(this);
-    DCHECK_EQ(obj, obj->GetReadBarrierPointer())
-        << "Bad Brooks pointer: obj=" << reinterpret_cast<void*>(obj)
-        << " ptr=" << reinterpret_cast<void*>(obj->GetReadBarrierPointer());
-  }
+inline void Object::AssertReadBarrierState() const {
+  CHECK(kUseBakerReadBarrier);
+  Object* obj = const_cast<Object*>(this);
+  DCHECK(obj->GetReadBarrierState() == ReadBarrier::WhiteState())
+      << "Bad Baker pointer: obj=" << reinterpret_cast<void*>(obj)
+      << " rb_state" << reinterpret_cast<void*>(obj->GetReadBarrierState());
 }
 
 template<VerifyObjectFlags kVerifyFlags>
-inline bool Object::VerifierInstanceOf(Class* klass) {
+inline bool Object::VerifierInstanceOf(ObjPtr<Class> klass) {
   DCHECK(klass != nullptr);
   DCHECK(GetClass<kVerifyFlags>() != nullptr);
   return klass->IsInterface() || InstanceOf(klass);
@@ -392,8 +417,8 @@
 template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline bool Object::IsIntArray() {
   constexpr auto kNewFlags = static_cast<VerifyObjectFlags>(kVerifyFlags & ~kVerifyThis);
-  mirror::Class* klass = GetClass<kVerifyFlags, kReadBarrierOption>();
-  mirror::Class* component_type = klass->GetComponentType<kVerifyFlags, kReadBarrierOption>();
+  ObjPtr<Class> klass = GetClass<kVerifyFlags, kReadBarrierOption>();
+  ObjPtr<Class> component_type = klass->GetComponentType<kVerifyFlags, kReadBarrierOption>();
   return component_type != nullptr && component_type->template IsPrimitiveInt<kNewFlags>();
 }
 
@@ -406,8 +431,8 @@
 template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline bool Object::IsLongArray() {
   constexpr auto kNewFlags = static_cast<VerifyObjectFlags>(kVerifyFlags & ~kVerifyThis);
-  mirror::Class* klass = GetClass<kVerifyFlags, kReadBarrierOption>();
-  mirror::Class* component_type = klass->GetComponentType<kVerifyFlags, kReadBarrierOption>();
+  ObjPtr<Class> klass = GetClass<kVerifyFlags, kReadBarrierOption>();
+  ObjPtr<Class> component_type = klass->GetComponentType<kVerifyFlags, kReadBarrierOption>();
   return component_type != nullptr && component_type->template IsPrimitiveLong<kNewFlags>();
 }
 
@@ -510,7 +535,7 @@
         template GetObjectSize<kNewFlags, kReadBarrierOption>();
   }
   DCHECK_GE(result, sizeof(Object))
-      << " class=" << PrettyClass(GetClass<kNewFlags, kReadBarrierOption>());
+      << " class=" << Class::PrettyClass(GetClass<kNewFlags, kReadBarrierOption>());
   return result;
 }
 
@@ -895,18 +920,18 @@
 template<bool kTransactionActive, bool kCheckTransaction, VerifyObjectFlags kVerifyFlags,
     bool kIsVolatile>
 inline void Object::SetFieldObjectWithoutWriteBarrier(MemberOffset field_offset,
-                                                      Object* new_value) {
+                                                      ObjPtr<Object> new_value) {
   if (kCheckTransaction) {
     DCHECK_EQ(kTransactionActive, Runtime::Current()->IsActiveTransaction());
   }
   if (kTransactionActive) {
-    mirror::Object* obj;
+    ObjPtr<Object> obj;
     if (kIsVolatile) {
       obj = GetFieldObjectVolatile<Object>(field_offset);
     } else {
       obj = GetFieldObject<Object>(field_offset);
     }
-    Runtime::Current()->RecordWriteFieldReference(this, field_offset, obj, true);
+    Runtime::Current()->RecordWriteFieldReference(this, field_offset, obj.Ptr(), true);
   }
   if (kVerifyFlags & kVerifyThis) {
     VerifyObject(this);
@@ -919,17 +944,17 @@
   if (kIsVolatile) {
     // TODO: Refactor to use a SequentiallyConsistent store instead.
     QuasiAtomic::ThreadFenceRelease();  // Ensure that prior accesses are visible before store.
-    objref_addr->Assign(new_value);
+    objref_addr->Assign(new_value.Ptr());
     QuasiAtomic::ThreadFenceSequentiallyConsistent();
                                 // Ensure this store occurs before any volatile loads.
   } else {
-    objref_addr->Assign(new_value);
+    objref_addr->Assign(new_value.Ptr());
   }
 }
 
 template<bool kTransactionActive, bool kCheckTransaction, VerifyObjectFlags kVerifyFlags,
     bool kIsVolatile>
-inline void Object::SetFieldObject(MemberOffset field_offset, Object* new_value) {
+inline void Object::SetFieldObject(MemberOffset field_offset, ObjPtr<Object> new_value) {
   SetFieldObjectWithoutWriteBarrier<kTransactionActive, kCheckTransaction, kVerifyFlags,
       kIsVolatile>(field_offset, new_value);
   if (new_value != nullptr) {
@@ -940,7 +965,7 @@
 }
 
 template<bool kTransactionActive, bool kCheckTransaction, VerifyObjectFlags kVerifyFlags>
-inline void Object::SetFieldObjectVolatile(MemberOffset field_offset, Object* new_value) {
+inline void Object::SetFieldObjectVolatile(MemberOffset field_offset, ObjPtr<Object> new_value) {
   SetFieldObject<kTransactionActive, kCheckTransaction, kVerifyFlags, true>(field_offset,
                                                                             new_value);
 }
@@ -956,7 +981,8 @@
 
 template<bool kTransactionActive, bool kCheckTransaction, VerifyObjectFlags kVerifyFlags>
 inline bool Object::CasFieldWeakSequentiallyConsistentObject(MemberOffset field_offset,
-                                                             Object* old_value, Object* new_value) {
+                                                             ObjPtr<Object> old_value,
+                                                             ObjPtr<Object> new_value) {
   bool success = CasFieldWeakSequentiallyConsistentObjectWithoutWriteBarrier<
       kTransactionActive, kCheckTransaction, kVerifyFlags>(field_offset, old_value, new_value);
   if (success) {
@@ -967,7 +993,9 @@
 
 template<bool kTransactionActive, bool kCheckTransaction, VerifyObjectFlags kVerifyFlags>
 inline bool Object::CasFieldWeakSequentiallyConsistentObjectWithoutWriteBarrier(
-    MemberOffset field_offset, Object* old_value, Object* new_value) {
+    MemberOffset field_offset,
+    ObjPtr<Object> old_value,
+    ObjPtr<Object> new_value) {
   if (kCheckTransaction) {
     DCHECK_EQ(kTransactionActive, Runtime::Current()->IsActiveTransaction());
   }
@@ -983,8 +1011,8 @@
   if (kTransactionActive) {
     Runtime::Current()->RecordWriteFieldReference(this, field_offset, old_value, true);
   }
-  HeapReference<Object> old_ref(HeapReference<Object>::FromMirrorPtr(old_value));
-  HeapReference<Object> new_ref(HeapReference<Object>::FromMirrorPtr(new_value));
+  HeapReference<Object> old_ref(HeapReference<Object>::FromObjPtr(old_value));
+  HeapReference<Object> new_ref(HeapReference<Object>::FromObjPtr(new_value));
   uint8_t* raw_addr = reinterpret_cast<uint8_t*>(this) + field_offset.Int32Value();
   Atomic<uint32_t>* atomic_addr = reinterpret_cast<Atomic<uint32_t>*>(raw_addr);
 
@@ -995,7 +1023,8 @@
 
 template<bool kTransactionActive, bool kCheckTransaction, VerifyObjectFlags kVerifyFlags>
 inline bool Object::CasFieldStrongSequentiallyConsistentObject(MemberOffset field_offset,
-                                                               Object* old_value, Object* new_value) {
+                                                               ObjPtr<Object> old_value,
+                                                               ObjPtr<Object> new_value) {
   bool success = CasFieldStrongSequentiallyConsistentObjectWithoutWriteBarrier<
       kTransactionActive, kCheckTransaction, kVerifyFlags>(field_offset, old_value, new_value);
   if (success) {
@@ -1006,7 +1035,9 @@
 
 template<bool kTransactionActive, bool kCheckTransaction, VerifyObjectFlags kVerifyFlags>
 inline bool Object::CasFieldStrongSequentiallyConsistentObjectWithoutWriteBarrier(
-    MemberOffset field_offset, Object* old_value, Object* new_value) {
+    MemberOffset field_offset,
+    ObjPtr<Object> old_value,
+    ObjPtr<Object> new_value) {
   if (kCheckTransaction) {
     DCHECK_EQ(kTransactionActive, Runtime::Current()->IsActiveTransaction());
   }
@@ -1022,8 +1053,8 @@
   if (kTransactionActive) {
     Runtime::Current()->RecordWriteFieldReference(this, field_offset, old_value, true);
   }
-  HeapReference<Object> old_ref(HeapReference<Object>::FromMirrorPtr(old_value));
-  HeapReference<Object> new_ref(HeapReference<Object>::FromMirrorPtr(new_value));
+  HeapReference<Object> old_ref(HeapReference<Object>::FromObjPtr(old_value));
+  HeapReference<Object> new_ref(HeapReference<Object>::FromObjPtr(new_value));
   uint8_t* raw_addr = reinterpret_cast<uint8_t*>(this) + field_offset.Int32Value();
   Atomic<uint32_t>* atomic_addr = reinterpret_cast<Atomic<uint32_t>*>(raw_addr);
 
@@ -1034,7 +1065,9 @@
 
 template<bool kTransactionActive, bool kCheckTransaction, VerifyObjectFlags kVerifyFlags>
 inline bool Object::CasFieldWeakRelaxedObjectWithoutWriteBarrier(
-    MemberOffset field_offset, Object* old_value, Object* new_value) {
+    MemberOffset field_offset,
+    ObjPtr<Object> old_value,
+    ObjPtr<Object> new_value) {
   if (kCheckTransaction) {
     DCHECK_EQ(kTransactionActive, Runtime::Current()->IsActiveTransaction());
   }
@@ -1050,8 +1083,8 @@
   if (kTransactionActive) {
     Runtime::Current()->RecordWriteFieldReference(this, field_offset, old_value, true);
   }
-  HeapReference<Object> old_ref(HeapReference<Object>::FromMirrorPtr(old_value));
-  HeapReference<Object> new_ref(HeapReference<Object>::FromMirrorPtr(new_value));
+  HeapReference<Object> old_ref(HeapReference<Object>::FromObjPtr(old_value));
+  HeapReference<Object> new_ref(HeapReference<Object>::FromObjPtr(new_value));
   uint8_t* raw_addr = reinterpret_cast<uint8_t*>(this) + field_offset.Int32Value();
   Atomic<uint32_t>* atomic_addr = reinterpret_cast<Atomic<uint32_t>*>(raw_addr);
 
@@ -1062,7 +1095,9 @@
 
 template<bool kTransactionActive, bool kCheckTransaction, VerifyObjectFlags kVerifyFlags>
 inline bool Object::CasFieldStrongRelaxedObjectWithoutWriteBarrier(
-    MemberOffset field_offset, Object* old_value, Object* new_value) {
+    MemberOffset field_offset,
+    ObjPtr<Object> old_value,
+    ObjPtr<Object> new_value) {
   if (kCheckTransaction) {
     DCHECK_EQ(kTransactionActive, Runtime::Current()->IsActiveTransaction());
   }
@@ -1078,8 +1113,8 @@
   if (kTransactionActive) {
     Runtime::Current()->RecordWriteFieldReference(this, field_offset, old_value, true);
   }
-  HeapReference<Object> old_ref(HeapReference<Object>::FromMirrorPtr(old_value));
-  HeapReference<Object> new_ref(HeapReference<Object>::FromMirrorPtr(new_value));
+  HeapReference<Object> old_ref(HeapReference<Object>::FromObjPtr(old_value));
+  HeapReference<Object> new_ref(HeapReference<Object>::FromObjPtr(new_value));
   uint8_t* raw_addr = reinterpret_cast<uint8_t*>(this) + field_offset.Int32Value();
   Atomic<uint32_t>* atomic_addr = reinterpret_cast<Atomic<uint32_t>*>(raw_addr);
 
@@ -1107,7 +1142,7 @@
     // There is no reference offset bitmap. In the non-static case, walk up the class
     // inheritance hierarchy and find reference offsets the hard way. In the static case, just
     // consider this class.
-    for (mirror::Class* klass = kIsStatic
+    for (ObjPtr<Class> klass = kIsStatic
             ? AsClass<kVerifyFlags, kReadBarrierOption>()
             : GetClass<kVerifyFlags, kReadBarrierOption>();
         klass != nullptr;
@@ -1136,13 +1171,13 @@
 }
 
 template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption, typename Visitor>
-inline void Object::VisitInstanceFieldsReferences(mirror::Class* klass, const Visitor& visitor) {
+inline void Object::VisitInstanceFieldsReferences(ObjPtr<Class> klass, const Visitor& visitor) {
   VisitFieldsReferences<false, kVerifyFlags, kReadBarrierOption>(
       klass->GetReferenceInstanceOffsets<kVerifyFlags>(), visitor);
 }
 
 template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption, typename Visitor>
-inline void Object::VisitStaticFieldsReferences(mirror::Class* klass, const Visitor& visitor) {
+inline void Object::VisitStaticFieldsReferences(ObjPtr<Class> klass, const Visitor& visitor) {
   DCHECK(!klass->IsTemp());
   klass->VisitFieldsReferences<true, kVerifyFlags, kReadBarrierOption>(0, visitor);
 }
@@ -1176,7 +1211,7 @@
           typename JavaLangRefVisitor>
 inline void Object::VisitReferences(const Visitor& visitor,
                                     const JavaLangRefVisitor& ref_visitor) {
-  mirror::Class* klass = GetClass<kVerifyFlags, kReadBarrierOption>();
+  ObjPtr<Class> klass = GetClass<kVerifyFlags, kReadBarrierOption>();
   visitor(this, ClassOffset(), false);
   const uint32_t class_flags = klass->GetClassFlags<kVerifyNone>();
   if (LIKELY(class_flags == kClassFlagNormal)) {
@@ -1191,7 +1226,7 @@
       DCHECK(!klass->IsStringClass());
       if (class_flags == kClassFlagClass) {
         DCHECK((klass->IsClassClass<kVerifyFlags, kReadBarrierOption>()));
-        mirror::Class* as_klass = AsClass<kVerifyNone, kReadBarrierOption>();
+        ObjPtr<Class> as_klass = AsClass<kVerifyNone, kReadBarrierOption>();
         as_klass->VisitReferences<kVisitNativeRoots, kVerifyFlags, kReadBarrierOption>(klass,
                                                                                        visitor);
       } else if (class_flags == kClassFlagObjectArray) {
@@ -1218,7 +1253,7 @@
       // actual string instances.
       if (!klass->IsStringClass()) {
         size_t total_reference_instance_fields = 0;
-        mirror::Class* super_class = klass;
+        ObjPtr<Class> super_class = klass;
         do {
           total_reference_instance_fields += super_class->NumReferenceInstanceFields();
           super_class = super_class->GetSuperClass<kVerifyFlags, kReadBarrierOption>();
diff --git a/runtime/mirror/object.cc b/runtime/mirror/object.cc
index 90b97fd..f5b9ab3 100644
--- a/runtime/mirror/object.cc
+++ b/runtime/mirror/object.cc
@@ -43,19 +43,19 @@
 
 class CopyReferenceFieldsWithReadBarrierVisitor {
  public:
-  explicit CopyReferenceFieldsWithReadBarrierVisitor(Object* dest_obj)
+  explicit CopyReferenceFieldsWithReadBarrierVisitor(ObjPtr<Object> dest_obj)
       : dest_obj_(dest_obj) {}
 
-  void operator()(Object* obj, MemberOffset offset, bool /* is_static */) const
+  void operator()(ObjPtr<Object> obj, MemberOffset offset, bool /* is_static */) const
       ALWAYS_INLINE REQUIRES_SHARED(Locks::mutator_lock_) {
     // GetFieldObject() contains a RB.
-    Object* ref = obj->GetFieldObject<Object>(offset);
+    ObjPtr<Object> ref = obj->GetFieldObject<Object>(offset);
     // No WB here as a large object space does not have a card table
     // coverage. Instead, cards will be marked separately.
     dest_obj_->SetFieldObjectWithoutWriteBarrier<false, false>(offset, ref);
   }
 
-  void operator()(mirror::Class* klass, mirror::Reference* ref) const
+  void operator()(ObjPtr<mirror::Class> klass, mirror::Reference* ref) const
       ALWAYS_INLINE REQUIRES_SHARED(Locks::mutator_lock_) {
     // Copy java.lang.ref.Reference.referent which isn't visited in
     // Object::VisitReferences().
@@ -69,27 +69,56 @@
   void VisitRoot(mirror::CompressedReference<mirror::Object>* root ATTRIBUTE_UNUSED) const {}
 
  private:
-  Object* const dest_obj_;
+  ObjPtr<Object> const dest_obj_;
 };
 
-Object* Object::CopyObject(Thread* self, mirror::Object* dest, mirror::Object* src,
+Object* Object::CopyObject(ObjPtr<mirror::Object> dest,
+                           ObjPtr<mirror::Object> src,
                            size_t num_bytes) {
-  // Copy instance data.  We assume memcpy copies by words.
-  // TODO: expose and use move32.
-  uint8_t* src_bytes = reinterpret_cast<uint8_t*>(src);
-  uint8_t* dst_bytes = reinterpret_cast<uint8_t*>(dest);
-  size_t offset = sizeof(Object);
-  memcpy(dst_bytes + offset, src_bytes + offset, num_bytes - offset);
+  // Copy instance data.  Don't assume memcpy copies by words (b/32012820).
+  {
+    const size_t offset = sizeof(Object);
+    uint8_t* src_bytes = reinterpret_cast<uint8_t*>(src.Ptr()) + offset;
+    uint8_t* dst_bytes = reinterpret_cast<uint8_t*>(dest.Ptr()) + offset;
+    num_bytes -= offset;
+    DCHECK_ALIGNED(src_bytes, sizeof(uintptr_t));
+    DCHECK_ALIGNED(dst_bytes, sizeof(uintptr_t));
+    // Use word sized copies to begin.
+    while (num_bytes >= sizeof(uintptr_t)) {
+      reinterpret_cast<Atomic<uintptr_t>*>(dst_bytes)->StoreRelaxed(
+          reinterpret_cast<Atomic<uintptr_t>*>(src_bytes)->LoadRelaxed());
+      src_bytes += sizeof(uintptr_t);
+      dst_bytes += sizeof(uintptr_t);
+      num_bytes -= sizeof(uintptr_t);
+    }
+    // Copy possible 32 bit word.
+    if (sizeof(uintptr_t) != sizeof(uint32_t) && num_bytes >= sizeof(uint32_t)) {
+      reinterpret_cast<Atomic<uint32_t>*>(dst_bytes)->StoreRelaxed(
+          reinterpret_cast<Atomic<uint32_t>*>(src_bytes)->LoadRelaxed());
+      src_bytes += sizeof(uint32_t);
+      dst_bytes += sizeof(uint32_t);
+      num_bytes -= sizeof(uint32_t);
+    }
+    // Copy remaining bytes, avoid going past the end of num_bytes since there may be a redzone
+    // there.
+    while (num_bytes > 0) {
+      reinterpret_cast<Atomic<uint8_t>*>(dst_bytes)->StoreRelaxed(
+          reinterpret_cast<Atomic<uint8_t>*>(src_bytes)->LoadRelaxed());
+      src_bytes += sizeof(uint8_t);
+      dst_bytes += sizeof(uint8_t);
+      num_bytes -= sizeof(uint8_t);
+    }
+  }
+
   if (kUseReadBarrier) {
-    // We need a RB here. After the memcpy that covers the whole
-    // object above, copy references fields one by one again with a
-    // RB. TODO: Optimize this later?
+    // We need a RB here. After copying the whole object above, copy references fields one by one
+    // again with a RB to make sure there are no from space refs. TODO: Optimize this later?
     CopyReferenceFieldsWithReadBarrierVisitor visitor(dest);
     src->VisitReferences(visitor, visitor);
   }
   gc::Heap* heap = Runtime::Current()->GetHeap();
   // Perform write barriers on copied object references.
-  Class* c = src->GetClass();
+  ObjPtr<Class> c = src->GetClass();
   if (c->IsArrayClass()) {
     if (!c->GetComponentType()->IsPrimitive()) {
       ObjectArray<Object>* array = dest->AsObjectArray<Object>();
@@ -98,26 +127,21 @@
   } else {
     heap->WriteBarrierEveryFieldOf(dest);
   }
-  if (c->IsFinalizable()) {
-    heap->AddFinalizerReference(self, &dest);
-  }
-  return dest;
+  return dest.Ptr();
 }
 
 // An allocation pre-fence visitor that copies the object.
 class CopyObjectVisitor {
  public:
-  CopyObjectVisitor(Thread* self, Handle<Object>* orig, size_t num_bytes)
-      : self_(self), orig_(orig), num_bytes_(num_bytes) {
-  }
+  CopyObjectVisitor(Handle<Object>* orig, size_t num_bytes)
+      : orig_(orig), num_bytes_(num_bytes) {}
 
-  void operator()(Object* obj, size_t usable_size ATTRIBUTE_UNUSED) const
+  void operator()(ObjPtr<Object> obj, size_t usable_size ATTRIBUTE_UNUSED) const
       REQUIRES_SHARED(Locks::mutator_lock_) {
-    Object::CopyObject(self_, obj, orig_->Get(), num_bytes_);
+    Object::CopyObject(obj, orig_->Get(), num_bytes_);
   }
 
  private:
-  Thread* const self_;
   Handle<Object>* const orig_;
   const size_t num_bytes_;
   DISALLOW_COPY_AND_ASSIGN(CopyObjectVisitor);
@@ -131,14 +155,17 @@
   size_t num_bytes = SizeOf();
   StackHandleScope<1> hs(self);
   Handle<Object> this_object(hs.NewHandle(this));
-  Object* copy;
-  CopyObjectVisitor visitor(self, &this_object, num_bytes);
+  ObjPtr<Object> copy;
+  CopyObjectVisitor visitor(&this_object, num_bytes);
   if (heap->IsMovableObject(this)) {
     copy = heap->AllocObject<true>(self, GetClass(), num_bytes, visitor);
   } else {
     copy = heap->AllocNonMovableObject<true>(self, GetClass(), num_bytes, visitor);
   }
-  return copy;
+  if (this_object->GetClass()->IsFinalizable()) {
+    heap->AddFinalizerReference(self, &copy);
+  }
+  return copy.Ptr();
 }
 
 uint32_t Object::GenerateIdentityHashCode() {
@@ -155,8 +182,8 @@
   hash_code_seed.StoreRelaxed(new_seed);
 }
 
-int32_t Object::IdentityHashCode() const {
-  mirror::Object* current_this = const_cast<mirror::Object*>(this);
+int32_t Object::IdentityHashCode() {
+  ObjPtr<Object> current_this = this;  // The this pointer may get invalidated by thread suspension.
   while (true) {
     LockWord lw = current_this->GetLockWord(false);
     switch (lw.GetState()) {
@@ -165,7 +192,7 @@
         // loop iteration.
         LockWord hash_word = LockWord::FromHashCode(GenerateIdentityHashCode(), lw.GCState());
         DCHECK_EQ(hash_word.GetState(), LockWord::kHashCode);
-        if (const_cast<Object*>(this)->CasLockWordWeakRelaxed(lw, hash_word)) {
+        if (current_this->CasLockWordWeakRelaxed(lw, hash_word)) {
           return hash_word.GetHashCode();
         }
         break;
@@ -199,17 +226,15 @@
   UNREACHABLE();
 }
 
-void Object::CheckFieldAssignmentImpl(MemberOffset field_offset, Object* new_value) {
-  Class* c = GetClass();
+void Object::CheckFieldAssignmentImpl(MemberOffset field_offset, ObjPtr<Object> new_value) {
+  ObjPtr<Class> c = GetClass();
   Runtime* runtime = Runtime::Current();
   if (runtime->GetClassLinker() == nullptr || !runtime->IsStarted() ||
       !runtime->GetHeap()->IsObjectValidationEnabled() || !c->IsResolved()) {
     return;
   }
-  for (Class* cur = c; cur != nullptr; cur = cur->GetSuperClass()) {
+  for (ObjPtr<Class> cur = c; cur != nullptr; cur = cur->GetSuperClass()) {
     for (ArtField& field : cur->GetIFields()) {
-      StackHandleScope<1> hs(Thread::Current());
-      Handle<Object> h_object(hs.NewHandle(new_value));
       if (field.GetOffset().Int32Value() == field_offset.Int32Value()) {
         CHECK_NE(field.GetTypeAsPrimitiveType(), Primitive::kPrimNot);
         // TODO: resolve the field type for moving GC.
@@ -239,7 +264,7 @@
     }
   }
   LOG(FATAL) << "Failed to find field for assignment to " << reinterpret_cast<void*>(this)
-      << " of type " << PrettyDescriptor(c) << " at offset " << field_offset;
+      << " of type " << c->PrettyDescriptor() << " at offset " << field_offset;
   UNREACHABLE();
 }
 
@@ -248,5 +273,24 @@
       : ArtField::FindInstanceFieldWithOffset(GetClass(), offset.Uint32Value());
 }
 
+std::string Object::PrettyTypeOf(ObjPtr<mirror::Object> obj) {
+  if (obj == nullptr) {
+    return "null";
+  }
+  return obj->PrettyTypeOf();
+}
+
+std::string Object::PrettyTypeOf() {
+  if (GetClass() == nullptr) {
+    return "(raw)";
+  }
+  std::string temp;
+  std::string result(PrettyDescriptor(GetClass()->GetDescriptor(&temp)));
+  if (IsClass()) {
+    result += "<" + PrettyDescriptor(AsClass()->GetDescriptor(&temp)) + ">";
+  }
+  return result;
+}
+
 }  // namespace mirror
 }  // namespace art
diff --git a/runtime/mirror/object.h b/runtime/mirror/object.h
index 10faf60..67b5ddb 100644
--- a/runtime/mirror/object.h
+++ b/runtime/mirror/object.h
@@ -92,21 +92,24 @@
   ALWAYS_INLINE Class* GetClass() REQUIRES_SHARED(Locks::mutator_lock_);
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
-  void SetClass(Class* new_klass) REQUIRES_SHARED(Locks::mutator_lock_);
+  void SetClass(ObjPtr<Class> new_klass) REQUIRES_SHARED(Locks::mutator_lock_);
 
-  // TODO: Clean these up and change to return int32_t
-  Object* GetReadBarrierPointer() REQUIRES_SHARED(Locks::mutator_lock_);
-
-  // Get the read barrier pointer with release semantics, only supported for baker.
-  Object* GetReadBarrierPointerAcquire() REQUIRES_SHARED(Locks::mutator_lock_);
+  // Get the read barrier state with a fake address dependency.
+  // '*fake_address_dependency' will be set to 0.
+  ALWAYS_INLINE uint32_t GetReadBarrierState(uintptr_t* fake_address_dependency)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+  // This version does not offer any special mechanism to prevent load-load reordering.
+  ALWAYS_INLINE uint32_t GetReadBarrierState() REQUIRES_SHARED(Locks::mutator_lock_);
+  // Get the read barrier state with a load-acquire.
+  ALWAYS_INLINE uint32_t GetReadBarrierStateAcquire() REQUIRES_SHARED(Locks::mutator_lock_);
 
 #ifndef USE_BAKER_OR_BROOKS_READ_BARRIER
   NO_RETURN
 #endif
-  void SetReadBarrierPointer(Object* rb_ptr) REQUIRES_SHARED(Locks::mutator_lock_);
+  ALWAYS_INLINE void SetReadBarrierState(uint32_t rb_state) REQUIRES_SHARED(Locks::mutator_lock_);
 
   template<bool kCasRelease = false>
-  ALWAYS_INLINE bool AtomicSetReadBarrierPointer(Object* expected_rb_ptr, Object* rb_ptr)
+  ALWAYS_INLINE bool AtomicSetReadBarrierState(uint32_t expected_rb_state, uint32_t rb_state)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   ALWAYS_INLINE uint32_t GetMarkBit() REQUIRES_SHARED(Locks::mutator_lock_);
@@ -114,12 +117,13 @@
   ALWAYS_INLINE bool AtomicSetMarkBit(uint32_t expected_mark_bit, uint32_t mark_bit)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  void AssertReadBarrierPointer() const REQUIRES_SHARED(Locks::mutator_lock_);
+  // Assert that the read barrier state is in the default (white) state.
+  ALWAYS_INLINE void AssertReadBarrierState() const REQUIRES_SHARED(Locks::mutator_lock_);
 
   // The verifier treats all interfaces as java.lang.Object and relies on runtime checks in
   // invoke-interface to detect incompatible interface types.
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
-  bool VerifierInstanceOf(Class* klass) REQUIRES_SHARED(Locks::mutator_lock_);
+  bool VerifierInstanceOf(ObjPtr<Class> klass) REQUIRES_SHARED(Locks::mutator_lock_);
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   ALWAYS_INLINE bool InstanceOf(ObjPtr<Class> klass) REQUIRES_SHARED(Locks::mutator_lock_);
 
@@ -130,9 +134,10 @@
   Object* Clone(Thread* self) REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!Roles::uninterruptible_);
 
-  int32_t IdentityHashCode() const
+  int32_t IdentityHashCode()
       REQUIRES_SHARED(Locks::mutator_lock_)
-      REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_);
+      REQUIRES(!Locks::thread_list_lock_,
+               !Locks::thread_suspend_count_lock_);
 
   static MemberOffset MonitorOffset() {
     return OFFSET_OF_OBJECT_MEMBER(Object, monitor_);
@@ -283,54 +288,69 @@
   ALWAYS_INLINE T* GetFieldObjectVolatile(MemberOffset field_offset)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  template<bool kTransactionActive, bool kCheckTransaction = true,
-      VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags, bool kIsVolatile = false>
-  ALWAYS_INLINE void SetFieldObjectWithoutWriteBarrier(MemberOffset field_offset, Object* new_value)
+  template<bool kTransactionActive,
+           bool kCheckTransaction = true,
+           VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           bool kIsVolatile = false>
+  ALWAYS_INLINE void SetFieldObjectWithoutWriteBarrier(MemberOffset field_offset,
+                                                       ObjPtr<Object> new_value)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  template<bool kTransactionActive, bool kCheckTransaction = true,
-      VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags, bool kIsVolatile = false>
-  ALWAYS_INLINE void SetFieldObject(MemberOffset field_offset, Object* new_value)
+  template<bool kTransactionActive,
+           bool kCheckTransaction = true,
+           VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           bool kIsVolatile = false>
+  ALWAYS_INLINE void SetFieldObject(MemberOffset field_offset, ObjPtr<Object> new_value)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  template<bool kTransactionActive, bool kCheckTransaction = true,
-      VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
-  ALWAYS_INLINE void SetFieldObjectVolatile(MemberOffset field_offset, Object* new_value)
+  template<bool kTransactionActive,
+           bool kCheckTransaction = true,
+           VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  ALWAYS_INLINE void SetFieldObjectVolatile(MemberOffset field_offset,
+                                            ObjPtr<Object> new_value)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  template<bool kTransactionActive, bool kCheckTransaction = true,
-      VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
-  bool CasFieldWeakSequentiallyConsistentObject(MemberOffset field_offset, Object* old_value,
-                                                Object* new_value)
+  template<bool kTransactionActive,
+           bool kCheckTransaction = true,
+           VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  bool CasFieldWeakSequentiallyConsistentObject(MemberOffset field_offset,
+                                                ObjPtr<Object> old_value,
+                                                ObjPtr<Object> new_value)
       REQUIRES_SHARED(Locks::mutator_lock_);
-  template<bool kTransactionActive, bool kCheckTransaction = true,
-      VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  template<bool kTransactionActive,
+           bool kCheckTransaction = true,
+           VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   bool CasFieldWeakSequentiallyConsistentObjectWithoutWriteBarrier(MemberOffset field_offset,
-                                                                   Object* old_value,
-                                                                   Object* new_value)
+                                                                   ObjPtr<Object> old_value,
+                                                                   ObjPtr<Object> new_value)
       REQUIRES_SHARED(Locks::mutator_lock_);
-  template<bool kTransactionActive, bool kCheckTransaction = true,
-      VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
-  bool CasFieldStrongSequentiallyConsistentObject(MemberOffset field_offset, Object* old_value,
-                                                  Object* new_value)
+  template<bool kTransactionActive,
+           bool kCheckTransaction = true,
+           VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  bool CasFieldStrongSequentiallyConsistentObject(MemberOffset field_offset,
+                                                  ObjPtr<Object> old_value,
+                                                  ObjPtr<Object> new_value)
       REQUIRES_SHARED(Locks::mutator_lock_);
-  template<bool kTransactionActive, bool kCheckTransaction = true,
-      VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  template<bool kTransactionActive,
+           bool kCheckTransaction = true,
+           VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   bool CasFieldStrongSequentiallyConsistentObjectWithoutWriteBarrier(MemberOffset field_offset,
-                                                                     Object* old_value,
-                                                                     Object* new_value)
+                                                                     ObjPtr<Object> old_value,
+                                                                     ObjPtr<Object> new_value)
       REQUIRES_SHARED(Locks::mutator_lock_);
-  template<bool kTransactionActive, bool kCheckTransaction = true,
-      VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  template<bool kTransactionActive,
+           bool kCheckTransaction = true,
+           VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   bool CasFieldWeakRelaxedObjectWithoutWriteBarrier(MemberOffset field_offset,
-                                                    Object* old_value,
-                                                    Object* new_value)
+                                                    ObjPtr<Object> old_value,
+                                                    ObjPtr<Object> new_value)
       REQUIRES_SHARED(Locks::mutator_lock_);
-  template<bool kTransactionActive, bool kCheckTransaction = true,
-      VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
+  template<bool kTransactionActive,
+           bool kCheckTransaction = true,
+           VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   bool CasFieldStrongRelaxedObjectWithoutWriteBarrier(MemberOffset field_offset,
-                                                      Object* old_value,
-                                                      Object* new_value)
+                                                      ObjPtr<Object> old_value,
+                                                      ObjPtr<Object> new_value)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
@@ -528,6 +548,15 @@
   // Generate an identity hash code. Public for object test.
   static uint32_t GenerateIdentityHashCode();
 
+  // Returns a human-readable form of the name of the *class* of the given object.
+  // So given an instance of java.lang.String, the output would
+  // be "java.lang.String". Given an array of int, the output would be "int[]".
+  // Given String.class, the output would be "java.lang.Class<java.lang.String>".
+  static std::string PrettyTypeOf(ObjPtr<mirror::Object> obj)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+  std::string PrettyTypeOf()
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
  protected:
   // Accessors for non-Java type fields
   template<class T, VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags, bool kIsVolatile = false>
@@ -563,12 +592,12 @@
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
            ReadBarrierOption kReadBarrierOption = kWithReadBarrier,
            typename Visitor>
-  void VisitInstanceFieldsReferences(mirror::Class* klass, const Visitor& visitor) HOT_ATTR
+  void VisitInstanceFieldsReferences(ObjPtr<mirror::Class> klass, const Visitor& visitor) HOT_ATTR
       REQUIRES_SHARED(Locks::mutator_lock_);
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
            ReadBarrierOption kReadBarrierOption = kWithReadBarrier,
            typename Visitor>
-  void VisitStaticFieldsReferences(mirror::Class* klass, const Visitor& visitor) HOT_ATTR
+  void VisitStaticFieldsReferences(ObjPtr<mirror::Class> klass, const Visitor& visitor) HOT_ATTR
       REQUIRES_SHARED(Locks::mutator_lock_);
 
  private:
@@ -585,19 +614,20 @@
 
   // Verify the type correctness of stores to fields.
   // TODO: This can cause thread suspension and isn't moving GC safe.
-  void CheckFieldAssignmentImpl(MemberOffset field_offset, Object* new_value)
+  void CheckFieldAssignmentImpl(MemberOffset field_offset, ObjPtr<Object> new_value)
       REQUIRES_SHARED(Locks::mutator_lock_);
-  void CheckFieldAssignment(MemberOffset field_offset, Object* new_value)
+  void CheckFieldAssignment(MemberOffset field_offset, ObjPtr<Object>new_value)
       REQUIRES_SHARED(Locks::mutator_lock_) {
     if (kCheckFieldAssignments) {
       CheckFieldAssignmentImpl(field_offset, new_value);
     }
   }
 
-  // A utility function that copies an object in a read barrier and
-  // write barrier-aware way. This is internally used by Clone() and
-  // Class::CopyOf().
-  static Object* CopyObject(Thread* self, mirror::Object* dest, mirror::Object* src,
+  // A utility function that copies an object in a read barrier and write barrier-aware way.
+  // This is internally used by Clone() and Class::CopyOf(). If the object is finalizable,
+  // it is the callers job to call Heap::AddFinalizerReference.
+  static Object* CopyObject(ObjPtr<mirror::Object> dest,
+                            ObjPtr<mirror::Object> src,
                             size_t num_bytes)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
diff --git a/runtime/mirror/object_array-inl.h b/runtime/mirror/object_array-inl.h
index c3c5231..0fdf132 100644
--- a/runtime/mirror/object_array-inl.h
+++ b/runtime/mirror/object_array-inl.h
@@ -25,6 +25,7 @@
 #include "base/stringprintf.h"
 #include "gc/heap.h"
 #include "mirror/class.h"
+#include "obj_ptr-inl.h"
 #include "runtime.h"
 #include "handle_scope-inl.h"
 #include "thread.h"
@@ -34,24 +35,29 @@
 namespace mirror {
 
 template<class T>
-inline ObjectArray<T>* ObjectArray<T>::Alloc(Thread* self, Class* object_array_class,
+inline ObjectArray<T>* ObjectArray<T>::Alloc(Thread* self,
+                                             ObjPtr<Class> object_array_class,
                                              int32_t length, gc::AllocatorType allocator_type) {
-  Array* array = Array::Alloc<true>(self, object_array_class, length,
-                                    ComponentSizeShiftWidth(sizeof(HeapReference<Object>)),
+  Array* array = Array::Alloc<true>(self,
+                                    object_array_class.Ptr(),
+                                    length,
+                                    ComponentSizeShiftWidth(kHeapReferenceSize),
                                     allocator_type);
   if (UNLIKELY(array == nullptr)) {
     return nullptr;
-  } else {
-    DCHECK_EQ(array->GetClass()->GetComponentSizeShift(),
-              ComponentSizeShiftWidth(sizeof(HeapReference<Object>)));
-    return array->AsObjectArray<T>();
   }
+  DCHECK_EQ(array->GetClass()->GetComponentSizeShift(),
+            ComponentSizeShiftWidth(kHeapReferenceSize));
+  return array->AsObjectArray<T>();
 }
 
 template<class T>
-inline ObjectArray<T>* ObjectArray<T>::Alloc(Thread* self, Class* object_array_class,
+inline ObjectArray<T>* ObjectArray<T>::Alloc(Thread* self,
+                                             ObjPtr<Class> object_array_class,
                                              int32_t length) {
-  return Alloc(self, object_array_class, length,
+  return Alloc(self,
+               object_array_class,
+               length,
                Runtime::Current()->GetHeap()->GetCurrentAllocator());
 }
 
@@ -65,7 +71,7 @@
 }
 
 template<class T> template<VerifyObjectFlags kVerifyFlags>
-inline bool ObjectArray<T>::CheckAssignable(T* object) {
+inline bool ObjectArray<T>::CheckAssignable(ObjPtr<T> object) {
   if (object != nullptr) {
     Class* element_class = GetClass<kVerifyFlags>()->GetComponentType();
     if (UNLIKELY(!object->InstanceOf(element_class))) {
@@ -77,7 +83,7 @@
 }
 
 template<class T>
-inline void ObjectArray<T>::Set(int32_t i, T* object) {
+inline void ObjectArray<T>::Set(int32_t i, ObjPtr<T> object) {
   if (Runtime::Current()->IsActiveTransaction()) {
     Set<true>(i, object);
   } else {
@@ -87,7 +93,7 @@
 
 template<class T>
 template<bool kTransactionActive, bool kCheckTransaction, VerifyObjectFlags kVerifyFlags>
-inline void ObjectArray<T>::Set(int32_t i, T* object) {
+inline void ObjectArray<T>::Set(int32_t i, ObjPtr<T> object) {
   if (CheckIsValidIndex(i) && CheckAssignable<kVerifyFlags>(object)) {
     SetFieldObject<kTransactionActive, kCheckTransaction, kVerifyFlags>(OffsetOfElement(i), object);
   } else {
@@ -97,7 +103,7 @@
 
 template<class T>
 template<bool kTransactionActive, bool kCheckTransaction, VerifyObjectFlags kVerifyFlags>
-inline void ObjectArray<T>::SetWithoutChecks(int32_t i, T* object) {
+inline void ObjectArray<T>::SetWithoutChecks(int32_t i, ObjPtr<T> object) {
   DCHECK(CheckIsValidIndex<kVerifyFlags>(i));
   DCHECK(CheckAssignable<static_cast<VerifyObjectFlags>(kVerifyFlags & ~kVerifyThis)>(object));
   SetFieldObject<kTransactionActive, kCheckTransaction, kVerifyFlags>(OffsetOfElement(i), object);
@@ -105,7 +111,7 @@
 
 template<class T>
 template<bool kTransactionActive, bool kCheckTransaction, VerifyObjectFlags kVerifyFlags>
-inline void ObjectArray<T>::SetWithoutChecksAndWriteBarrier(int32_t i, T* object) {
+inline void ObjectArray<T>::SetWithoutChecksAndWriteBarrier(int32_t i, ObjPtr<T> object) {
   DCHECK(CheckIsValidIndex<kVerifyFlags>(i));
   // TODO:  enable this check. It fails when writing the image in ImageWriter::FixupObjectArray.
   // DCHECK(CheckAssignable(object));
@@ -113,15 +119,17 @@
       OffsetOfElement(i), object);
 }
 
-template<class T>
+template<class T> template<VerifyObjectFlags kVerifyFlags, ReadBarrierOption kReadBarrierOption>
 inline T* ObjectArray<T>::GetWithoutChecks(int32_t i) {
   DCHECK(CheckIsValidIndex(i));
-  return GetFieldObject<T>(OffsetOfElement(i));
+  return GetFieldObject<T, kVerifyFlags, kReadBarrierOption>(OffsetOfElement(i));
 }
 
 template<class T>
-inline void ObjectArray<T>::AssignableMemmove(int32_t dst_pos, ObjectArray<T>* src,
-                                              int32_t src_pos, int32_t count) {
+inline void ObjectArray<T>::AssignableMemmove(int32_t dst_pos,
+                                              ObjPtr<ObjectArray<T>> src,
+                                              int32_t src_pos,
+                                              int32_t count) {
   if (kIsDebugBuild) {
     for (int i = 0; i < count; ++i) {
       // The get will perform the VerifyObject.
@@ -131,28 +139,60 @@
   // Perform the memmove using int memmove then perform the write barrier.
   static_assert(sizeof(HeapReference<T>) == sizeof(uint32_t),
                 "art::mirror::HeapReference<T> and uint32_t have different sizes.");
-  IntArray* dstAsIntArray = reinterpret_cast<IntArray*>(this);
-  IntArray* srcAsIntArray = reinterpret_cast<IntArray*>(src);
-  if (kUseReadBarrier) {
-    // TODO: Optimize this later?
-    const bool copy_forward = (src != this) || (dst_pos < src_pos) || (dst_pos - src_pos >= count);
-    if (copy_forward) {
-      // Forward copy.
+  // TODO: Optimize this later?
+  // We can't use memmove since it does not handle read barriers and may do by per byte copying.
+  // See b/32012820.
+  const bool copy_forward = (src != this) || (dst_pos < src_pos) || (dst_pos - src_pos >= count);
+  if (copy_forward) {
+    // Forward copy.
+    bool baker_non_gray_case = false;
+    if (kUseReadBarrier && kUseBakerReadBarrier) {
+      uintptr_t fake_address_dependency;
+      if (!ReadBarrier::IsGray(src.Ptr(), &fake_address_dependency)) {
+        baker_non_gray_case = true;
+        DCHECK_EQ(fake_address_dependency, 0U);
+        src.Assign(reinterpret_cast<ObjectArray<T>*>(
+            reinterpret_cast<uintptr_t>(src.Ptr()) | fake_address_dependency));
+        for (int i = 0; i < count; ++i) {
+          // We can skip the RB here because 'src' isn't gray.
+          T* obj = src->template GetWithoutChecks<kDefaultVerifyFlags, kWithoutReadBarrier>(
+              src_pos + i);
+          SetWithoutChecksAndWriteBarrier<false>(dst_pos + i, obj);
+        }
+      }
+    }
+    if (!baker_non_gray_case) {
       for (int i = 0; i < count; ++i) {
         // We need a RB here. ObjectArray::GetWithoutChecks() contains a RB.
-        Object* obj = src->GetWithoutChecks(src_pos + i);
-        SetWithoutChecks<false>(dst_pos + i, obj);
-      }
-    } else {
-      // Backward copy.
-      for (int i = count - 1; i >= 0; --i) {
-        // We need a RB here. ObjectArray::GetWithoutChecks() contains a RB.
-        Object* obj = src->GetWithoutChecks(src_pos + i);
-        SetWithoutChecks<false>(dst_pos + i, obj);
+        T* obj = src->GetWithoutChecks(src_pos + i);
+        SetWithoutChecksAndWriteBarrier<false>(dst_pos + i, obj);
       }
     }
   } else {
-    dstAsIntArray->Memmove(dst_pos, srcAsIntArray, src_pos, count);
+    // Backward copy.
+    bool baker_non_gray_case = false;
+    if (kUseReadBarrier && kUseBakerReadBarrier) {
+      uintptr_t fake_address_dependency;
+      if (!ReadBarrier::IsGray(src.Ptr(), &fake_address_dependency)) {
+        baker_non_gray_case = true;
+        DCHECK_EQ(fake_address_dependency, 0U);
+        src.Assign(reinterpret_cast<ObjectArray<T>*>(
+            reinterpret_cast<uintptr_t>(src.Ptr()) | fake_address_dependency));
+        for (int i = count - 1; i >= 0; --i) {
+          // We can skip the RB here because 'src' isn't gray.
+          T* obj = src->template GetWithoutChecks<kDefaultVerifyFlags, kWithoutReadBarrier>(
+              src_pos + i);
+          SetWithoutChecksAndWriteBarrier<false>(dst_pos + i, obj);
+        }
+      }
+    }
+    if (!baker_non_gray_case) {
+      for (int i = count - 1; i >= 0; --i) {
+        // We need a RB here. ObjectArray::GetWithoutChecks() contains a RB.
+        T* obj = src->GetWithoutChecks(src_pos + i);
+        SetWithoutChecksAndWriteBarrier<false>(dst_pos + i, obj);
+      }
+    }
   }
   Runtime::Current()->GetHeap()->WriteBarrierArray(this, dst_pos, count);
   if (kIsDebugBuild) {
@@ -164,8 +204,10 @@
 }
 
 template<class T>
-inline void ObjectArray<T>::AssignableMemcpy(int32_t dst_pos, ObjectArray<T>* src,
-                                             int32_t src_pos, int32_t count) {
+inline void ObjectArray<T>::AssignableMemcpy(int32_t dst_pos,
+                                             ObjPtr<ObjectArray<T>> src,
+                                             int32_t src_pos,
+                                             int32_t count) {
   if (kIsDebugBuild) {
     for (int i = 0; i < count; ++i) {
       // The get will perform the VerifyObject.
@@ -175,17 +217,31 @@
   // Perform the memmove using int memcpy then perform the write barrier.
   static_assert(sizeof(HeapReference<T>) == sizeof(uint32_t),
                 "art::mirror::HeapReference<T> and uint32_t have different sizes.");
-  IntArray* dstAsIntArray = reinterpret_cast<IntArray*>(this);
-  IntArray* srcAsIntArray = reinterpret_cast<IntArray*>(src);
-  if (kUseReadBarrier) {
-    // TODO: Optimize this later?
+  // TODO: Optimize this later?
+  // We can't use memmove since it does not handle read barriers and may do by per byte copying.
+  // See b/32012820.
+  bool baker_non_gray_case = false;
+  if (kUseReadBarrier && kUseBakerReadBarrier) {
+    uintptr_t fake_address_dependency;
+    if (!ReadBarrier::IsGray(src.Ptr(), &fake_address_dependency)) {
+      baker_non_gray_case = true;
+      DCHECK_EQ(fake_address_dependency, 0U);
+      src.Assign(reinterpret_cast<ObjectArray<T>*>(
+          reinterpret_cast<uintptr_t>(src.Ptr()) | fake_address_dependency));
+      for (int i = 0; i < count; ++i) {
+        // We can skip the RB here because 'src' isn't gray.
+        Object* obj = src->template GetWithoutChecks<kDefaultVerifyFlags, kWithoutReadBarrier>(
+            src_pos + i);
+        SetWithoutChecksAndWriteBarrier<false>(dst_pos + i, obj);
+      }
+    }
+  }
+  if (!baker_non_gray_case) {
     for (int i = 0; i < count; ++i) {
       // We need a RB here. ObjectArray::GetWithoutChecks() contains a RB.
       T* obj = src->GetWithoutChecks(src_pos + i);
-      SetWithoutChecks<false>(dst_pos + i, obj);
+      SetWithoutChecksAndWriteBarrier<false>(dst_pos + i, obj);
     }
-  } else {
-    dstAsIntArray->Memcpy(dst_pos, srcAsIntArray, src_pos, count);
   }
   Runtime::Current()->GetHeap()->WriteBarrierArray(this, dst_pos, count);
   if (kIsDebugBuild) {
@@ -198,8 +254,10 @@
 
 template<class T>
 template<bool kTransactionActive>
-inline void ObjectArray<T>::AssignableCheckingMemcpy(int32_t dst_pos, ObjectArray<T>* src,
-                                                     int32_t src_pos, int32_t count,
+inline void ObjectArray<T>::AssignableCheckingMemcpy(int32_t dst_pos,
+                                                     ObjPtr<ObjectArray<T>> src,
+                                                     int32_t src_pos,
+                                                     int32_t count,
                                                      bool throw_exception) {
   DCHECK_NE(this, src)
       << "This case should be handled with memmove that handles overlaps correctly";
@@ -208,34 +266,69 @@
   Class* dst_class = GetClass()->GetComponentType();
   Class* lastAssignableElementClass = dst_class;
 
-  Object* o = nullptr;
+  T* o = nullptr;
   int i = 0;
-  for (; i < count; ++i) {
-    // The follow get operations force the objects to be verified.
-    // We need a RB here. ObjectArray::GetWithoutChecks() contains a RB.
-    o = src->GetWithoutChecks(src_pos + i);
-    if (o == nullptr) {
-      // Null is always assignable.
-      SetWithoutChecks<kTransactionActive>(dst_pos + i, nullptr);
-    } else {
-      // TODO: use the underlying class reference to avoid uncompression when not necessary.
-      Class* o_class = o->GetClass();
-      if (LIKELY(lastAssignableElementClass == o_class)) {
-        SetWithoutChecks<kTransactionActive>(dst_pos + i, o);
-      } else if (LIKELY(dst_class->IsAssignableFrom(o_class))) {
-        lastAssignableElementClass = o_class;
-        SetWithoutChecks<kTransactionActive>(dst_pos + i, o);
+  bool baker_non_gray_case = false;
+  if (kUseReadBarrier && kUseBakerReadBarrier) {
+    uintptr_t fake_address_dependency;
+    if (!ReadBarrier::IsGray(src.Ptr(), &fake_address_dependency)) {
+      baker_non_gray_case = true;
+      DCHECK_EQ(fake_address_dependency, 0U);
+      src.Assign(reinterpret_cast<ObjectArray<T>*>(
+          reinterpret_cast<uintptr_t>(src.Ptr()) | fake_address_dependency));
+      for (; i < count; ++i) {
+        // The follow get operations force the objects to be verified.
+        // We can skip the RB here because 'src' isn't gray.
+        o = src->template GetWithoutChecks<kDefaultVerifyFlags, kWithoutReadBarrier>(
+            src_pos + i);
+        if (o == nullptr) {
+          // Null is always assignable.
+          SetWithoutChecks<kTransactionActive>(dst_pos + i, nullptr);
+        } else {
+          // TODO: use the underlying class reference to avoid uncompression when not necessary.
+          Class* o_class = o->GetClass();
+          if (LIKELY(lastAssignableElementClass == o_class)) {
+            SetWithoutChecks<kTransactionActive>(dst_pos + i, o);
+          } else if (LIKELY(dst_class->IsAssignableFrom(o_class))) {
+            lastAssignableElementClass = o_class;
+            SetWithoutChecks<kTransactionActive>(dst_pos + i, o);
+          } else {
+            // Can't put this element into the array, break to perform write-barrier and throw
+            // exception.
+            break;
+          }
+        }
+      }
+    }
+  }
+  if (!baker_non_gray_case) {
+    for (; i < count; ++i) {
+      // The follow get operations force the objects to be verified.
+      // We need a RB here. ObjectArray::GetWithoutChecks() contains a RB.
+      o = src->GetWithoutChecks(src_pos + i);
+      if (o == nullptr) {
+        // Null is always assignable.
+        SetWithoutChecks<kTransactionActive>(dst_pos + i, nullptr);
       } else {
-        // Can't put this element into the array, break to perform write-barrier and throw
-        // exception.
-        break;
+        // TODO: use the underlying class reference to avoid uncompression when not necessary.
+        Class* o_class = o->GetClass();
+        if (LIKELY(lastAssignableElementClass == o_class)) {
+          SetWithoutChecks<kTransactionActive>(dst_pos + i, o);
+        } else if (LIKELY(dst_class->IsAssignableFrom(o_class))) {
+          lastAssignableElementClass = o_class;
+          SetWithoutChecks<kTransactionActive>(dst_pos + i, o);
+        } else {
+          // Can't put this element into the array, break to perform write-barrier and throw
+          // exception.
+          break;
+        }
       }
     }
   }
   Runtime::Current()->GetHeap()->WriteBarrierArray(this, dst_pos, count);
   if (UNLIKELY(i != count)) {
-    std::string actualSrcType(PrettyTypeOf(o));
-    std::string dstType(PrettyTypeOf(this));
+    std::string actualSrcType(mirror::Object::PrettyTypeOf(o));
+    std::string dstType(PrettyTypeOf());
     Thread* self = Thread::Current();
     if (throw_exception) {
       self->ThrowNewExceptionF("Ljava/lang/ArrayStoreException;",
@@ -266,8 +359,7 @@
 
 template<class T>
 inline MemberOffset ObjectArray<T>::OffsetOfElement(int32_t i) {
-  return MemberOffset(DataOffset(sizeof(HeapReference<Object>)).Int32Value() +
-                      (i * sizeof(HeapReference<Object>)));
+  return MemberOffset(DataOffset(kHeapReferenceSize).Int32Value() + (i * kHeapReferenceSize));
 }
 
 template<class T> template<typename Visitor>
diff --git a/runtime/mirror/object_array.h b/runtime/mirror/object_array.h
index 19b9d87..b7a9561 100644
--- a/runtime/mirror/object_array.h
+++ b/runtime/mirror/object_array.h
@@ -18,6 +18,7 @@
 #define ART_RUNTIME_MIRROR_OBJECT_ARRAY_H_
 
 #include "array.h"
+#include "obj_ptr.h"
 
 namespace art {
 namespace mirror {
@@ -30,11 +31,15 @@
     return Array::ClassSize(pointer_size);
   }
 
-  static ObjectArray<T>* Alloc(Thread* self, Class* object_array_class, int32_t length,
+  static ObjectArray<T>* Alloc(Thread* self,
+                               ObjPtr<Class> object_array_class,
+                               int32_t length,
                                gc::AllocatorType allocator_type)
       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
 
-  static ObjectArray<T>* Alloc(Thread* self, Class* object_array_class, int32_t length)
+  static ObjectArray<T>* Alloc(Thread* self,
+                               ObjPtr<Class> object_array_class,
+                               int32_t length)
       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
@@ -45,13 +50,13 @@
   // an ArrayStoreException and returns false.
   // TODO fix thread safety analysis: should be REQUIRES_SHARED(Locks::mutator_lock_).
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
-  bool CheckAssignable(T* object) NO_THREAD_SAFETY_ANALYSIS;
+  bool CheckAssignable(ObjPtr<T> object) NO_THREAD_SAFETY_ANALYSIS;
 
-  ALWAYS_INLINE void Set(int32_t i, T* object) REQUIRES_SHARED(Locks::mutator_lock_);
+  ALWAYS_INLINE void Set(int32_t i, ObjPtr<T> object) REQUIRES_SHARED(Locks::mutator_lock_);
   // TODO fix thread safety analysis: should be REQUIRES_SHARED(Locks::mutator_lock_).
   template<bool kTransactionActive, bool kCheckTransaction = true,
       VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
-  ALWAYS_INLINE void Set(int32_t i, T* object) NO_THREAD_SAFETY_ANALYSIS;
+  ALWAYS_INLINE void Set(int32_t i, ObjPtr<T> object) NO_THREAD_SAFETY_ANALYSIS;
 
   // Set element without bound and element type checks, to be used in limited
   // circumstances, such as during boot image writing.
@@ -59,32 +64,44 @@
   // REQUIRES_SHARED(Locks::mutator_lock_).
   template<bool kTransactionActive, bool kCheckTransaction = true,
       VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
-  ALWAYS_INLINE void SetWithoutChecks(int32_t i, T* object) NO_THREAD_SAFETY_ANALYSIS;
+  ALWAYS_INLINE void SetWithoutChecks(int32_t i, ObjPtr<T> object) NO_THREAD_SAFETY_ANALYSIS;
   // TODO fix thread safety analysis broken by the use of template. This should be
   // REQUIRES_SHARED(Locks::mutator_lock_).
   template<bool kTransactionActive, bool kCheckTransaction = true,
       VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
-  ALWAYS_INLINE void SetWithoutChecksAndWriteBarrier(int32_t i, T* object)
+  ALWAYS_INLINE void SetWithoutChecksAndWriteBarrier(int32_t i, ObjPtr<T> object)
       NO_THREAD_SAFETY_ANALYSIS;
 
+  template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags,
+           ReadBarrierOption kReadBarrierOption = kWithReadBarrier>
   ALWAYS_INLINE T* GetWithoutChecks(int32_t i) REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Copy src into this array (dealing with overlaps as memmove does) without assignability checks.
-  void AssignableMemmove(int32_t dst_pos, ObjectArray<T>* src, int32_t src_pos,
-                         int32_t count) REQUIRES_SHARED(Locks::mutator_lock_);
+  void AssignableMemmove(int32_t dst_pos,
+                         ObjPtr<ObjectArray<T>> src,
+                         int32_t src_pos,
+                         int32_t count)
+      REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Copy src into this array assuming no overlap and without assignability checks.
-  void AssignableMemcpy(int32_t dst_pos, ObjectArray<T>* src, int32_t src_pos,
-                        int32_t count) REQUIRES_SHARED(Locks::mutator_lock_);
+  void AssignableMemcpy(int32_t dst_pos,
+                        ObjPtr<ObjectArray<T>> src,
+                        int32_t src_pos,
+                        int32_t count)
+      REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Copy src into this array with assignability checks.
   template<bool kTransactionActive>
-  void AssignableCheckingMemcpy(int32_t dst_pos, ObjectArray<T>* src, int32_t src_pos,
-                                int32_t count, bool throw_exception)
+  void AssignableCheckingMemcpy(int32_t dst_pos,
+                                ObjPtr<ObjectArray<T>> src,
+                                int32_t src_pos,
+                                int32_t count,
+                                bool throw_exception)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   ObjectArray<T>* CopyOf(Thread* self, int32_t new_length)
-      REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
+      REQUIRES_SHARED(Locks::mutator_lock_)
+      REQUIRES(!Roles::uninterruptible_);
 
   static MemberOffset OffsetOfElement(int32_t i);
 
diff --git a/runtime/mirror/object_reference-inl.h b/runtime/mirror/object_reference-inl.h
new file mode 100644
index 0000000..e70b936
--- /dev/null
+++ b/runtime/mirror/object_reference-inl.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_MIRROR_OBJECT_REFERENCE_INL_H_
+#define ART_RUNTIME_MIRROR_OBJECT_REFERENCE_INL_H_
+
+#include "object_reference.h"
+
+#include "obj_ptr-inl.h"
+
+namespace art {
+namespace mirror {
+
+template<bool kPoisonReferences, class MirrorType>
+void ObjectReference<kPoisonReferences, MirrorType>::Assign(ObjPtr<MirrorType> ptr) {
+  Assign(ptr.Ptr());
+}
+
+template<class MirrorType>
+HeapReference<MirrorType> HeapReference<MirrorType>::FromObjPtr(ObjPtr<MirrorType> ptr) {
+  return HeapReference<MirrorType>(ptr.Ptr());
+}
+
+}  // namespace mirror
+}  // namespace art
+
+#endif  // ART_RUNTIME_MIRROR_OBJECT_REFERENCE_INL_H_
diff --git a/runtime/mirror/object_reference.h b/runtime/mirror/object_reference.h
index f4a3580..71f34c6 100644
--- a/runtime/mirror/object_reference.h
+++ b/runtime/mirror/object_reference.h
@@ -19,6 +19,7 @@
 
 #include "base/mutex.h"  // For Locks::mutator_lock_.
 #include "globals.h"
+#include "obj_ptr.h"
 
 namespace art {
 namespace mirror {
@@ -41,6 +42,9 @@
     reference_ = Compress(other);
   }
 
+  void Assign(ObjPtr<MirrorType> ptr)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
   void Clear() {
     reference_ = 0;
     DCHECK(IsNull());
@@ -86,11 +90,18 @@
       REQUIRES_SHARED(Locks::mutator_lock_) {
     return HeapReference<MirrorType>(mirror_ptr);
   }
+
+  static HeapReference<MirrorType> FromObjPtr(ObjPtr<MirrorType> ptr)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
  private:
   explicit HeapReference(MirrorType* mirror_ptr) REQUIRES_SHARED(Locks::mutator_lock_)
       : ObjectReference<kPoisonHeapReferences, MirrorType>(mirror_ptr) {}
 };
 
+static_assert(sizeof(mirror::HeapReference<mirror::Object>) == kHeapReferenceSize,
+              "heap reference size does not match");
+
 // Standard compressed reference used in the runtime. Used for StackReference and GC roots.
 template<class MirrorType>
 class MANAGED CompressedReference : public mirror::ObjectReference<false, MirrorType> {
diff --git a/runtime/mirror/object_test.cc b/runtime/mirror/object_test.cc
index 062afd3..5bf254d 100644
--- a/runtime/mirror/object_test.cc
+++ b/runtime/mirror/object_test.cc
@@ -139,10 +139,10 @@
   ASSERT_TRUE(oa->GetClass() != nullptr);
   Handle<mirror::Class> klass(hs.NewHandle(oa->GetClass()));
   ASSERT_EQ(2U, klass->NumDirectInterfaces());
-  EXPECT_EQ(class_linker_->FindSystemClass(soa.Self(), "Ljava/lang/Cloneable;"),
-            mirror::Class::GetDirectInterface(soa.Self(), klass, 0));
-  EXPECT_EQ(class_linker_->FindSystemClass(soa.Self(), "Ljava/io/Serializable;"),
-            mirror::Class::GetDirectInterface(soa.Self(), klass, 1));
+  EXPECT_OBJ_PTR_EQ(class_linker_->FindSystemClass(soa.Self(), "Ljava/lang/Cloneable;"),
+                    mirror::Class::GetDirectInterface(soa.Self(), klass, 0));
+  EXPECT_OBJ_PTR_EQ(class_linker_->FindSystemClass(soa.Self(), "Ljava/io/Serializable;"),
+                    mirror::Class::GetDirectInterface(soa.Self(), klass, 1));
 }
 
 TEST_F(ObjectTest, AllocArray) {
@@ -337,7 +337,7 @@
   dims->Set<false>(0, -1);
   multi = Array::CreateMultiArray(soa.Self(), c, dims);
   EXPECT_TRUE(soa.Self()->IsExceptionPending());
-  EXPECT_EQ(PrettyDescriptor(soa.Self()->GetException()->GetClass()),
+  EXPECT_EQ(mirror::Class::PrettyDescriptor(soa.Self()->GetException()->GetClass()),
             "java.lang.NegativeArraySizeException");
   soa.Self()->ClearException();
 
diff --git a/runtime/mirror/reference-inl.h b/runtime/mirror/reference-inl.h
index 039989b..a449b41 100644
--- a/runtime/mirror/reference-inl.h
+++ b/runtime/mirror/reference-inl.h
@@ -19,6 +19,8 @@
 
 #include "reference.h"
 
+#include "obj_ptr-inl.h"
+
 namespace art {
 namespace mirror {
 
@@ -27,6 +29,24 @@
   return Class::ComputeClassSize(false, vtable_entries, 2, 0, 0, 0, 0, pointer_size);
 }
 
+template<bool kTransactionActive>
+inline void Reference::SetReferent(ObjPtr<Object> referent) {
+  SetFieldObjectVolatile<kTransactionActive>(ReferentOffset(), referent);
+}
+
+inline void Reference::SetPendingNext(ObjPtr<Reference> pending_next) {
+  if (Runtime::Current()->IsActiveTransaction()) {
+    SetFieldObject<true>(PendingNextOffset(), pending_next);
+  } else {
+    SetFieldObject<false>(PendingNextOffset(), pending_next);
+  }
+}
+
+template<bool kTransactionActive>
+inline void FinalizerReference::SetZombie(ObjPtr<Object> zombie) {
+  return SetFieldObjectVolatile<kTransactionActive>(ZombieOffset(), zombie);
+}
+
 }  // namespace mirror
 }  // namespace art
 
diff --git a/runtime/mirror/reference.cc b/runtime/mirror/reference.cc
index 3c7f8c8..1d0b4c5 100644
--- a/runtime/mirror/reference.cc
+++ b/runtime/mirror/reference.cc
@@ -14,7 +14,7 @@
  * limitations under the License.
  */
 
-#include "reference.h"
+#include "reference-inl.h"
 
 #include "art_method.h"
 #include "gc_root-inl.h"
@@ -24,7 +24,7 @@
 
 GcRoot<Class> Reference::java_lang_ref_Reference_;
 
-void Reference::SetClass(Class* java_lang_ref_Reference) {
+void Reference::SetClass(ObjPtr<Class> java_lang_ref_Reference) {
   CHECK(java_lang_ref_Reference_.IsNull());
   CHECK(java_lang_ref_Reference != nullptr);
   java_lang_ref_Reference_ = GcRoot<Class>(java_lang_ref_Reference);
diff --git a/runtime/mirror/reference.h b/runtime/mirror/reference.h
index 6a8b32b..f2fa589 100644
--- a/runtime/mirror/reference.h
+++ b/runtime/mirror/reference.h
@@ -20,6 +20,7 @@
 #include "base/enums.h"
 #include "class.h"
 #include "gc_root.h"
+#include "obj_ptr.h"
 #include "object.h"
 #include "object_callbacks.h"
 #include "read_barrier_option.h"
@@ -69,9 +70,7 @@
         ReferentOffset());
   }
   template<bool kTransactionActive>
-  void SetReferent(Object* referent) REQUIRES_SHARED(Locks::mutator_lock_) {
-    SetFieldObjectVolatile<kTransactionActive>(ReferentOffset(), referent);
-  }
+  void SetReferent(ObjPtr<Object> referent) REQUIRES_SHARED(Locks::mutator_lock_);
   template<bool kTransactionActive>
   void ClearReferent() REQUIRES_SHARED(Locks::mutator_lock_) {
     SetFieldObjectVolatile<kTransactionActive>(ReferentOffset(), nullptr);
@@ -82,14 +81,7 @@
     return GetFieldObject<Reference, kDefaultVerifyFlags, kReadBarrierOption>(PendingNextOffset());
   }
 
-  void SetPendingNext(Reference* pending_next)
-      REQUIRES_SHARED(Locks::mutator_lock_) {
-    if (Runtime::Current()->IsActiveTransaction()) {
-      SetFieldObject<true>(PendingNextOffset(), pending_next);
-    } else {
-      SetFieldObject<false>(PendingNextOffset(), pending_next);
-    }
-  }
+  void SetPendingNext(ObjPtr<Reference> pending_next) REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Returns true if the reference's pendingNext is null, indicating it is
   // okay to process this reference.
@@ -112,7 +104,7 @@
     DCHECK(!java_lang_ref_Reference_.IsNull());
     return java_lang_ref_Reference_.Read<kReadBarrierOption>();
   }
-  static void SetClass(Class* klass);
+  static void SetClass(ObjPtr<Class> klass);
   static void ResetClass();
   static void VisitRoots(RootVisitor* visitor) REQUIRES_SHARED(Locks::mutator_lock_);
 
@@ -144,9 +136,8 @@
   }
 
   template<bool kTransactionActive>
-  void SetZombie(Object* zombie) REQUIRES_SHARED(Locks::mutator_lock_) {
-    return SetFieldObjectVolatile<kTransactionActive>(ZombieOffset(), zombie);
-  }
+  void SetZombie(ObjPtr<Object> zombie) REQUIRES_SHARED(Locks::mutator_lock_);
+
   Object* GetZombie() REQUIRES_SHARED(Locks::mutator_lock_) {
     return GetFieldObjectVolatile<Object>(ZombieOffset());
   }
diff --git a/runtime/mirror/stack_trace_element.cc b/runtime/mirror/stack_trace_element.cc
index 96f6a53..c00cf91 100644
--- a/runtime/mirror/stack_trace_element.cc
+++ b/runtime/mirror/stack_trace_element.cc
@@ -28,7 +28,7 @@
 
 GcRoot<Class> StackTraceElement::java_lang_StackTraceElement_;
 
-void StackTraceElement::SetClass(Class* java_lang_StackTraceElement) {
+void StackTraceElement::SetClass(ObjPtr<Class> java_lang_StackTraceElement) {
   CHECK(java_lang_StackTraceElement_.IsNull());
   CHECK(java_lang_StackTraceElement != nullptr);
   java_lang_StackTraceElement_ = GcRoot<Class>(java_lang_StackTraceElement);
@@ -39,30 +39,34 @@
   java_lang_StackTraceElement_ = GcRoot<Class>(nullptr);
 }
 
-StackTraceElement* StackTraceElement::Alloc(Thread* self, Handle<String> declaring_class,
-                                            Handle<String> method_name, Handle<String> file_name,
+StackTraceElement* StackTraceElement::Alloc(Thread* self,
+                                            Handle<String> declaring_class,
+                                            Handle<String> method_name,
+                                            Handle<String> file_name,
                                             int32_t line_number) {
-  StackTraceElement* trace =
-      down_cast<StackTraceElement*>(GetStackTraceElement()->AllocObject(self));
+  ObjPtr<StackTraceElement> trace =
+      ObjPtr<StackTraceElement>::DownCast(GetStackTraceElement()->AllocObject(self));
   if (LIKELY(trace != nullptr)) {
     if (Runtime::Current()->IsActiveTransaction()) {
-      trace->Init<true>(declaring_class, method_name, file_name, line_number);
+      trace->Init<true>(declaring_class.Get(), method_name.Get(), file_name.Get(), line_number);
     } else {
-      trace->Init<false>(declaring_class, method_name, file_name, line_number);
+      trace->Init<false>(declaring_class.Get(), method_name.Get(), file_name.Get(), line_number);
     }
   }
-  return trace;
+  return trace.Ptr();
 }
 
 template<bool kTransactionActive>
-void StackTraceElement::Init(Handle<String> declaring_class, Handle<String> method_name,
-                             Handle<String> file_name, int32_t line_number) {
+void StackTraceElement::Init(ObjPtr<String> declaring_class,
+                             ObjPtr<String> method_name,
+                             ObjPtr<String> file_name,
+                             int32_t line_number) {
   SetFieldObject<kTransactionActive>(OFFSET_OF_OBJECT_MEMBER(StackTraceElement, declaring_class_),
-                                     declaring_class.Get());
+                                     declaring_class);
   SetFieldObject<kTransactionActive>(OFFSET_OF_OBJECT_MEMBER(StackTraceElement, method_name_),
-                                     method_name.Get());
+                                     method_name);
   SetFieldObject<kTransactionActive>(OFFSET_OF_OBJECT_MEMBER(StackTraceElement, file_name_),
-                                     file_name.Get());
+                                     file_name);
   SetField32<kTransactionActive>(OFFSET_OF_OBJECT_MEMBER(StackTraceElement, line_number_),
                                  line_number);
 }
diff --git a/runtime/mirror/stack_trace_element.h b/runtime/mirror/stack_trace_element.h
index 4b3d9d0..d32d8dc 100644
--- a/runtime/mirror/stack_trace_element.h
+++ b/runtime/mirror/stack_trace_element.h
@@ -47,12 +47,14 @@
     return GetField32(OFFSET_OF_OBJECT_MEMBER(StackTraceElement, line_number_));
   }
 
-  static StackTraceElement* Alloc(Thread* self, Handle<String> declaring_class,
-                                  Handle<String> method_name, Handle<String> file_name,
+  static StackTraceElement* Alloc(Thread* self,
+                                  Handle<String> declaring_class,
+                                  Handle<String> method_name,
+                                  Handle<String> file_name,
                                   int32_t line_number)
       REQUIRES_SHARED(Locks::mutator_lock_) REQUIRES(!Roles::uninterruptible_);
 
-  static void SetClass(Class* java_lang_StackTraceElement);
+  static void SetClass(ObjPtr<Class> java_lang_StackTraceElement);
   static void ResetClass();
   static void VisitRoots(RootVisitor* visitor)
       REQUIRES_SHARED(Locks::mutator_lock_);
@@ -69,7 +71,9 @@
   int32_t line_number_;
 
   template<bool kTransactionActive>
-  void Init(Handle<String> declaring_class, Handle<String> method_name, Handle<String> file_name,
+  void Init(ObjPtr<String> declaring_class,
+            ObjPtr<String> method_name,
+            ObjPtr<String> file_name,
             int32_t line_number)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
diff --git a/runtime/mirror/string-inl.h b/runtime/mirror/string-inl.h
index aea6ff1..6870fda 100644
--- a/runtime/mirror/string-inl.h
+++ b/runtime/mirror/string-inl.h
@@ -43,10 +43,10 @@
   explicit SetStringCountVisitor(int32_t count) : count_(count) {
   }
 
-  void operator()(Object* obj, size_t usable_size ATTRIBUTE_UNUSED) const
+  void operator()(ObjPtr<Object> obj, size_t usable_size ATTRIBUTE_UNUSED) const
       REQUIRES_SHARED(Locks::mutator_lock_) {
     // Avoid AsString as object is not yet in live bitmap or allocation stack.
-    String* string = down_cast<String*>(obj);
+    ObjPtr<String> string = ObjPtr<String>::DownCast(obj);
     string->SetCount(count_);
     DCHECK(!string->IsCompressed() || kUseStringCompression);
   }
@@ -63,10 +63,10 @@
       : count_(count), src_array_(src_array), offset_(offset), high_byte_(high_byte) {
   }
 
-  void operator()(Object* obj, size_t usable_size ATTRIBUTE_UNUSED) const
+  void operator()(ObjPtr<Object> obj, size_t usable_size ATTRIBUTE_UNUSED) const
       REQUIRES_SHARED(Locks::mutator_lock_) {
     // Avoid AsString as object is not yet in live bitmap or allocation stack.
-    String* string = down_cast<String*>(obj);
+    ObjPtr<String> string = ObjPtr<String>::DownCast(obj);
     string->SetCount(count_);
     DCHECK(!string->IsCompressed() || kUseStringCompression);
     int32_t length = String::GetLengthFromCount(count_);
@@ -99,16 +99,14 @@
     count_(count), src_array_(src_array), offset_(offset) {
   }
 
-  void operator()(Object* obj, size_t usable_size ATTRIBUTE_UNUSED) const
+  void operator()(ObjPtr<Object> obj, size_t usable_size ATTRIBUTE_UNUSED) const
       REQUIRES_SHARED(Locks::mutator_lock_) {
     // Avoid AsString as object is not yet in live bitmap or allocation stack.
-    String* string = down_cast<String*>(obj);
+    ObjPtr<String> string = ObjPtr<String>::DownCast(obj);
     string->SetCount(count_);
     const uint16_t* const src = src_array_->GetData() + offset_;
     const int32_t length = String::GetLengthFromCount(count_);
-    bool compressible = kUseStringCompression && String::GetCompressionFlagFromCount(count_);
-    DCHECK(!compressible || kUseStringCompression);
-    if (compressible) {
+    if (kUseStringCompression && String::IsCompressed(count_)) {
       for (int i = 0; i < length; ++i) {
         string->GetValueCompressed()[i] = static_cast<uint8_t>(src[i]);
       }
@@ -126,19 +124,19 @@
 // Sets string count and value in the allocation code path to ensure it is guarded by a CAS.
 class SetStringCountAndValueVisitorFromString {
  public:
-  SetStringCountAndValueVisitorFromString(int32_t count, Handle<String> src_string,
+  SetStringCountAndValueVisitorFromString(int32_t count,
+                                          Handle<String> src_string,
                                           int32_t offset) :
     count_(count), src_string_(src_string), offset_(offset) {
   }
 
-  void operator()(Object* obj, size_t usable_size ATTRIBUTE_UNUSED) const
+  void operator()(ObjPtr<Object> obj, size_t usable_size ATTRIBUTE_UNUSED) const
       REQUIRES_SHARED(Locks::mutator_lock_) {
     // Avoid AsString as object is not yet in live bitmap or allocation stack.
-    String* string = down_cast<String*>(obj);
+    ObjPtr<String> string = ObjPtr<String>::DownCast(obj);
     string->SetCount(count_);
     const int32_t length = String::GetLengthFromCount(count_);
-    bool compressible = kUseStringCompression && String::GetCompressionFlagFromCount(count_);
-    DCHECK(!compressible || kUseStringCompression);
+    bool compressible = kUseStringCompression && String::IsCompressed(count_);
     if (src_string_->IsCompressed()) {
       const uint8_t* const src = src_string_->GetValueCompressed() + offset_;
       memcpy(string->GetValueCompressed(), src, length * sizeof(uint8_t));
@@ -160,7 +158,7 @@
   const int32_t offset_;
 };
 
-inline String* String::Intern() {
+inline ObjPtr<String> String::Intern() {
   return Runtime::Current()->GetInternTable()->InternWeak(this);
 }
 
@@ -209,8 +207,7 @@
                              gc::AllocatorType allocator_type,
                              const PreFenceVisitor& pre_fence_visitor) {
   constexpr size_t header_size = sizeof(String);
-  const bool compressible = kUseStringCompression &&
-                            String::GetCompressionFlagFromCount(utf16_length_with_flag);
+  const bool compressible = kUseStringCompression && String::IsCompressed(utf16_length_with_flag);
   const size_t block_size = (compressible) ? sizeof(uint8_t) : sizeof(uint16_t);
   size_t length = String::GetLengthFromCount(utf16_length_with_flag);
   static_assert(sizeof(length) <= sizeof(size_t),
@@ -232,7 +229,7 @@
   const size_t max_length = RoundDown(max_alloc_length, kObjectAlignment / block_size);
   if (UNLIKELY(length > max_length)) {
     self->ThrowOutOfMemoryError(StringPrintf("%s of length %d would overflow",
-                                             PrettyDescriptor(string_class).c_str(),
+                                             Class::PrettyDescriptor(string_class).c_str(),
                                              static_cast<int>(length)).c_str());
     return nullptr;
   }
@@ -245,7 +242,7 @@
 
 template <bool kIsInstrumented>
 inline String* String::AllocEmptyString(Thread* self, gc::AllocatorType allocator_type) {
-  const int32_t length_with_flag = String::GetFlaggedCount(0);
+  const int32_t length_with_flag = String::GetFlaggedCount(0, /* compressible */ true);
   SetStringCountVisitor visitor(length_with_flag);
   return Alloc<kIsInstrumented>(self, length_with_flag, allocator_type, visitor);
 }
@@ -255,10 +252,9 @@
                                           Handle<ByteArray> array, int32_t offset,
                                           int32_t high_byte, gc::AllocatorType allocator_type) {
   const uint8_t* const src = reinterpret_cast<uint8_t*>(array->GetData()) + offset;
-  const bool compressible = kUseStringCompression && String::AllASCII<uint8_t>(src, byte_length)
-                                            && (high_byte == 0);
-  const int32_t length_with_flag = (compressible) ? String::GetFlaggedCount(byte_length)
-                                                  : byte_length;
+  const bool compressible =
+      kUseStringCompression && String::AllASCII<uint8_t>(src, byte_length) && (high_byte == 0);
+  const int32_t length_with_flag = String::GetFlaggedCount(byte_length, compressible);
   SetStringCountAndBytesVisitor visitor(length_with_flag, array, offset, high_byte << 8);
   String* string = Alloc<kIsInstrumented>(self, length_with_flag, allocator_type, visitor);
   return string;
@@ -272,7 +268,7 @@
   DCHECK_GE(array->GetLength(), count);
   const bool compressible = kUseStringCompression &&
                             String::AllASCII<uint16_t>(array->GetData() + offset, count);
-  const int32_t length_with_flag = (compressible) ? String::GetFlaggedCount(count) : count;
+  const int32_t length_with_flag = String::GetFlaggedCount(count, compressible);
   SetStringCountAndValueVisitorFromCharArray visitor(length_with_flag, array, offset);
   String* new_string = Alloc<kIsInstrumented>(self, length_with_flag, allocator_type, visitor);
   return new_string;
@@ -284,8 +280,7 @@
   const bool compressible = kUseStringCompression &&
       ((string->IsCompressed()) ? true : String::AllASCII<uint16_t>(string->GetValue() + offset,
                                                                     string_length));
-  const int32_t length_with_flag = (compressible) ? String::GetFlaggedCount(string_length)
-                                                  : string_length;
+  const int32_t length_with_flag = String::GetFlaggedCount(string_length, compressible);
   SetStringCountAndValueVisitorFromString visitor(length_with_flag, string, offset);
   String* new_string = Alloc<kIsInstrumented>(self, length_with_flag, allocator_type, visitor);
   return new_string;
@@ -311,7 +306,7 @@
 template<typename MemoryType>
 bool String::AllASCII(const MemoryType* const chars, const int length) {
   for (int i = 0; i < length; ++i) {
-    if (chars[i] > 0x80) {
+    if (chars[i] >= 0x80) {
       return false;
     }
   }
diff --git a/runtime/mirror/string.cc b/runtime/mirror/string.cc
index 46caa4d..0ab0bd6 100644
--- a/runtime/mirror/string.cc
+++ b/runtime/mirror/string.cc
@@ -48,7 +48,7 @@
   }
 }
 
-void String::SetClass(Class* java_lang_String) {
+void String::SetClass(ObjPtr<Class> java_lang_String) {
   CHECK(java_lang_String_.IsNull());
   CHECK(java_lang_String != nullptr);
   CHECK(java_lang_String->IsStringClass());
@@ -93,12 +93,12 @@
   int32_t length = string->GetLength();
   int32_t length2 = string2->GetLength();
   gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
-  const bool compressible = kUseStringCompression && (string->IsCompressed() && string2->IsCompressed());
-  const int32_t length_with_flag = (compressible) ? String::GetFlaggedCount(length + length2)
-                                                  : (length + length2);
+  const bool compressible = kUseStringCompression &&
+      (string->IsCompressed() && string2->IsCompressed());
+  const int32_t length_with_flag = String::GetFlaggedCount(length + length2, compressible);
 
   SetStringCountVisitor visitor(length_with_flag);
-  String* new_string = Alloc<true>(self, length_with_flag, allocator_type, visitor);
+  ObjPtr<String> new_string = Alloc<true>(self, length_with_flag, allocator_type, visitor);
   if (UNLIKELY(new_string == nullptr)) {
     return nullptr;
   }
@@ -123,7 +123,7 @@
       memcpy(new_value + length, string2->GetValue(), length2 * sizeof(uint16_t));
     }
   }
-  return new_string;
+  return new_string.Ptr();
 }
 
 String* String::AllocFromUtf16(Thread* self, int32_t utf16_length, const uint16_t* utf16_data_in) {
@@ -131,10 +131,9 @@
   gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
   const bool compressible = kUseStringCompression &&
                             String::AllASCII<uint16_t>(utf16_data_in, utf16_length);
-  int32_t length_with_flag = (compressible) ? String::GetFlaggedCount(utf16_length)
-                                            : utf16_length;
+  int32_t length_with_flag = String::GetFlaggedCount(utf16_length, compressible);
   SetStringCountVisitor visitor(length_with_flag);
-  String* string = Alloc<true>(self, length_with_flag, allocator_type, visitor);
+  ObjPtr<String> string = Alloc<true>(self, length_with_flag, allocator_type, visitor);
   if (UNLIKELY(string == nullptr)) {
     return nullptr;
   }
@@ -146,7 +145,7 @@
     uint16_t* array = string->GetValue();
     memcpy(array, utf16_data_in, utf16_length * sizeof(uint16_t));
   }
-  return string;
+  return string.Ptr();
 }
 
 String* String::AllocFromModifiedUtf8(Thread* self, const char* utf) {
@@ -156,18 +155,21 @@
   return AllocFromModifiedUtf8(self, char_count, utf, byte_count);
 }
 
-String* String::AllocFromModifiedUtf8(Thread* self, int32_t utf16_length, const char* utf8_data_in) {
+String* String::AllocFromModifiedUtf8(Thread* self,
+                                      int32_t utf16_length,
+                                      const char* utf8_data_in) {
   return AllocFromModifiedUtf8(self, utf16_length, utf8_data_in, strlen(utf8_data_in));
 }
 
-String* String::AllocFromModifiedUtf8(Thread* self, int32_t utf16_length,
-                                      const char* utf8_data_in, int32_t utf8_length) {
+String* String::AllocFromModifiedUtf8(Thread* self,
+                                      int32_t utf16_length,
+                                      const char* utf8_data_in,
+                                      int32_t utf8_length) {
   gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
   const bool compressible = kUseStringCompression && (utf16_length == utf8_length);
-  const int32_t utf16_length_with_flag = (compressible) ? String::GetFlaggedCount(utf16_length)
-                                                        : utf16_length;
+  const int32_t utf16_length_with_flag = String::GetFlaggedCount(utf16_length, compressible);
   SetStringCountVisitor visitor(utf16_length_with_flag);
-  String* string = Alloc<true>(self, utf16_length_with_flag, allocator_type, visitor);
+  ObjPtr<String> string = Alloc<true>(self, utf16_length_with_flag, allocator_type, visitor);
   if (UNLIKELY(string == nullptr)) {
     return nullptr;
   }
@@ -177,10 +179,10 @@
     uint16_t* utf16_data_out = string->GetValue();
     ConvertModifiedUtf8ToUtf16(utf16_data_out, utf16_length, utf8_data_in, utf8_length);
   }
-  return string;
+  return string.Ptr();
 }
 
-bool String::Equals(String* that) {
+bool String::Equals(ObjPtr<String> that) {
   if (this == that) {
     // Quick reference equality test
     return true;
@@ -281,42 +283,47 @@
   return result;
 }
 
-int32_t String::CompareTo(String* rhs) {
+int32_t String::CompareTo(ObjPtr<String> rhs) {
   // Quick test for comparison of a string with itself.
-  String* lhs = this;
+  ObjPtr<String> lhs = this;
   if (lhs == rhs) {
     return 0;
   }
-  // TODO: is this still true?
-  // The annoying part here is that 0x00e9 - 0xffff != 0x00ea,
-  // because the interpreter converts the characters to 32-bit integers
-  // *without* sign extension before it subtracts them (which makes some
-  // sense since "char" is unsigned).  So what we get is the result of
-  // 0x000000e9 - 0x0000ffff, which is 0xffff00ea.
-  int32_t lhsCount = lhs->GetLength();
-  int32_t rhsCount = rhs->GetLength();
-  int32_t countDiff = lhsCount - rhsCount;
-  int32_t minCount = (countDiff < 0) ? lhsCount : rhsCount;
+  int32_t lhs_count = lhs->GetLength();
+  int32_t rhs_count = rhs->GetLength();
+  int32_t count_diff = lhs_count - rhs_count;
+  int32_t min_count = (count_diff < 0) ? lhs_count : rhs_count;
   if (lhs->IsCompressed() && rhs->IsCompressed()) {
-    int32_t comparison = memcmp(lhs->GetValueCompressed(), rhs->GetValueCompressed(), minCount * sizeof(uint8_t));
-    if (comparison != 0) {
-      return comparison;
+    const uint8_t* lhs_chars = lhs->GetValueCompressed();
+    const uint8_t* rhs_chars = rhs->GetValueCompressed();
+    for (int32_t i = 0; i < min_count; ++i) {
+      int32_t char_diff = static_cast<int32_t>(lhs_chars[i]) - static_cast<int32_t>(rhs_chars[i]);
+      if (char_diff != 0) {
+        return char_diff;
+      }
     }
   } else if (lhs->IsCompressed() || rhs->IsCompressed()) {
-    for (int32_t i = 0; i < minCount; ++i) {
-      if (lhs->CharAt(i) != rhs->CharAt(i)) {
-        return static_cast<int32_t>(lhs->CharAt(i)) - static_cast<int32_t>(rhs->CharAt(i));
+    const uint8_t* compressed_chars =
+        lhs->IsCompressed() ? lhs->GetValueCompressed() : rhs->GetValueCompressed();
+    const uint16_t* uncompressed_chars = lhs->IsCompressed() ? rhs->GetValue() : lhs->GetValue();
+    for (int32_t i = 0; i < min_count; ++i) {
+      int32_t char_diff =
+          static_cast<int32_t>(compressed_chars[i]) - static_cast<int32_t>(uncompressed_chars[i]);
+      if (char_diff != 0) {
+        return lhs->IsCompressed() ? char_diff : -char_diff;
       }
     }
   } else {
-    const uint16_t* lhsChars = lhs->GetValue();
-    const uint16_t* rhsChars = rhs->GetValue();
-    int32_t otherRes = MemCmp16(lhsChars, rhsChars, minCount);
-    if (otherRes != 0) {
-      return otherRes;
+    const uint16_t* lhs_chars = lhs->GetValue();
+    const uint16_t* rhs_chars = rhs->GetValue();
+    // FIXME: The MemCmp16() name is misleading. It returns the char difference on mismatch
+    // where memcmp() only guarantees that the returned value has the same sign.
+    int32_t char_diff = MemCmp16(lhs_chars, rhs_chars, min_count);
+    if (char_diff != 0) {
+      return char_diff;
     }
   }
-  return countDiff;
+  return count_diff;
 }
 
 void String::VisitRoots(RootVisitor* visitor) {
@@ -326,7 +333,7 @@
 CharArray* String::ToCharArray(Thread* self) {
   StackHandleScope<1> hs(self);
   Handle<String> string(hs.NewHandle(this));
-  CharArray* result = CharArray::Alloc(self, GetLength());
+  ObjPtr<CharArray> result = CharArray::Alloc(self, GetLength());
   if (result != nullptr) {
     if (string->IsCompressed()) {
       int32_t length = string->GetLength();
@@ -339,7 +346,7 @@
   } else {
     self->AssertPendingOOMException();
   }
-  return result;
+  return result.Ptr();
 }
 
 void String::GetChars(int32_t start, int32_t end, Handle<CharArray> array, int32_t index) {
@@ -358,5 +365,16 @@
   return (IsCompressed()) ? (GetValueCompressed() == nullptr) : (GetValue() == nullptr);
 }
 
+std::string String::PrettyStringDescriptor(ObjPtr<mirror::String> java_descriptor) {
+  if (java_descriptor == nullptr) {
+    return "null";
+  }
+  return java_descriptor->PrettyStringDescriptor();
+}
+
+std::string String::PrettyStringDescriptor() {
+  return PrettyDescriptor(ToModifiedUtf8().c_str());
+}
+
 }  // namespace mirror
 }  // namespace art
diff --git a/runtime/mirror/string.h b/runtime/mirror/string.h
index a18692f..95b6c3e 100644
--- a/runtime/mirror/string.h
+++ b/runtime/mirror/string.h
@@ -33,6 +33,10 @@
 
 // String Compression
 static constexpr bool kUseStringCompression = false;
+enum class StringCompressionFlag : uint32_t {
+    kCompressed = 0u,
+    kUncompressed = 1u
+};
 
 // C++ mirror of java.lang.String
 class MANAGED String FINAL : public Object {
@@ -78,7 +82,6 @@
   void SetCount(int32_t new_count) REQUIRES_SHARED(Locks::mutator_lock_) {
     // Count is invariant so use non-transactional mode. Also disable check as we may run inside
     // a transaction.
-    DCHECK_LE(0, (new_count & INT32_MAX));
     SetField32<false, false>(OFFSET_OF_OBJECT_MEMBER(String, count_), new_count);
   }
 
@@ -93,7 +96,7 @@
 
   void SetCharAt(int32_t index, uint16_t c) REQUIRES_SHARED(Locks::mutator_lock_);
 
-  String* Intern() REQUIRES_SHARED(Locks::mutator_lock_);
+  ObjPtr<String> Intern() REQUIRES_SHARED(Locks::mutator_lock_);
 
   template <bool kIsInstrumented>
   ALWAYS_INLINE static String* AllocFromByteArray(Thread* self, int32_t byte_length,
@@ -146,7 +149,7 @@
   bool Equals(const StringPiece& modified_utf8)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  bool Equals(String* that) REQUIRES_SHARED(Locks::mutator_lock_);
+  bool Equals(ObjPtr<String> that) REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Compare UTF-16 code point values not in a locale-sensitive manner
   int Compare(int32_t utf16_length, const char* utf8_data_in);
@@ -165,7 +168,7 @@
   int32_t FastIndexOf(MemoryType* chars, int32_t ch, int32_t start)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  int32_t CompareTo(String* other) REQUIRES_SHARED(Locks::mutator_lock_);
+  int32_t CompareTo(ObjPtr<String> other) REQUIRES_SHARED(Locks::mutator_lock_);
 
   CharArray* ToCharArray(Thread* self) REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!Roles::uninterruptible_);
@@ -175,7 +178,7 @@
 
   template<VerifyObjectFlags kVerifyFlags = kDefaultVerifyFlags>
   bool IsCompressed() REQUIRES_SHARED(Locks::mutator_lock_) {
-    return kUseStringCompression && GetCompressionFlagFromCount(GetCount());
+    return kUseStringCompression && IsCompressed(GetCount());
   }
 
   bool IsValueNull() REQUIRES_SHARED(Locks::mutator_lock_);
@@ -183,16 +186,27 @@
   template<typename MemoryType>
   static bool AllASCII(const MemoryType* const chars, const int length);
 
-  ALWAYS_INLINE static bool GetCompressionFlagFromCount(const int32_t count) {
-    return kUseStringCompression && ((count & (1u << 31)) != 0);
+  ALWAYS_INLINE static bool IsCompressed(int32_t count) {
+    return GetCompressionFlagFromCount(count) == StringCompressionFlag::kCompressed;
   }
 
-  ALWAYS_INLINE static int32_t GetLengthFromCount(const int32_t count) {
-    return kUseStringCompression ? (count & INT32_MAX) : count;
+  ALWAYS_INLINE static StringCompressionFlag GetCompressionFlagFromCount(int32_t count) {
+    return kUseStringCompression
+        ? static_cast<StringCompressionFlag>(static_cast<uint32_t>(count) & 1u)
+        : StringCompressionFlag::kUncompressed;
   }
 
-  ALWAYS_INLINE static int32_t GetFlaggedCount(const int32_t count) {
-    return kUseStringCompression ? (count | (1u << 31)) : count;
+  ALWAYS_INLINE static int32_t GetLengthFromCount(int32_t count) {
+    return kUseStringCompression ? static_cast<int32_t>(static_cast<uint32_t>(count) >> 1) : count;
+  }
+
+  ALWAYS_INLINE static int32_t GetFlaggedCount(int32_t length, bool compressible) {
+    return kUseStringCompression
+        ? static_cast<int32_t>((static_cast<uint32_t>(length) << 1) |
+                               (static_cast<uint32_t>(compressible
+                                                          ? StringCompressionFlag::kCompressed
+                                                          : StringCompressionFlag::kUncompressed)))
+        : length;
   }
 
   static Class* GetJavaLangString() REQUIRES_SHARED(Locks::mutator_lock_) {
@@ -200,10 +214,18 @@
     return java_lang_String_.Read();
   }
 
-  static void SetClass(Class* java_lang_String) REQUIRES_SHARED(Locks::mutator_lock_);
+  static void SetClass(ObjPtr<Class> java_lang_String) REQUIRES_SHARED(Locks::mutator_lock_);
   static void ResetClass() REQUIRES_SHARED(Locks::mutator_lock_);
   static void VisitRoots(RootVisitor* visitor) REQUIRES_SHARED(Locks::mutator_lock_);
 
+  // Returns a human-readable equivalent of 'descriptor'. So "I" would be "int",
+  // "[[I" would be "int[][]", "[Ljava/lang/String;" would be
+  // "java.lang.String[]", and so forth.
+  static std::string PrettyStringDescriptor(ObjPtr<mirror::String> descriptor)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+  std::string PrettyStringDescriptor()
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
  private:
   void SetHashCode(int32_t new_hash_code) REQUIRES_SHARED(Locks::mutator_lock_) {
     // Hash code is invariant so use non-transactional mode. Also disable check as we may run inside
diff --git a/runtime/mirror/throwable.cc b/runtime/mirror/throwable.cc
index 8f3ed84..b866a63 100644
--- a/runtime/mirror/throwable.cc
+++ b/runtime/mirror/throwable.cc
@@ -33,7 +33,7 @@
 
 GcRoot<Class> Throwable::java_lang_Throwable_;
 
-void Throwable::SetDetailMessage(String* new_detail_message) {
+void Throwable::SetDetailMessage(ObjPtr<String> new_detail_message) {
   if (Runtime::Current()->IsActiveTransaction()) {
     SetFieldObject<true>(OFFSET_OF_OBJECT_MEMBER(Throwable, detail_message_), new_detail_message);
   } else {
@@ -42,7 +42,7 @@
   }
 }
 
-void Throwable::SetCause(Throwable* cause) {
+void Throwable::SetCause(ObjPtr<Throwable> cause) {
   CHECK(cause != nullptr);
   CHECK(cause != this);
   Throwable* current_cause = GetFieldObject<Throwable>(OFFSET_OF_OBJECT_MEMBER(Throwable, cause_));
@@ -54,7 +54,7 @@
   }
 }
 
-void Throwable::SetStackState(Object* state) REQUIRES_SHARED(Locks::mutator_lock_) {
+void Throwable::SetStackState(ObjPtr<Object> state) REQUIRES_SHARED(Locks::mutator_lock_) {
   CHECK(state != nullptr);
   if (Runtime::Current()->IsActiveTransaction()) {
     SetFieldObjectVolatile<true>(OFFSET_OF_OBJECT_MEMBER(Throwable, backtrace_), state);
@@ -71,11 +71,11 @@
 }
 
 int32_t Throwable::GetStackDepth() {
-  Object* stack_state = GetStackState();
+  ObjPtr<Object> stack_state = GetStackState();
   if (stack_state == nullptr || !stack_state->IsObjectArray()) {
     return -1;
   }
-  mirror::ObjectArray<mirror::Object>* const trace = stack_state->AsObjectArray<mirror::Object>();
+  ObjPtr<mirror::ObjectArray<Object>> const trace = stack_state->AsObjectArray<Object>();
   const int32_t array_len = trace->GetLength();
   DCHECK_GT(array_len, 0);
   // See method BuildInternalStackTraceVisitor::Init for the format.
@@ -83,24 +83,23 @@
 }
 
 std::string Throwable::Dump() {
-  std::string result(PrettyTypeOf(this));
+  std::string result(PrettyTypeOf());
   result += ": ";
-  String* msg = GetDetailMessage();
+  ObjPtr<String> msg = GetDetailMessage();
   if (msg != nullptr) {
     result += msg->ToModifiedUtf8();
   }
   result += "\n";
-  Object* stack_state = GetStackState();
+  ObjPtr<Object> stack_state = GetStackState();
   // check stack state isn't missing or corrupt
   if (stack_state != nullptr && stack_state->IsObjectArray()) {
-    mirror::ObjectArray<mirror::Object>* object_array =
-        stack_state->AsObjectArray<mirror::Object>();
+    ObjPtr<ObjectArray<Object>> object_array = stack_state->AsObjectArray<Object>();
     // Decode the internal stack trace into the depth and method trace
     // See method BuildInternalStackTraceVisitor::Init for the format.
     DCHECK_GT(object_array->GetLength(), 0);
-    mirror::Object* methods_and_dex_pcs = object_array->Get(0);
+    ObjPtr<Object> methods_and_dex_pcs = object_array->Get(0);
     DCHECK(methods_and_dex_pcs->IsIntArray() || methods_and_dex_pcs->IsLongArray());
-    mirror::PointerArray* method_trace = down_cast<mirror::PointerArray*>(methods_and_dex_pcs);
+    ObjPtr<PointerArray> method_trace = ObjPtr<PointerArray>::DownCast(methods_and_dex_pcs);
     const int32_t array_len = method_trace->GetLength();
     CHECK_EQ(array_len % 2, 0);
     const auto depth = array_len / 2;
@@ -113,16 +112,17 @@
         uintptr_t dex_pc = method_trace->GetElementPtrSize<uintptr_t>(i + depth, ptr_size);
         int32_t line_number = method->GetLineNumFromDexPC(dex_pc);
         const char* source_file = method->GetDeclaringClassSourceFile();
-        result += StringPrintf("  at %s (%s:%d)\n", PrettyMethod(method, true).c_str(),
+        result += StringPrintf("  at %s (%s:%d)\n", method->PrettyMethod(true).c_str(),
                                source_file, line_number);
       }
     }
   } else {
-    Object* stack_trace = GetStackTrace();
+    ObjPtr<Object> stack_trace = GetStackTrace();
     if (stack_trace != nullptr && stack_trace->IsObjectArray()) {
       CHECK_EQ(stack_trace->GetClass()->GetComponentType(),
                StackTraceElement::GetStackTraceElement());
-      auto* ste_array = down_cast<ObjectArray<StackTraceElement>*>(stack_trace);
+      ObjPtr<ObjectArray<StackTraceElement>> ste_array =
+          ObjPtr<ObjectArray<StackTraceElement>>::DownCast(stack_trace);
       if (ste_array->GetLength() == 0) {
         result += "(Throwable with empty stack trace)";
       } else {
@@ -142,7 +142,7 @@
       result += "(Throwable with no stack trace)";
     }
   }
-  Throwable* cause = GetFieldObject<Throwable>(OFFSET_OF_OBJECT_MEMBER(Throwable, cause_));
+  ObjPtr<Throwable> cause = GetFieldObject<Throwable>(OFFSET_OF_OBJECT_MEMBER(Throwable, cause_));
   if (cause != nullptr && cause != this) {  // Constructor makes cause == this by default.
     result += "Caused by: ";
     result += cause->Dump();
@@ -150,7 +150,7 @@
   return result;
 }
 
-void Throwable::SetClass(Class* java_lang_Throwable) {
+void Throwable::SetClass(ObjPtr<Class> java_lang_Throwable) {
   CHECK(java_lang_Throwable_.IsNull());
   CHECK(java_lang_Throwable != nullptr);
   java_lang_Throwable_ = GcRoot<Class>(java_lang_Throwable);
diff --git a/runtime/mirror/throwable.h b/runtime/mirror/throwable.h
index 76824cb..0a4ab6f 100644
--- a/runtime/mirror/throwable.h
+++ b/runtime/mirror/throwable.h
@@ -31,7 +31,7 @@
 // C++ mirror of java.lang.Throwable
 class MANAGED Throwable : public Object {
  public:
-  void SetDetailMessage(String* new_detail_message) REQUIRES_SHARED(Locks::mutator_lock_);
+  void SetDetailMessage(ObjPtr<String> new_detail_message) REQUIRES_SHARED(Locks::mutator_lock_);
 
   String* GetDetailMessage() REQUIRES_SHARED(Locks::mutator_lock_) {
     return GetFieldObject<String>(OFFSET_OF_OBJECT_MEMBER(Throwable, detail_message_));
@@ -42,8 +42,8 @@
   // This is a runtime version of initCause, you shouldn't use it if initCause may have been
   // overridden. Also it asserts rather than throwing exceptions. Currently this is only used
   // in cases like the verifier where the checks cannot fail and initCause isn't overridden.
-  void SetCause(Throwable* cause) REQUIRES_SHARED(Locks::mutator_lock_);
-  void SetStackState(Object* state) REQUIRES_SHARED(Locks::mutator_lock_);
+  void SetCause(ObjPtr<Throwable> cause) REQUIRES_SHARED(Locks::mutator_lock_);
+  void SetStackState(ObjPtr<Object> state) REQUIRES_SHARED(Locks::mutator_lock_);
   bool IsCheckedException() REQUIRES_SHARED(Locks::mutator_lock_);
 
   static Class* GetJavaLangThrowable() REQUIRES_SHARED(Locks::mutator_lock_) {
@@ -53,7 +53,7 @@
 
   int32_t GetStackDepth() REQUIRES_SHARED(Locks::mutator_lock_);
 
-  static void SetClass(Class* java_lang_Throwable);
+  static void SetClass(ObjPtr<Class> java_lang_Throwable);
   static void ResetClass();
   static void VisitRoots(RootVisitor* visitor)
       REQUIRES_SHARED(Locks::mutator_lock_);
diff --git a/runtime/monitor.cc b/runtime/monitor.cc
index debbdd5..e7de7e6 100644
--- a/runtime/monitor.cc
+++ b/runtime/monitor.cc
@@ -307,7 +307,7 @@
   std::ostringstream oss;
   oss << "monitor contention with owner " << owner_name << " (" << owner_tid << ")";
   if (owners_method != nullptr) {
-    oss << " at " << PrettyMethod(owners_method);
+    oss << " at " << owners_method->PrettyMethod();
     oss << "(" << owners_filename << ":" << owners_line_number << ")";
   }
   oss << " waiters=" << num_waiters;
@@ -377,8 +377,8 @@
             int32_t line_number;
             TranslateLocation(m, pc, &filename, &line_number);
             oss << " blocking from "
-                << PrettyMethod(m) << "(" << (filename != nullptr ? filename : "null") << ":"
-                << line_number << ")";
+                << ArtMethod::PrettyMethod(m) << "(" << (filename != nullptr ? filename : "null")
+                << ":" << line_number << ")";
             ATRACE_BEGIN(oss.str().c_str());
           }
           monitor_contenders_.Wait(self);  // Still contended so wait.
@@ -420,7 +420,8 @@
                                             owners_method,
                                             owners_dex_pc,
                                             num_waiters)
-                    << " in " << PrettyMethod(m) << " for " << PrettyDuration(MsToNs(wait_ms));
+                    << " in " << ArtMethod::PrettyMethod(m) << " for "
+                    << PrettyDuration(MsToNs(wait_ms));
               }
               const char* owners_filename;
               int32_t owners_line_number;
@@ -503,14 +504,14 @@
     if (found_owner_thread_id == 0u) {
       ThrowIllegalMonitorStateExceptionF("unlock of unowned monitor on object of type '%s'"
                                          " on thread '%s'",
-                                         PrettyTypeOf(o).c_str(),
+                                         mirror::Object::PrettyTypeOf(o).c_str(),
                                          expected_owner_string.c_str());
     } else {
       // Race: the original read found an owner but now there is none
       ThrowIllegalMonitorStateExceptionF("unlock of monitor owned by '%s' on object of type '%s'"
                                          " (where now the monitor appears unowned) on thread '%s'",
                                          found_owner_string.c_str(),
-                                         PrettyTypeOf(o).c_str(),
+                                         mirror::Object::PrettyTypeOf(o).c_str(),
                                          expected_owner_string.c_str());
     }
   } else {
@@ -519,7 +520,7 @@
       ThrowIllegalMonitorStateExceptionF("unlock of monitor owned by '%s' on object of type '%s'"
                                          " (originally believed to be unowned) on thread '%s'",
                                          current_owner_string.c_str(),
-                                         PrettyTypeOf(o).c_str(),
+                                         mirror::Object::PrettyTypeOf(o).c_str(),
                                          expected_owner_string.c_str());
     } else {
       if (found_owner_thread_id != current_owner_thread_id) {
@@ -528,13 +529,13 @@
                                            " owned by '%s') on object of type '%s' on thread '%s'",
                                            found_owner_string.c_str(),
                                            current_owner_string.c_str(),
-                                           PrettyTypeOf(o).c_str(),
+                                           mirror::Object::PrettyTypeOf(o).c_str(),
                                            expected_owner_string.c_str());
       } else {
         ThrowIllegalMonitorStateExceptionF("unlock of monitor owned by '%s' on object of type '%s'"
                                            " on thread '%s",
                                            current_owner_string.c_str(),
-                                           PrettyTypeOf(o).c_str(),
+                                           mirror::Object::PrettyTypeOf(o).c_str(),
                                            expected_owner_string.c_str());
       }
     }
@@ -770,7 +771,7 @@
         return false;
       }
       // Can't deflate if our lock count is too high.
-      if (monitor->lock_count_ > LockWord::kThinLockMaxCount) {
+      if (static_cast<uint32_t>(monitor->lock_count_) > LockWord::kThinLockMaxCount) {
         return false;
       }
       // Deflate to a thin lock.
@@ -1144,12 +1145,12 @@
         // current thread, which isn't safe if this is the only runnable thread.
         os << wait_message << StringPrintf("<@addr=0x%" PRIxPTR "> (a %s)",
                                            reinterpret_cast<intptr_t>(pretty_object),
-                                           PrettyTypeOf(pretty_object).c_str());
+                                           pretty_object->PrettyTypeOf().c_str());
       } else {
         // - waiting on <0x6008c468> (a java.lang.Class<java.lang.ref.ReferenceQueue>)
         // Call PrettyTypeOf before IdentityHashCode since IdentityHashCode can cause thread
         // suspension and move pretty_object.
-        const std::string pretty_type(PrettyTypeOf(pretty_object));
+        const std::string pretty_type(pretty_object->PrettyTypeOf());
         os << wait_message << StringPrintf("<0x%08x> (a %s)", pretty_object->IdentityHashCode(),
                                            pretty_type.c_str());
       }
@@ -1201,7 +1202,7 @@
 
   // Is there any reason to believe there's any synchronization in this method?
   const DexFile::CodeItem* code_item = m->GetCodeItem();
-  CHECK(code_item != nullptr) << PrettyMethod(m);
+  CHECK(code_item != nullptr) << m->PrettyMethod();
   if (code_item->tries_size_ == 0) {
     return;  // No "tries" implies no synchronization, so no held locks to report.
   }
@@ -1211,7 +1212,7 @@
   // inconsistent stack anyways.
   uint32_t dex_pc = stack_visitor->GetDexPc(abort_on_failure);
   if (!abort_on_failure && dex_pc == DexFile::kDexNoIndex) {
-    LOG(ERROR) << "Could not find dex_pc for " << PrettyMethod(m);
+    LOG(ERROR) << "Could not find dex_pc for " << m->PrettyMethod();
     return;
   }
 
@@ -1234,7 +1235,7 @@
     uint32_t value;
     bool success = stack_visitor->GetVReg(m, monitor_register, kReferenceVReg, &value);
     CHECK(success) << "Failed to read v" << monitor_register << " of kind "
-                   << kReferenceVReg << " in method " << PrettyMethod(m);
+                   << kReferenceVReg << " in method " << m->PrettyMethod();
     mirror::Object* o = reinterpret_cast<mirror::Object*>(value);
     callback(o, callback_context);
   }
@@ -1329,7 +1330,6 @@
 }
 
 void MonitorList::BroadcastForNewMonitors() {
-  CHECK(kUseReadBarrier);
   Thread* self = Thread::Current();
   MutexLock mu(self, monitor_list_lock_);
   monitor_add_condition_.Broadcast(self);
@@ -1340,6 +1340,9 @@
   MutexLock mu(self, monitor_list_lock_);
   while (UNLIKELY((!kUseReadBarrier && !allow_new_monitors_) ||
                   (kUseReadBarrier && !self->GetWeakRefAccessEnabled()))) {
+    // Check and run the empty checkpoint before blocking so the empty checkpoint will work in the
+    // presence of threads blocking for weak ref access.
+    self->CheckEmptyCheckpoint();
     monitor_add_condition_.WaitHoldingLocks(self);
   }
   list_.push_front(m);
diff --git a/runtime/monitor_android.cc b/runtime/monitor_android.cc
index 0d1839b..1dd60f8 100644
--- a/runtime/monitor_android.cc
+++ b/runtime/monitor_android.cc
@@ -21,7 +21,7 @@
 #include <sys/stat.h>
 #include <sys/types.h>
 
-#include <android/log.h>
+#include <log/log.h>
 
 #define EVENT_LOG_TAG_dvm_lock_sample 20003
 
diff --git a/runtime/monitor_test.cc b/runtime/monitor_test.cc
index 4ee46dc..4fbfe47 100644
--- a/runtime/monitor_test.cc
+++ b/runtime/monitor_test.cc
@@ -401,14 +401,11 @@
   Thread* const self = Thread::Current();
   ThreadPool thread_pool("the pool", 2);
   ScopedObjectAccess soa(self);
-  StackHandleScope<3> hs(self);
+  StackHandleScope<1> hs(self);
   Handle<mirror::Object> obj1(
       hs.NewHandle<mirror::Object>(mirror::String::AllocFromModifiedUtf8(self, "hello, world!")));
-  Handle<mirror::Object> obj2(
-      hs.NewHandle<mirror::Object>(mirror::String::AllocFromModifiedUtf8(self, "hello, world!")));
   {
     ObjectLock<mirror::Object> lock1(self, obj1);
-    ObjectLock<mirror::Object> lock2(self, obj1);
     {
       ObjectTryLock<mirror::Object> trylock(self, obj1);
       EXPECT_TRUE(trylock.Acquired());
diff --git a/runtime/native/dalvik_system_DexFile.cc b/runtime/native/dalvik_system_DexFile.cc
index 086da60..df0849a 100644
--- a/runtime/native/dalvik_system_DexFile.cc
+++ b/runtime/native/dalvik_system_DexFile.cc
@@ -218,7 +218,7 @@
   {
     ScopedObjectAccess soa(env);
     ObjPtr<mirror::Object> dex_files_object = soa.Decode<mirror::Object>(cookie);
-    mirror::LongArray* long_dex_files = dex_files_object->AsLongArray();
+    ObjPtr<mirror::LongArray> long_dex_files = dex_files_object->AsLongArray();
     // Delete dex files associated with this dalvik.system.DexFile since there should not be running
     // code using it. dex_files is a vector due to multidex.
     ClassLinker* const class_linker = runtime->GetClassLinker();
@@ -279,15 +279,15 @@
       Handle<mirror::ClassLoader> class_loader(
           hs.NewHandle(soa.Decode<mirror::ClassLoader>(javaLoader)));
       class_linker->RegisterDexFile(*dex_file, class_loader.Get());
-      mirror::Class* result = class_linker->DefineClass(soa.Self(),
-                                                        descriptor.c_str(),
-                                                        hash,
-                                                        class_loader,
-                                                        *dex_file,
-                                                        *dex_class_def);
+      ObjPtr<mirror::Class> result = class_linker->DefineClass(soa.Self(),
+                                                               descriptor.c_str(),
+                                                               hash,
+                                                               class_loader,
+                                                               *dex_file,
+                                                               *dex_class_def);
       // Add the used dex file. This only required for the DexFile.loadClass API since normal
       // class loaders already keep their dex files live.
-      class_linker->InsertDexFileInToClassLoader(soa.Decode<mirror::Object>(dexFile).Ptr(),
+      class_linker->InsertDexFileInToClassLoader(soa.Decode<mirror::Object>(dexFile),
                                                  class_loader.Get());
       if (result != nullptr) {
         VLOG(class_linker) << "DexFile_defineClassNative returning " << result
diff --git a/runtime/native/dalvik_system_InMemoryDexClassLoader_DexData.cc b/runtime/native/dalvik_system_InMemoryDexClassLoader_DexData.cc
index e32545b..db245aa 100644
--- a/runtime/native/dalvik_system_InMemoryDexClassLoader_DexData.cc
+++ b/runtime/native/dalvik_system_InMemoryDexClassLoader_DexData.cc
@@ -150,14 +150,18 @@
     Handle<mirror::ClassLoader> class_loader(
         handle_scope.NewHandle(soa.Decode<mirror::ClassLoader>(loader)));
     class_linker->RegisterDexFile(*dex_file, class_loader.Get());
-    mirror::Class* result = class_linker->DefineClass(
-        soa.Self(), class_descriptor, hash, class_loader, *dex_file, *dex_class_def);
+    ObjPtr<mirror::Class> result = class_linker->DefineClass(
+        soa.Self(),
+        class_descriptor,
+        hash, class_loader,
+        *dex_file,
+        *dex_class_def);
     if (result != nullptr) {
       // Ensure the class table has a strong reference to the
       // InMemoryClassLoader/DexData instance now that a class has
       // been loaded.
-      class_linker->InsertDexFileInToClassLoader(
-          soa.Decode<mirror::Object>(dexData).Ptr(), class_loader.Get());
+      class_linker->InsertDexFileInToClassLoader(soa.Decode<mirror::Object>(dexData),
+                                                 class_loader.Get());
       return soa.AddLocalReference<jclass>(result);
     }
   }
diff --git a/runtime/native/dalvik_system_VMDebug.cc b/runtime/native/dalvik_system_VMDebug.cc
index 8e81bc9..adf35b6 100644
--- a/runtime/native/dalvik_system_VMDebug.cc
+++ b/runtime/native/dalvik_system_VMDebug.cc
@@ -254,7 +254,9 @@
   LOG(INFO) << "VMDebug infopoint " << id << " hit";
 }
 
-static jlong VMDebug_countInstancesOfClass(JNIEnv* env, jclass, jclass javaClass,
+static jlong VMDebug_countInstancesOfClass(JNIEnv* env,
+                                           jclass,
+                                           jclass javaClass,
                                            jboolean countAssignable) {
   ScopedObjectAccess soa(env);
   gc::Heap* const heap = Runtime::Current()->GetHeap();
@@ -263,13 +265,16 @@
   if (c == nullptr) {
     return 0;
   }
-  std::vector<mirror::Class*> classes {c.Ptr()};
+  VariableSizedHandleScope hs(soa.Self());
+  std::vector<Handle<mirror::Class>> classes {hs.NewHandle(c)};
   uint64_t count = 0;
   heap->CountInstances(classes, countAssignable, &count);
   return count;
 }
 
-static jlongArray VMDebug_countInstancesOfClasses(JNIEnv* env, jclass, jobjectArray javaClasses,
+static jlongArray VMDebug_countInstancesOfClasses(JNIEnv* env,
+                                                  jclass,
+                                                  jobjectArray javaClasses,
                                                   jboolean countAssignable) {
   ScopedObjectAccess soa(env);
   gc::Heap* const heap = Runtime::Current()->GetHeap();
@@ -279,14 +284,15 @@
   if (decoded_classes == nullptr) {
     return nullptr;
   }
-  std::vector<mirror::Class*> classes;
+  VariableSizedHandleScope hs(soa.Self());
+  std::vector<Handle<mirror::Class>> classes;
   for (size_t i = 0, count = decoded_classes->GetLength(); i < count; ++i) {
-    classes.push_back(decoded_classes->Get(i));
+    classes.push_back(hs.NewHandle(decoded_classes->Get(i)));
   }
   std::vector<uint64_t> counts(classes.size(), 0u);
   // Heap::CountInstances can handle null and will put 0 for these classes.
   heap->CountInstances(classes, countAssignable, &counts[0]);
-  auto* long_counts = mirror::LongArray::Alloc(soa.Self(), counts.size());
+  ObjPtr<mirror::LongArray> long_counts = mirror::LongArray::Alloc(soa.Self(), counts.size());
   if (long_counts == nullptr) {
     soa.Self()->AssertPendingOOMException();
     return nullptr;
@@ -414,8 +420,10 @@
   }
 }
 
-static bool SetRuntimeStatValue(JNIEnv* env, jobjectArray result, VMDebugRuntimeStatId id,
-                                std::string value) {
+static bool SetRuntimeStatValue(JNIEnv* env,
+                                jobjectArray result,
+                                VMDebugRuntimeStatId id,
+                                const std::string& value) {
   ScopedLocalRef<jstring> jvalue(env, env->NewStringUTF(value.c_str()));
   if (jvalue.get() == nullptr) {
     return false;
@@ -476,6 +484,31 @@
   return result;
 }
 
+static void VMDebug_attachAgent(JNIEnv* env, jclass, jstring agent) {
+  if (agent == nullptr) {
+    ScopedObjectAccess soa(env);
+    ThrowNullPointerException("agent is null");
+    return;
+  }
+
+  if (!Dbg::IsJdwpAllowed()) {
+    ScopedObjectAccess soa(env);
+    ThrowSecurityException("Can't attach agent, process is not debuggable.");
+    return;
+  }
+
+  std::string filename;
+  {
+    ScopedUtfChars chars(env, agent);
+    if (env->ExceptionCheck()) {
+      return;
+    }
+    filename = chars.c_str();
+  }
+
+  Runtime::Current()->AttachAgent(filename);
+}
+
 static JNINativeMethod gMethods[] = {
   NATIVE_METHOD(VMDebug, countInstancesOfClass, "(Ljava/lang/Class;Z)J"),
   NATIVE_METHOD(VMDebug, countInstancesOfClasses, "([Ljava/lang/Class;Z)[J"),
@@ -508,7 +541,8 @@
   NATIVE_METHOD(VMDebug, stopMethodTracing, "()V"),
   NATIVE_METHOD(VMDebug, threadCpuTimeNanos, "!()J"),
   NATIVE_METHOD(VMDebug, getRuntimeStatInternal, "(I)Ljava/lang/String;"),
-  NATIVE_METHOD(VMDebug, getRuntimeStatsInternal, "()[Ljava/lang/String;")
+  NATIVE_METHOD(VMDebug, getRuntimeStatsInternal, "()[Ljava/lang/String;"),
+  NATIVE_METHOD(VMDebug, attachAgent, "(Ljava/lang/String;)V"),
 };
 
 void register_dalvik_system_VMDebug(JNIEnv* env) {
diff --git a/runtime/native/dalvik_system_VMRuntime.cc b/runtime/native/dalvik_system_VMRuntime.cc
index e458e2d..866dc7f 100644
--- a/runtime/native/dalvik_system_VMRuntime.cc
+++ b/runtime/native/dalvik_system_VMRuntime.cc
@@ -74,21 +74,23 @@
     ThrowNegativeArraySizeException(length);
     return nullptr;
   }
-  mirror::Class* element_class = soa.Decode<mirror::Class>(javaElementClass).Ptr();
+  ObjPtr<mirror::Class> element_class = soa.Decode<mirror::Class>(javaElementClass);
   if (UNLIKELY(element_class == nullptr)) {
     ThrowNullPointerException("element class == null");
     return nullptr;
   }
   Runtime* runtime = Runtime::Current();
-  mirror::Class* array_class =
+  ObjPtr<mirror::Class> array_class =
       runtime->GetClassLinker()->FindArrayClass(soa.Self(), &element_class);
   if (UNLIKELY(array_class == nullptr)) {
     return nullptr;
   }
   gc::AllocatorType allocator = runtime->GetHeap()->GetCurrentNonMovingAllocator();
-  mirror::Array* result = mirror::Array::Alloc<true>(soa.Self(), array_class, length,
-                                                     array_class->GetComponentSizeShift(),
-                                                     allocator);
+  ObjPtr<mirror::Array> result = mirror::Array::Alloc<true>(soa.Self(),
+                                                            array_class,
+                                                            length,
+                                                            array_class->GetComponentSizeShift(),
+                                                            allocator);
   return soa.AddLocalReference<jobject>(result);
 }
 
@@ -99,21 +101,24 @@
     ThrowNegativeArraySizeException(length);
     return nullptr;
   }
-  mirror::Class* element_class = soa.Decode<mirror::Class>(javaElementClass).Ptr();
+  ObjPtr<mirror::Class> element_class = soa.Decode<mirror::Class>(javaElementClass);
   if (UNLIKELY(element_class == nullptr)) {
     ThrowNullPointerException("element class == null");
     return nullptr;
   }
   Runtime* runtime = Runtime::Current();
-  mirror::Class* array_class = runtime->GetClassLinker()->FindArrayClass(soa.Self(),
-                                                                         &element_class);
+  ObjPtr<mirror::Class> array_class = runtime->GetClassLinker()->FindArrayClass(soa.Self(),
+                                                                                &element_class);
   if (UNLIKELY(array_class == nullptr)) {
     return nullptr;
   }
   gc::AllocatorType allocator = runtime->GetHeap()->GetCurrentAllocator();
-  mirror::Array* result = mirror::Array::Alloc<true, true>(soa.Self(), array_class, length,
-                                                           array_class->GetComponentSizeShift(),
-                                                           allocator);
+  ObjPtr<mirror::Array> result = mirror::Array::Alloc<true, true>(
+      soa.Self(),
+      array_class,
+      length,
+      array_class->GetComponentSizeShift(),
+      allocator);
   return soa.AddLocalReference<jobject>(result);
 }
 
@@ -127,7 +132,7 @@
     ThrowIllegalArgumentException("not an array");
     return 0;
   }
-  if (Runtime::Current()->GetHeap()->IsMovableObject(array.Ptr())) {
+  if (Runtime::Current()->GetHeap()->IsMovableObject(array)) {
     ThrowRuntimeException("Trying to get address of movable array object");
     return 0;
   }
@@ -263,7 +268,7 @@
   Runtime::Current()->GetHeap()->GetTaskProcessor()->RunAllTasks(ThreadForEnv(env));
 }
 
-typedef std::map<std::string, mirror::String*> StringTable;
+typedef std::map<std::string, ObjPtr<mirror::String>> StringTable;
 
 class PreloadDexCachesStringsVisitor : public SingleRootVisitor {
  public:
@@ -271,7 +276,7 @@
 
   void VisitRoot(mirror::Object* root, const RootInfo& info ATTRIBUTE_UNUSED)
       OVERRIDE REQUIRES_SHARED(Locks::mutator_lock_) {
-    mirror::String* string = root->AsString();
+    ObjPtr<mirror::String> string = root->AsString();
     table_->operator[](string->ToModifiedUtf8()) = string;
   }
 
@@ -283,7 +288,7 @@
 static void PreloadDexCachesResolveString(
     Handle<mirror::DexCache> dex_cache, uint32_t string_idx, StringTable& strings)
     REQUIRES_SHARED(Locks::mutator_lock_) {
-  mirror::String* string = dex_cache->GetResolvedString(string_idx);
+  ObjPtr<mirror::String>  string = dex_cache->GetResolvedString(string_idx);
   if (string != nullptr) {
     return;
   }
@@ -298,10 +303,11 @@
 }
 
 // Based on ClassLinker::ResolveType.
-static void PreloadDexCachesResolveType(
-    Thread* self, mirror::DexCache* dex_cache, uint32_t type_idx)
+static void PreloadDexCachesResolveType(Thread* self,
+                                        ObjPtr<mirror::DexCache> dex_cache,
+                                        uint32_t type_idx)
     REQUIRES_SHARED(Locks::mutator_lock_) {
-  mirror::Class* klass = dex_cache->GetResolvedType(type_idx);
+  ObjPtr<mirror::Class> klass = dex_cache->GetResolvedType(type_idx);
   if (klass != nullptr) {
     return;
   }
@@ -311,7 +317,7 @@
   if (class_name[1] == '\0') {
     klass = linker->FindPrimitiveClass(class_name[0]);
   } else {
-    klass = linker->LookupClass(self, class_name, ComputeModifiedUtf8Hash(class_name), nullptr);
+    klass = linker->LookupClass(self, class_name, nullptr);
   }
   if (klass == nullptr) {
     return;
@@ -350,7 +356,6 @@
   if (field == nullptr) {
     return;
   }
-  // LOG(INFO) << "VMRuntime.preloadDexCaches resolved field " << PrettyField(field);
   dex_cache->SetResolvedField(field_idx, field, kRuntimePointerSize);
 }
 
@@ -364,7 +369,7 @@
   }
   const DexFile* dex_file = dex_cache->GetDexFile();
   const DexFile::MethodId& method_id = dex_file->GetMethodId(method_idx);
-  mirror::Class* klass = dex_cache->GetResolvedType(method_id.class_idx_);
+  ObjPtr<mirror::Class> klass = dex_cache->GetResolvedType(method_id.class_idx_);
   if (klass == nullptr) {
     return;
   }
@@ -387,7 +392,6 @@
   if (method == nullptr) {
     return;
   }
-  // LOG(INFO) << "VMRuntime.preloadDexCaches resolved method " << PrettyMethod(method);
   dex_cache->SetResolvedMethod(method_idx, method, kRuntimePointerSize);
 }
 
@@ -439,19 +443,19 @@
   Thread* const self = Thread::Current();
   for (const DexFile* dex_file : class_linker->GetBootClassPath()) {
     CHECK(dex_file != nullptr);
-    mirror::DexCache* const dex_cache = class_linker->FindDexCache(self, *dex_file, true);
+    ObjPtr<mirror::DexCache> const dex_cache = class_linker->FindDexCache(self, *dex_file, true);
     // If dex cache was deallocated, just continue.
     if (dex_cache == nullptr) {
       continue;
     }
     for (size_t j = 0; j < dex_cache->NumStrings(); j++) {
-      mirror::String* string = dex_cache->GetResolvedString(j);
+      ObjPtr<mirror::String> string = dex_cache->GetResolvedString(j);
       if (string != nullptr) {
         filled->num_strings++;
       }
     }
     for (size_t j = 0; j < dex_cache->NumResolvedTypes(); j++) {
-      mirror::Class* klass = dex_cache->GetResolvedType(j);
+      ObjPtr<mirror::Class> klass = dex_cache->GetResolvedType(j);
       if (klass != nullptr) {
         filled->num_types++;
       }
diff --git a/runtime/native/dalvik_system_VMStack.cc b/runtime/native/dalvik_system_VMStack.cc
index 0dd8cdd..36825cb 100644
--- a/runtime/native/dalvik_system_VMStack.cc
+++ b/runtime/native/dalvik_system_VMStack.cc
@@ -87,10 +87,10 @@
 
     bool VisitFrame() REQUIRES_SHARED(Locks::mutator_lock_) {
       DCHECK(class_loader == nullptr);
-      mirror::Class* c = GetMethod()->GetDeclaringClass();
+      ObjPtr<mirror::Class> c = GetMethod()->GetDeclaringClass();
       // c is null for runtime methods.
       if (c != nullptr) {
-        mirror::Object* cl = c->GetClassLoader();
+        ObjPtr<mirror::Object> cl = c->GetClassLoader();
         if (cl != nullptr) {
           class_loader = cl;
           return false;
@@ -99,7 +99,7 @@
       return true;
     }
 
-    mirror::Object* class_loader;
+    ObjPtr<mirror::Object> class_loader;
   };
   ScopedFastNativeObjectAccess soa(env);
   ClosestUserClassLoaderVisitor visitor(soa.Self());
diff --git a/runtime/native/java_lang_Class.cc b/runtime/native/java_lang_Class.cc
index 87bff5f..642826c 100644
--- a/runtime/native/java_lang_Class.cc
+++ b/runtime/native/java_lang_Class.cc
@@ -278,7 +278,7 @@
 
     uint32_t num_direct_interfaces = h_clazz->NumDirectInterfaces();
     for (uint32_t i = 0; i < num_direct_interfaces; i++) {
-      mirror::Class *iface = mirror::Class::GetDirectInterface(self, h_clazz, i);
+      ObjPtr<mirror::Class> iface = mirror::Class::GetDirectInterface(self, h_clazz, i);
       if (UNLIKELY(iface == nullptr)) {
         self->AssertPendingException();
         return nullptr;
@@ -354,8 +354,8 @@
   ObjPtr<mirror::Constructor> result =
       mirror::Class::GetDeclaredConstructorInternal<kRuntimePointerSize, false>(
       soa.Self(),
-      DecodeClass(soa, javaThis).Ptr(),
-      soa.Decode<mirror::ObjectArray<mirror::Class>>(args).Ptr());
+      DecodeClass(soa, javaThis),
+      soa.Decode<mirror::ObjectArray<mirror::Class>>(args));
   return soa.AddLocalReference<jobject>(result);
 }
 
@@ -403,11 +403,12 @@
   ScopedFastNativeObjectAccess soa(env);
   DCHECK_EQ(Runtime::Current()->GetClassLinker()->GetImagePointerSize(), kRuntimePointerSize);
   DCHECK(!Runtime::Current()->IsActiveTransaction());
-  mirror::Method* result = mirror::Class::GetDeclaredMethodInternal<kRuntimePointerSize, false>(
-      soa.Self(),
-      DecodeClass(soa, javaThis).Ptr(),
-      soa.Decode<mirror::String>(name).Ptr(),
-      soa.Decode<mirror::ObjectArray<mirror::Class>>(args).Ptr());
+  ObjPtr<mirror::Method> result =
+      mirror::Class::GetDeclaredMethodInternal<kRuntimePointerSize, false>(
+          soa.Self(),
+          DecodeClass(soa, javaThis),
+          soa.Decode<mirror::String>(name),
+          soa.Decode<mirror::ObjectArray<mirror::Class>>(args));
   return soa.AddLocalReference<jobject>(result);
 }
 
@@ -496,13 +497,13 @@
       // Pending exception from GetDeclaredClasses.
       return nullptr;
     }
-    mirror::Class* class_class = mirror::Class::GetJavaLangClass();
-    mirror::Class* class_array_class =
+    ObjPtr<mirror::Class> class_class = mirror::Class::GetJavaLangClass();
+    ObjPtr<mirror::Class> class_array_class =
         Runtime::Current()->GetClassLinker()->FindArrayClass(soa.Self(), &class_class);
     if (class_array_class == nullptr) {
       return nullptr;
     }
-    mirror::ObjectArray<mirror::Class>* empty_array =
+    ObjPtr<mirror::ObjectArray<mirror::Class>> empty_array =
         mirror::ObjectArray<mirror::Class>::Alloc(soa.Self(), class_array_class, 0);
     return soa.AddLocalReference<jobjectArray>(empty_array);
   }
@@ -526,7 +527,7 @@
   if (klass->IsProxyClass() || klass->GetDexCache() == nullptr) {
     return nullptr;
   }
-  mirror::Object* method = annotations::GetEnclosingMethod(klass);
+  ObjPtr<mirror::Object> method = annotations::GetEnclosingMethod(klass);
   if (method != nullptr) {
     if (soa.Decode<mirror::Class>(WellKnownClasses::java_lang_reflect_Constructor) ==
         method->GetClass()) {
@@ -543,7 +544,7 @@
   if (klass->IsProxyClass() || klass->GetDexCache() == nullptr) {
     return nullptr;
   }
-  mirror::Object* method = annotations::GetEnclosingMethod(klass);
+  ObjPtr<mirror::Object> method = annotations::GetEnclosingMethod(klass);
   if (method != nullptr) {
     if (soa.Decode<mirror::Class>(WellKnownClasses::java_lang_reflect_Method) ==
         method->GetClass()) {
@@ -632,7 +633,8 @@
   if (UNLIKELY(klass->GetPrimitiveType() != 0 || klass->IsInterface() || klass->IsArrayClass() ||
                klass->IsAbstract())) {
     soa.Self()->ThrowNewExceptionF("Ljava/lang/InstantiationException;",
-                                   "%s cannot be instantiated", PrettyClass(klass.Get()).c_str());
+                                   "%s cannot be instantiated",
+                                   klass->PrettyClass().c_str());
     return nullptr;
   }
   auto caller = hs.NewHandle<mirror::Class>(nullptr);
@@ -642,7 +644,7 @@
     if (caller.Get() != nullptr && !caller->CanAccess(klass.Get())) {
       soa.Self()->ThrowNewExceptionF(
           "Ljava/lang/IllegalAccessException;", "%s is not accessible from %s",
-          PrettyClass(klass.Get()).c_str(), PrettyClass(caller.Get()).c_str());
+          klass->PrettyClass().c_str(), caller->PrettyClass().c_str());
       return nullptr;
     }
   }
@@ -653,13 +655,13 @@
   if (UNLIKELY(constructor == nullptr)) {
     soa.Self()->ThrowNewExceptionF("Ljava/lang/InstantiationException;",
                                    "%s has no zero argument constructor",
-                                   PrettyClass(klass.Get()).c_str());
+                                   klass->PrettyClass().c_str());
     return nullptr;
   }
   // Invoke the string allocator to return an empty string for the string class.
   if (klass->IsStringClass()) {
     gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
-    mirror::Object* obj = mirror::String::AllocEmptyString<true>(soa.Self(), allocator_type);
+    ObjPtr<mirror::Object> obj = mirror::String::AllocEmptyString<true>(soa.Self(), allocator_type);
     if (UNLIKELY(soa.Self()->IsExceptionPending())) {
       return nullptr;
     } else {
@@ -677,14 +679,13 @@
     if (caller.Get() == nullptr) {
       caller.Assign(GetCallingClass(soa.Self(), 1));
     }
-    if (UNLIKELY(caller.Get() != nullptr && !VerifyAccess(
-        MakeObjPtr(receiver.Get()),
-        MakeObjPtr(declaring_class),
-        constructor->GetAccessFlags(),
-        MakeObjPtr(caller.Get())))) {
+    if (UNLIKELY(caller.Get() != nullptr && !VerifyAccess(receiver.Get(),
+                                                          declaring_class,
+                                                          constructor->GetAccessFlags(),
+                                                          caller.Get()))) {
       soa.Self()->ThrowNewExceptionF(
           "Ljava/lang/IllegalAccessException;", "%s is not accessible from %s",
-          PrettyMethod(constructor).c_str(), PrettyClass(caller.Get()).c_str());
+          constructor->PrettyMethod().c_str(), caller->PrettyClass().c_str());
       return nullptr;
     }
   }
diff --git a/runtime/native/java_lang_DexCache.cc b/runtime/native/java_lang_DexCache.cc
index 1fd7ed1..71379a5 100644
--- a/runtime/native/java_lang_DexCache.cc
+++ b/runtime/native/java_lang_DexCache.cc
@@ -68,7 +68,7 @@
   ScopedFastNativeObjectAccess soa(env);
   ObjPtr<mirror::DexCache> dex_cache = soa.Decode<mirror::DexCache>(javaDexCache);
   CHECK_LT(static_cast<size_t>(type_index), dex_cache->NumResolvedTypes());
-  dex_cache->SetResolvedType(type_index, soa.Decode<mirror::Class>(type).Ptr());
+  dex_cache->SetResolvedType(type_index, soa.Decode<mirror::Class>(type));
 }
 
 static void DexCache_setResolvedString(JNIEnv* env, jobject javaDexCache, jint string_index,
@@ -76,7 +76,7 @@
   ScopedFastNativeObjectAccess soa(env);
   ObjPtr<mirror::DexCache> dex_cache = soa.Decode<mirror::DexCache>(javaDexCache);
   CHECK_LT(static_cast<size_t>(string_index), dex_cache->GetDexFile()->NumStringIds());
-  dex_cache->SetResolvedString(string_index, soa.Decode<mirror::String>(string).Ptr());
+  dex_cache->SetResolvedString(string_index, soa.Decode<mirror::String>(string));
 }
 
 static JNINativeMethod gMethods[] = {
diff --git a/runtime/native/java_lang_String.cc b/runtime/native/java_lang_String.cc
index 5a49c20..ea266d1 100644
--- a/runtime/native/java_lang_String.cc
+++ b/runtime/native/java_lang_String.cc
@@ -57,7 +57,8 @@
   int32_t length_this = string_this->GetLength();
   int32_t length_arg = string_arg->GetLength();
   if (length_arg > 0 && length_this > 0) {
-    mirror::String* result = mirror::String::AllocFromStrings(soa.Self(), string_this, string_arg);
+    ObjPtr<mirror::String> result =
+        mirror::String::AllocFromStrings(soa.Self(), string_this, string_arg);
     return soa.AddLocalReference<jstring>(result);
   }
   jobject string_original = (length_this == 0) ? java_string_arg : java_this;
@@ -76,8 +77,11 @@
   StackHandleScope<1> hs(soa.Self());
   Handle<mirror::String> string_this(hs.NewHandle(soa.Decode<mirror::String>(java_this)));
   gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
-  mirror::String* result = mirror::String::AllocFromString<true>(soa.Self(), length, string_this,
-                                                                 start, allocator_type);
+  ObjPtr<mirror::String> result = mirror::String::AllocFromString<true>(soa.Self(),
+                                                                        length,
+                                                                        string_this,
+                                                                        start,
+                                                                        allocator_type);
   return soa.AddLocalReference<jstring>(result);
 }
 
diff --git a/runtime/native/java_lang_StringFactory.cc b/runtime/native/java_lang_StringFactory.cc
index 119f2b8..e0738a4 100644
--- a/runtime/native/java_lang_StringFactory.cc
+++ b/runtime/native/java_lang_StringFactory.cc
@@ -44,9 +44,12 @@
     return nullptr;
   }
   gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
-  mirror::String* result = mirror::String::AllocFromByteArray<true>(soa.Self(), byte_count,
-                                                                    byte_array, offset, high,
-                                                                    allocator_type);
+  ObjPtr<mirror::String> result = mirror::String::AllocFromByteArray<true>(soa.Self(),
+                                                                           byte_count,
+                                                                           byte_array,
+                                                                           offset,
+                                                                           high,
+                                                                           allocator_type);
   return soa.AddLocalReference<jstring>(result);
 }
 
@@ -58,9 +61,11 @@
   StackHandleScope<1> hs(soa.Self());
   Handle<mirror::CharArray> char_array(hs.NewHandle(soa.Decode<mirror::CharArray>(java_data)));
   gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
-  mirror::String* result = mirror::String::AllocFromCharArray<true>(soa.Self(), char_count,
-                                                                    char_array, offset,
-                                                                    allocator_type);
+  ObjPtr<mirror::String> result = mirror::String::AllocFromCharArray<true>(soa.Self(),
+                                                                           char_count,
+                                                                           char_array,
+                                                                           offset,
+                                                                           allocator_type);
   return soa.AddLocalReference<jstring>(result);
 }
 
@@ -73,8 +78,11 @@
   StackHandleScope<1> hs(soa.Self());
   Handle<mirror::String> string(hs.NewHandle(soa.Decode<mirror::String>(to_copy)));
   gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
-  mirror::String* result = mirror::String::AllocFromString<true>(soa.Self(), string->GetLength(),
-                                                                 string, 0, allocator_type);
+  ObjPtr<mirror::String> result = mirror::String::AllocFromString<true>(soa.Self(),
+                                                                        string->GetLength(),
+                                                                        string,
+                                                                        0,
+                                                                        allocator_type);
   return soa.AddLocalReference<jstring>(result);
 }
 
diff --git a/runtime/native/java_lang_System.cc b/runtime/native/java_lang_System.cc
index f3756a2..7f8da80 100644
--- a/runtime/native/java_lang_System.cc
+++ b/runtime/native/java_lang_System.cc
@@ -35,9 +35,10 @@
  * References are never torn regardless of the number of bits used to represent them.
  */
 
-static void ThrowArrayStoreException_NotAnArray(const char* identifier, mirror::Object* array)
+static void ThrowArrayStoreException_NotAnArray(const char* identifier,
+                                                ObjPtr<mirror::Object> array)
     REQUIRES_SHARED(Locks::mutator_lock_) {
-  std::string actualType(PrettyTypeOf(array));
+  std::string actualType(mirror::Object::PrettyTypeOf(array));
   Thread* self = Thread::Current();
   self->ThrowNewExceptionF("Ljava/lang/ArrayStoreException;",
                            "%s of type %s is not an array", identifier, actualType.c_str());
@@ -62,16 +63,16 @@
   // Make sure source and destination are both arrays.
   ObjPtr<mirror::Object> srcObject = soa.Decode<mirror::Object>(javaSrc);
   if (UNLIKELY(!srcObject->IsArrayInstance())) {
-    ThrowArrayStoreException_NotAnArray("source", srcObject.Ptr());
+    ThrowArrayStoreException_NotAnArray("source", srcObject);
     return;
   }
   ObjPtr<mirror::Object> dstObject = soa.Decode<mirror::Object>(javaDst);
   if (UNLIKELY(!dstObject->IsArrayInstance())) {
-    ThrowArrayStoreException_NotAnArray("destination", dstObject.Ptr());
+    ThrowArrayStoreException_NotAnArray("destination", dstObject);
     return;
   }
-  mirror::Array* srcArray = srcObject->AsArray();
-  mirror::Array* dstArray = dstObject->AsArray();
+  ObjPtr<mirror::Array> srcArray = srcObject->AsArray();
+  ObjPtr<mirror::Array> dstArray = dstObject->AsArray();
 
   // Bounds checking.
   if (UNLIKELY(srcPos < 0) || UNLIKELY(dstPos < 0) || UNLIKELY(count < 0) ||
@@ -84,8 +85,8 @@
     return;
   }
 
-  mirror::Class* dstComponentType = dstArray->GetClass()->GetComponentType();
-  mirror::Class* srcComponentType = srcArray->GetClass()->GetComponentType();
+  ObjPtr<mirror::Class> dstComponentType = dstArray->GetClass()->GetComponentType();
+  ObjPtr<mirror::Class> srcComponentType = srcArray->GetClass()->GetComponentType();
   Primitive::Type dstComponentPrimitiveType = dstComponentType->GetPrimitiveType();
 
   if (LIKELY(srcComponentType == dstComponentType)) {
@@ -127,23 +128,25 @@
         return;
       }
       default:
-        LOG(FATAL) << "Unknown array type: " << PrettyTypeOf(srcArray);
+        LOG(FATAL) << "Unknown array type: " << srcArray->PrettyTypeOf();
         UNREACHABLE();
     }
   }
   // If one of the arrays holds a primitive type the other array must hold the exact same type.
   if (UNLIKELY((dstComponentPrimitiveType != Primitive::kPrimNot) ||
                srcComponentType->IsPrimitive())) {
-    std::string srcType(PrettyTypeOf(srcArray));
-    std::string dstType(PrettyTypeOf(dstArray));
+    std::string srcType(srcArray->PrettyTypeOf());
+    std::string dstType(dstArray->PrettyTypeOf());
     soa.Self()->ThrowNewExceptionF("Ljava/lang/ArrayStoreException;",
                                    "Incompatible types: src=%s, dst=%s",
                                    srcType.c_str(), dstType.c_str());
     return;
   }
   // Arrays hold distinct types and so therefore can't alias - use memcpy instead of memmove.
-  mirror::ObjectArray<mirror::Object>* dstObjArray = dstArray->AsObjectArray<mirror::Object>();
-  mirror::ObjectArray<mirror::Object>* srcObjArray = srcArray->AsObjectArray<mirror::Object>();
+  ObjPtr<mirror::ObjectArray<mirror::Object>> dstObjArray =
+      dstArray->AsObjectArray<mirror::Object>();
+  ObjPtr<mirror::ObjectArray<mirror::Object>> srcObjArray =
+      srcArray->AsObjectArray<mirror::Object>();
   // If we're assigning into say Object[] then we don't need per element checks.
   if (dstComponentType->IsAssignableFrom(srcComponentType)) {
     dstObjArray->AssignableMemcpy(dstPos, srcObjArray, srcPos, count);
@@ -156,8 +159,9 @@
 
 // Template to convert general array to that of its specific primitive type.
 template <typename T>
-inline T* AsPrimitiveArray(mirror::Array* array) {
-  return down_cast<T*>(array);
+inline ObjPtr<T> AsPrimitiveArray(ObjPtr<mirror::Array> array)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  return ObjPtr<T>::DownCast(array);
 }
 
 template <typename T, Primitive::Type kPrimType>
@@ -167,8 +171,8 @@
   ObjPtr<mirror::Object> srcObject = soa.Decode<mirror::Object>(javaSrc);
   ObjPtr<mirror::Object> dstObject = soa.Decode<mirror::Object>(javaDst);
   DCHECK(dstObject != nullptr);
-  mirror::Array* srcArray = srcObject->AsArray();
-  mirror::Array* dstArray = dstObject->AsArray();
+  ObjPtr<mirror::Array> srcArray = srcObject->AsArray();
+  ObjPtr<mirror::Array> dstArray = dstObject->AsArray();
   DCHECK_GE(count, 0);
   DCHECK_EQ(srcArray->GetClass(), dstArray->GetClass());
   DCHECK_EQ(srcArray->GetClass()->GetComponentType()->GetPrimitiveType(), kPrimType);
diff --git a/runtime/native/java_lang_VMClassLoader.cc b/runtime/native/java_lang_VMClassLoader.cc
index 73d12f1..284d2d1 100644
--- a/runtime/native/java_lang_VMClassLoader.cc
+++ b/runtime/native/java_lang_VMClassLoader.cc
@@ -20,12 +20,41 @@
 #include "jni_internal.h"
 #include "mirror/class_loader.h"
 #include "mirror/object-inl.h"
+#include "obj_ptr.h"
 #include "scoped_fast_native_object_access-inl.h"
 #include "ScopedUtfChars.h"
 #include "zip_archive.h"
 
 namespace art {
 
+// A class so we can be friends with ClassLinker and access internal methods.
+class VMClassLoader {
+ public:
+  static mirror::Class* LookupClass(ClassLinker* cl,
+                                    Thread* self,
+                                    const char* descriptor,
+                                    size_t hash,
+                                    ObjPtr<mirror::ClassLoader> class_loader)
+      REQUIRES(!Locks::classlinker_classes_lock_)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    return cl->LookupClass(self, descriptor, hash, class_loader);
+  }
+
+  static ObjPtr<mirror::Class> FindClassInPathClassLoader(ClassLinker* cl,
+                                                          ScopedObjectAccessAlreadyRunnable& soa,
+                                                          Thread* self,
+                                                          const char* descriptor,
+                                                          size_t hash,
+                                                          Handle<mirror::ClassLoader> class_loader)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    ObjPtr<mirror::Class> result;
+    if (cl->FindClassInBaseDexClassLoader(soa, self, descriptor, hash, class_loader, &result)) {
+      return result;
+    }
+    return nullptr;
+  }
+};
+
 static jclass VMClassLoader_findLoadedClass(JNIEnv* env, jclass, jobject javaLoader,
                                             jstring javaName) {
   ScopedFastNativeObjectAccess soa(env);
@@ -35,43 +64,56 @@
     return nullptr;
   }
   ClassLinker* cl = Runtime::Current()->GetClassLinker();
+
+  // Compute hash once.
   std::string descriptor(DotToDescriptor(name.c_str()));
   const size_t descriptor_hash = ComputeModifiedUtf8Hash(descriptor.c_str());
-  mirror::Class* c = cl->LookupClass(soa.Self(),
-                                     descriptor.c_str(),
-                                     descriptor_hash,
-                                     loader.Ptr());
+
+  ObjPtr<mirror::Class> c = VMClassLoader::LookupClass(cl,
+                                                       soa.Self(),
+                                                       descriptor.c_str(),
+                                                       descriptor_hash,
+                                                       loader);
   if (c != nullptr && c->IsResolved()) {
     return soa.AddLocalReference<jclass>(c);
   }
   // If class is erroneous, throw the earlier failure, wrapped in certain cases. See b/28787733.
   if (c != nullptr && c->IsErroneous()) {
-    cl->ThrowEarlierClassFailure(c);
+    cl->ThrowEarlierClassFailure(c.Ptr());
     Thread* self = soa.Self();
-    mirror::Class* eiie_class =
+    ObjPtr<mirror::Class> eiie_class =
         self->DecodeJObject(WellKnownClasses::java_lang_ExceptionInInitializerError)->AsClass();
-    mirror::Class* iae_class =
+    ObjPtr<mirror::Class> iae_class =
         self->DecodeJObject(WellKnownClasses::java_lang_IllegalAccessError)->AsClass();
-    mirror::Class* ncdfe_class =
+    ObjPtr<mirror::Class> ncdfe_class =
         self->DecodeJObject(WellKnownClasses::java_lang_NoClassDefFoundError)->AsClass();
-    mirror::Class* exception = self->GetException()->GetClass();
+    ObjPtr<mirror::Class> exception = self->GetException()->GetClass();
     if (exception == eiie_class || exception == iae_class || exception == ncdfe_class) {
       self->ThrowNewWrappedException("Ljava/lang/ClassNotFoundException;",
-                                     PrettyDescriptor(c).c_str());
+                                     c->PrettyDescriptor().c_str());
     }
     return nullptr;
   }
+
+  // Hard-coded performance optimization: We know that all failed libcore calls to findLoadedClass
+  //                                      are followed by a call to the the classloader to actually
+  //                                      load the class.
   if (loader != nullptr) {
     // Try the common case.
     StackHandleScope<1> hs(soa.Self());
-    cl->FindClassInPathClassLoader(soa, soa.Self(), descriptor.c_str(), descriptor_hash,
-                                   hs.NewHandle(loader), &c);
+    c = VMClassLoader::FindClassInPathClassLoader(cl,
+                                                  soa,
+                                                  soa.Self(),
+                                                  descriptor.c_str(),
+                                                  descriptor_hash,
+                                                  hs.NewHandle(loader));
     if (c != nullptr) {
       return soa.AddLocalReference<jclass>(c);
     }
   }
-  // Class wasn't resolved so it may be erroneous or not yet ready, force the caller to go into
-  // the regular loadClass code.
+
+  // The class wasn't loaded, yet, and our fast-path did not apply (e.g., we didn't understand the
+  // classloader chain).
   return nullptr;
 }
 
diff --git a/runtime/native/java_lang_ref_FinalizerReference.cc b/runtime/native/java_lang_ref_FinalizerReference.cc
index 1f03c7c..c7d06f4 100644
--- a/runtime/native/java_lang_ref_FinalizerReference.cc
+++ b/runtime/native/java_lang_ref_FinalizerReference.cc
@@ -28,8 +28,7 @@
 static jboolean FinalizerReference_makeCircularListIfUnenqueued(JNIEnv* env, jobject javaThis) {
   ScopedFastNativeObjectAccess soa(env);
   ObjPtr<mirror::FinalizerReference> ref = soa.Decode<mirror::FinalizerReference>(javaThis);
-  return Runtime::Current()->GetHeap()->GetReferenceProcessor()->MakeCircularListIfUnenqueued(
-      ref.Ptr());
+  return Runtime::Current()->GetHeap()->GetReferenceProcessor()->MakeCircularListIfUnenqueued(ref);
 }
 
 static JNINativeMethod gMethods[] = {
diff --git a/runtime/native/java_lang_ref_Reference.cc b/runtime/native/java_lang_ref_Reference.cc
index 95f6d51..bedca10 100644
--- a/runtime/native/java_lang_ref_Reference.cc
+++ b/runtime/native/java_lang_ref_Reference.cc
@@ -28,8 +28,8 @@
 static jobject Reference_getReferent(JNIEnv* env, jobject javaThis) {
   ScopedFastNativeObjectAccess soa(env);
   ObjPtr<mirror::Reference> ref = soa.Decode<mirror::Reference>(javaThis);
-  mirror::Object* const referent =
-      Runtime::Current()->GetHeap()->GetReferenceProcessor()->GetReferent(soa.Self(), ref.Ptr());
+  ObjPtr<mirror::Object> const referent =
+      Runtime::Current()->GetHeap()->GetReferenceProcessor()->GetReferent(soa.Self(), ref);
   return soa.AddLocalReference<jobject>(referent);
 }
 
diff --git a/runtime/native/java_lang_reflect_Array.cc b/runtime/native/java_lang_reflect_Array.cc
index 6f2da33..d827f81 100644
--- a/runtime/native/java_lang_reflect_Array.cc
+++ b/runtime/native/java_lang_reflect_Array.cc
@@ -40,8 +40,9 @@
   DCHECK_EQ(dimensions_obj->GetClass()->GetComponentType()->GetPrimitiveType(),
             Primitive::kPrimInt);
   Handle<mirror::IntArray> dimensions_array(
-      hs.NewHandle(down_cast<mirror::IntArray*>(dimensions_obj.Ptr())));
-  mirror::Array* new_array = mirror::Array::CreateMultiArray(soa.Self(), element_class,
+      hs.NewHandle(ObjPtr<mirror::IntArray>::DownCast(dimensions_obj)));
+  mirror::Array* new_array = mirror::Array::CreateMultiArray(soa.Self(),
+                                                             element_class,
                                                              dimensions_array);
   return soa.AddLocalReference<jobject>(new_array);
 }
@@ -53,17 +54,20 @@
     ThrowNegativeArraySizeException(length);
     return nullptr;
   }
-  mirror::Class* element_class = soa.Decode<mirror::Class>(javaElementClass).Ptr();
+  ObjPtr<mirror::Class> element_class = soa.Decode<mirror::Class>(javaElementClass);
   Runtime* runtime = Runtime::Current();
   ClassLinker* class_linker = runtime->GetClassLinker();
-  mirror::Class* array_class = class_linker->FindArrayClass(soa.Self(), &element_class);
+  ObjPtr<mirror::Class> array_class = class_linker->FindArrayClass(soa.Self(), &element_class);
   if (UNLIKELY(array_class == nullptr)) {
     CHECK(soa.Self()->IsExceptionPending());
     return nullptr;
   }
   DCHECK(array_class->IsObjectArrayClass());
-  mirror::Array* new_array = mirror::ObjectArray<mirror::Object*>::Alloc(
-      soa.Self(), array_class, length, runtime->GetHeap()->GetCurrentAllocator());
+  ObjPtr<mirror::Array> new_array = mirror::ObjectArray<mirror::Object*>::Alloc(
+      soa.Self(),
+      array_class,
+      length,
+      runtime->GetHeap()->GetCurrentAllocator());
   return soa.AddLocalReference<jobject>(new_array);
 }
 
diff --git a/runtime/native/java_lang_reflect_Constructor.cc b/runtime/native/java_lang_reflect_Constructor.cc
index 7de0147..66a5359 100644
--- a/runtime/native/java_lang_reflect_Constructor.cc
+++ b/runtime/native/java_lang_reflect_Constructor.cc
@@ -39,13 +39,13 @@
       annotations::GetExceptionTypesForMethod(method);
   if (result_array == nullptr) {
     // Return an empty array instead of a null pointer.
-    mirror::Class* class_class = mirror::Class::GetJavaLangClass();
-    mirror::Class* class_array_class =
+    ObjPtr<mirror::Class> class_class = mirror::Class::GetJavaLangClass();
+    ObjPtr<mirror::Class> class_array_class =
         Runtime::Current()->GetClassLinker()->FindArrayClass(soa.Self(), &class_class);
     if (class_array_class == nullptr) {
       return nullptr;
     }
-    mirror::ObjectArray<mirror::Class>* empty_array =
+    ObjPtr<mirror::ObjectArray<mirror::Class>> empty_array =
         mirror::ObjectArray<mirror::Class>::Alloc(soa.Self(), class_array_class, 0);
     return soa.AddLocalReference<jobjectArray>(empty_array);
   } else {
@@ -66,7 +66,7 @@
   if (UNLIKELY(c->IsAbstract())) {
     soa.Self()->ThrowNewExceptionF("Ljava/lang/InstantiationException;", "Can't instantiate %s %s",
                                    c->IsInterface() ? "interface" : "abstract class",
-                                   PrettyDescriptor(c.Get()).c_str());
+                                   c->PrettyDescriptor().c_str());
     return nullptr;
   }
   // Verify that we can access the class.
@@ -77,7 +77,7 @@
     // If caller is null, then we called from JNI, just avoid the check since JNI avoids most
     // access checks anyways. TODO: Investigate if this the correct behavior.
     if (caller != nullptr && !caller->CanAccess(c.Get())) {
-      if (PrettyDescriptor(c.Get()) == "dalvik.system.DexPathList$Element") {
+      if (c->PrettyDescriptor() == "dalvik.system.DexPathList$Element") {
         // b/20699073.
         LOG(WARNING) << "The dalvik.system.DexPathList$Element constructor is not accessible by "
                         "default. This is a temporary workaround for backwards compatibility "
@@ -85,7 +85,8 @@
       } else {
         soa.Self()->ThrowNewExceptionF(
             "Ljava/lang/IllegalAccessException;", "%s is not accessible from %s",
-            PrettyClass(c.Get()).c_str(), PrettyClass(caller).c_str());
+            c->PrettyClass().c_str(),
+            caller->PrettyClass().c_str());
         return nullptr;
       }
     }
@@ -104,7 +105,7 @@
     return InvokeMethod(soa, javaMethod, nullptr, javaArgs, 2);
   }
 
-  mirror::Object* receiver =
+  ObjPtr<mirror::Object> receiver =
       movable ? c->AllocObject(soa.Self()) : c->AllocNonMovableObject(soa.Self());
   if (receiver == nullptr) {
     return nullptr;
diff --git a/runtime/native/java_lang_reflect_Executable.cc b/runtime/native/java_lang_reflect_Executable.cc
index e317c25..73b81a7 100644
--- a/runtime/native/java_lang_reflect_Executable.cc
+++ b/runtime/native/java_lang_reflect_Executable.cc
@@ -38,7 +38,7 @@
     ObjPtr<mirror::Class> annotation_array_class =
         soa.Decode<mirror::Class>(WellKnownClasses::java_lang_annotation_Annotation__array);
     ObjPtr<mirror::ObjectArray<mirror::Object>> empty_array =
-        mirror::ObjectArray<mirror::Object>::Alloc(soa.Self(), annotation_array_class.Ptr(), 0);
+        mirror::ObjectArray<mirror::Object>::Alloc(soa.Self(), annotation_array_class, 0);
     return soa.AddLocalReference<jobjectArray>(empty_array);
   }
   return soa.AddLocalReference<jobjectArray>(annotations::GetAnnotationsForMethod(method));
@@ -102,7 +102,7 @@
   if (UNLIKELY(names.Get() == nullptr || access_flags.Get() == nullptr)) {
     ThrowIllegalArgumentException(
         StringPrintf("Missing parameter metadata for names or access flags for %s",
-                     PrettyMethod(art_method).c_str()).c_str());
+                     art_method->PrettyMethod().c_str()).c_str());
     return nullptr;
   }
 
@@ -113,7 +113,7 @@
     ThrowIllegalArgumentException(
         StringPrintf(
             "Inconsistent parameter metadata for %s. names length: %d, access flags length: %d",
-            PrettyMethod(art_method).c_str(),
+            art_method->PrettyMethod().c_str(),
             names_count,
             access_flags_count).c_str());
     return nullptr;
@@ -136,7 +136,7 @@
   Handle<mirror::Class> parameter_class =
       hs.NewHandle(soa.Decode<mirror::Class>(WellKnownClasses::java_lang_reflect_Parameter));
   ArtMethod* parameter_init =
-      soa.DecodeMethod(WellKnownClasses::java_lang_reflect_Parameter_init);
+      jni::DecodeArtMethod(WellKnownClasses::java_lang_reflect_Parameter_init);
 
   // Mutable handles used in the loop below to ensure cleanup without scaling the number of
   // handles by the number of parameters.
diff --git a/runtime/native/java_lang_reflect_Field.cc b/runtime/native/java_lang_reflect_Field.cc
index 86b42d0..6206948 100644
--- a/runtime/native/java_lang_reflect_Field.cc
+++ b/runtime/native/java_lang_reflect_Field.cc
@@ -39,25 +39,25 @@
     ThrowIllegalAccessException(
             StringPrintf("Cannot set %s field %s of class %s",
                 PrettyJavaAccessFlags(field->GetAccessFlags()).c_str(),
-                PrettyField(field->GetArtField()).c_str(),
+                ArtField::PrettyField(field->GetArtField()).c_str(),
                 field->GetDeclaringClass() == nullptr ? "null" :
-                    PrettyClass(field->GetDeclaringClass()).c_str()).c_str());
+                    field->GetDeclaringClass()->PrettyClass().c_str()).c_str());
     return false;
   }
   ObjPtr<mirror::Class> calling_class;
   if (!VerifyAccess(self,
-                    MakeObjPtr(obj),
-                    MakeObjPtr(field->GetDeclaringClass()),
+                    obj,
+                    field->GetDeclaringClass(),
                     field->GetAccessFlags(),
                     &calling_class,
                     1)) {
     ThrowIllegalAccessException(
             StringPrintf("Class %s cannot access %s field %s of class %s",
-                calling_class == nullptr ? "null" : PrettyClass(calling_class).c_str(),
+                calling_class == nullptr ? "null" : calling_class->PrettyClass().c_str(),
                 PrettyJavaAccessFlags(field->GetAccessFlags()).c_str(),
-                PrettyField(field->GetArtField()).c_str(),
+                ArtField::PrettyField(field->GetArtField()).c_str(),
                 field->GetDeclaringClass() == nullptr ? "null" :
-                    PrettyClass(field->GetDeclaringClass()).c_str()).c_str());
+                    field->GetDeclaringClass()->PrettyClass().c_str()).c_str());
     return false;
   }
   return true;
@@ -106,7 +106,8 @@
       break;
   }
   ThrowIllegalArgumentException(
-      StringPrintf("Not a primitive field: %s", PrettyField(f->GetArtField()).c_str()).c_str());
+      StringPrintf("Not a primitive field: %s",
+                   ArtField::PrettyField(f->GetArtField()).c_str()).c_str());
   return false;
 }
 
@@ -306,8 +307,9 @@
     FALLTHROUGH_INTENDED;
   case Primitive::kPrimVoid:
     // Never okay.
-    ThrowIllegalArgumentException(StringPrintf("Not a primitive field: %s",
-                                               PrettyField(f->GetArtField()).c_str()).c_str());
+    ThrowIllegalArgumentException(
+        StringPrintf("Not a primitive field: %s",
+                     ArtField::PrettyField(f->GetArtField()).c_str()).c_str());
     return;
   }
 }
@@ -335,7 +337,7 @@
   ObjPtr<mirror::Object> boxed_value = soa.Decode<mirror::Object>(javaValue);
   JValue unboxed_value;
   if (!UnboxPrimitiveForField(boxed_value,
-                              MakeObjPtr(field_type),
+                              field_type,
                               f->GetArtField(),
                               &unboxed_value)) {
     DCHECK(soa.Self()->IsExceptionPending());
@@ -362,8 +364,9 @@
   }
   Primitive::Type field_type = f->GetTypeAsPrimitiveType();
   if (UNLIKELY(field_type == Primitive::kPrimNot)) {
-    ThrowIllegalArgumentException(StringPrintf("Not a primitive field: %s",
-                                               PrettyField(f->GetArtField()).c_str()).c_str());
+    ThrowIllegalArgumentException(
+        StringPrintf("Not a primitive field: %s",
+                     ArtField::PrettyField(f->GetArtField()).c_str()).c_str());
     return;
   }
 
@@ -443,6 +446,12 @@
   return soa.AddLocalReference<jobject>(annotations::GetAnnotationForField(field, klass));
 }
 
+static jlong Field_getArtField(JNIEnv* env, jobject javaField) {
+  ScopedFastNativeObjectAccess soa(env);
+  ArtField* field = soa.Decode<mirror::Field>(javaField)->GetArtField();
+  return reinterpret_cast<jlong>(field);
+}
+
 static jobjectArray Field_getDeclaredAnnotations(JNIEnv* env, jobject javaField) {
   ScopedFastNativeObjectAccess soa(env);
   ArtField* field = soa.Decode<mirror::Field>(javaField)->GetArtField();
@@ -486,6 +495,7 @@
   NATIVE_METHOD(Field, getChar,    "!(Ljava/lang/Object;)C"),
   NATIVE_METHOD(Field, getAnnotationNative,
                 "!(Ljava/lang/Class;)Ljava/lang/annotation/Annotation;"),
+  NATIVE_METHOD(Field, getArtField, "!()J"),
   NATIVE_METHOD(Field, getDeclaredAnnotations, "!()[Ljava/lang/annotation/Annotation;"),
   NATIVE_METHOD(Field, getSignatureAnnotation, "!()[Ljava/lang/String;"),
   NATIVE_METHOD(Field, getDouble,  "!(Ljava/lang/Object;)D"),
diff --git a/runtime/native/java_lang_reflect_Method.cc b/runtime/native/java_lang_reflect_Method.cc
index b5f2f7c..a6589bc 100644
--- a/runtime/native/java_lang_reflect_Method.cc
+++ b/runtime/native/java_lang_reflect_Method.cc
@@ -44,7 +44,7 @@
   ScopedFastNativeObjectAccess soa(env);
   ArtMethod* method = ArtMethod::FromReflectedMethod(soa, javaMethod);
   if (method->GetDeclaringClass()->IsProxyClass()) {
-    mirror::Class* klass = method->GetDeclaringClass();
+    ObjPtr<mirror::Class> klass = method->GetDeclaringClass();
     int throws_index = -1;
     size_t i = 0;
     for (const auto& m : klass->GetDeclaredVirtualMethods(kRuntimePointerSize)) {
@@ -62,8 +62,8 @@
         annotations::GetExceptionTypesForMethod(method);
     if (result_array == nullptr) {
       // Return an empty array instead of a null pointer
-      mirror::Class* class_class = mirror::Class::GetJavaLangClass();
-      mirror::Class* class_array_class =
+      ObjPtr<mirror::Class> class_class = mirror::Class::GetJavaLangClass();
+      ObjPtr<mirror::Class> class_array_class =
           Runtime::Current()->GetClassLinker()->FindArrayClass(soa.Self(), &class_class);
       if (class_array_class == nullptr) {
         return nullptr;
diff --git a/runtime/native/java_lang_reflect_Parameter.cc b/runtime/native/java_lang_reflect_Parameter.cc
index 6060b8a..16164d2 100644
--- a/runtime/native/java_lang_reflect_Parameter.cc
+++ b/runtime/native/java_lang_reflect_Parameter.cc
@@ -47,7 +47,7 @@
     ThrowIllegalArgumentException(
         StringPrintf("Illegal parameterIndex %d for %s, parameter_count is %d",
                      parameterIndex,
-                     PrettyMethod(method).c_str(),
+                     method->PrettyMethod().c_str(),
                      parameter_count).c_str());
     return nullptr;
   }
diff --git a/runtime/native/sun_misc_Unsafe.cc b/runtime/native/sun_misc_Unsafe.cc
index 0bdb5a4..644df07 100644
--- a/runtime/native/sun_misc_Unsafe.cc
+++ b/runtime/native/sun_misc_Unsafe.cc
@@ -65,14 +65,14 @@
     mirror::HeapReference<mirror::Object>* field_addr =
         reinterpret_cast<mirror::HeapReference<mirror::Object>*>(
             reinterpret_cast<uint8_t*>(obj.Ptr()) + static_cast<size_t>(offset));
-    ReadBarrier::Barrier<mirror::Object, kWithReadBarrier, /*kAlwaysUpdateField*/true>(
+    ReadBarrier::Barrier<mirror::Object, kWithReadBarrier, /* kAlwaysUpdateField */ true>(
         obj.Ptr(),
         MemberOffset(offset),
         field_addr);
   }
   bool success = obj->CasFieldStrongSequentiallyConsistentObject<false>(MemberOffset(offset),
-                                                                        expectedValue.Ptr(),
-                                                                        newValue.Ptr());
+                                                                        expectedValue,
+                                                                        newValue);
   return success ? JNI_TRUE : JNI_FALSE;
 }
 
@@ -168,7 +168,7 @@
   ObjPtr<mirror::Object> obj = soa.Decode<mirror::Object>(javaObj);
   ObjPtr<mirror::Object> newValue = soa.Decode<mirror::Object>(javaNewValue);
   // JNI must use non transactional mode.
-  obj->SetFieldObject<false>(MemberOffset(offset), newValue.Ptr());
+  obj->SetFieldObject<false>(MemberOffset(offset), newValue);
 }
 
 static void Unsafe_putObjectVolatile(JNIEnv* env, jobject, jobject javaObj, jlong offset,
@@ -177,7 +177,7 @@
   ObjPtr<mirror::Object> obj = soa.Decode<mirror::Object>(javaObj);
   ObjPtr<mirror::Object> newValue = soa.Decode<mirror::Object>(javaNewValue);
   // JNI must use non transactional mode.
-  obj->SetFieldObjectVolatile<false>(MemberOffset(offset), newValue.Ptr());
+  obj->SetFieldObjectVolatile<false>(MemberOffset(offset), newValue);
 }
 
 static void Unsafe_putOrderedObject(JNIEnv* env, jobject, jobject javaObj, jlong offset,
@@ -187,7 +187,7 @@
   ObjPtr<mirror::Object> newValue = soa.Decode<mirror::Object>(javaNewValue);
   QuasiAtomic::ThreadFenceRelease();
   // JNI must use non transactional mode.
-  obj->SetFieldObject<false>(MemberOffset(offset), newValue.Ptr());
+  obj->SetFieldObject<false>(MemberOffset(offset), newValue);
 }
 
 static jint Unsafe_getArrayBaseOffsetForComponentType(JNIEnv* env, jclass, jobject component_class) {
@@ -305,7 +305,8 @@
 }
 
 template<typename T>
-static void copyToArray(jlong srcAddr, mirror::PrimitiveArray<T>* array,
+static void copyToArray(jlong srcAddr,
+                        ObjPtr<mirror::PrimitiveArray<T>> array,
                         size_t array_offset,
                         size_t size)
         REQUIRES_SHARED(Locks::mutator_lock_) {
@@ -318,7 +319,8 @@
 }
 
 template<typename T>
-static void copyFromArray(jlong dstAddr, mirror::PrimitiveArray<T>* array,
+static void copyFromArray(jlong dstAddr,
+                          ObjPtr<mirror::PrimitiveArray<T>> array,
                           size_t array_offset,
                           size_t size)
         REQUIRES_SHARED(Locks::mutator_lock_) {
@@ -347,15 +349,15 @@
   size_t sz = (size_t)size;
   size_t dst_offset = (size_t)dstOffset;
   ObjPtr<mirror::Object> dst = soa.Decode<mirror::Object>(dstObj);
-  mirror::Class* component_type = dst->GetClass()->GetComponentType();
+  ObjPtr<mirror::Class> component_type = dst->GetClass()->GetComponentType();
   if (component_type->IsPrimitiveByte() || component_type->IsPrimitiveBoolean()) {
-    copyToArray(srcAddr, dst->AsByteSizedArray(), dst_offset, sz);
+    copyToArray(srcAddr, MakeObjPtr(dst->AsByteSizedArray()), dst_offset, sz);
   } else if (component_type->IsPrimitiveShort() || component_type->IsPrimitiveChar()) {
-    copyToArray(srcAddr, dst->AsShortSizedArray(), dst_offset, sz);
+    copyToArray(srcAddr, MakeObjPtr(dst->AsShortSizedArray()), dst_offset, sz);
   } else if (component_type->IsPrimitiveInt() || component_type->IsPrimitiveFloat()) {
-    copyToArray(srcAddr, dst->AsIntArray(), dst_offset, sz);
+    copyToArray(srcAddr, MakeObjPtr(dst->AsIntArray()), dst_offset, sz);
   } else if (component_type->IsPrimitiveLong() || component_type->IsPrimitiveDouble()) {
-    copyToArray(srcAddr, dst->AsLongArray(), dst_offset, sz);
+    copyToArray(srcAddr, MakeObjPtr(dst->AsLongArray()), dst_offset, sz);
   } else {
     ThrowIllegalAccessException("not a primitive array");
   }
@@ -378,15 +380,15 @@
   size_t sz = (size_t)size;
   size_t src_offset = (size_t)srcOffset;
   ObjPtr<mirror::Object> src = soa.Decode<mirror::Object>(srcObj);
-  mirror::Class* component_type = src->GetClass()->GetComponentType();
+  ObjPtr<mirror::Class> component_type = src->GetClass()->GetComponentType();
   if (component_type->IsPrimitiveByte() || component_type->IsPrimitiveBoolean()) {
-    copyFromArray(dstAddr, src->AsByteSizedArray(), src_offset, sz);
+    copyFromArray(dstAddr, MakeObjPtr(src->AsByteSizedArray()), src_offset, sz);
   } else if (component_type->IsPrimitiveShort() || component_type->IsPrimitiveChar()) {
-    copyFromArray(dstAddr, src->AsShortSizedArray(), src_offset, sz);
+    copyFromArray(dstAddr, MakeObjPtr(src->AsShortSizedArray()), src_offset, sz);
   } else if (component_type->IsPrimitiveInt() || component_type->IsPrimitiveFloat()) {
-    copyFromArray(dstAddr, src->AsIntArray(), src_offset, sz);
+    copyFromArray(dstAddr, MakeObjPtr(src->AsIntArray()), src_offset, sz);
   } else if (component_type->IsPrimitiveLong() || component_type->IsPrimitiveDouble()) {
-    copyFromArray(dstAddr, src->AsLongArray(), src_offset, sz);
+    copyFromArray(dstAddr, MakeObjPtr(src->AsLongArray()), src_offset, sz);
   } else {
     ThrowIllegalAccessException("not a primitive array");
   }
diff --git a/runtime/native_bridge_art_interface.cc b/runtime/native_bridge_art_interface.cc
index 059dc5a..c58854b 100644
--- a/runtime/native_bridge_art_interface.cc
+++ b/runtime/native_bridge_art_interface.cc
@@ -25,6 +25,7 @@
 #include "base/logging.h"
 #include "base/macros.h"
 #include "dex_file-inl.h"
+#include "jni_internal.h"
 #include "mirror/class-inl.h"
 #include "scoped_thread_state_change-inl.h"
 #include "sigchain.h"
@@ -33,7 +34,7 @@
 
 static const char* GetMethodShorty(JNIEnv* env, jmethodID mid) {
   ScopedObjectAccess soa(env);
-  ArtMethod* m = soa.DecodeMethod(mid);
+  ArtMethod* m = jni::DecodeArtMethod(mid);
   return m->GetShorty();
 }
 
@@ -69,7 +70,8 @@
         methods[count].fnPtr = m.GetEntryPointFromJni();
         count++;
       } else {
-        LOG(WARNING) << "Output native method array too small. Skipping " << PrettyMethod(&m);
+        LOG(WARNING) << "Output native method array too small. Skipping "
+                     << m.PrettyMethod();
       }
     }
   }
@@ -89,14 +91,14 @@
   GetMethodShorty, GetNativeMethodCount, GetNativeMethods
 };
 
-bool LoadNativeBridge(std::string& native_bridge_library_filename) {
+bool LoadNativeBridge(const std::string& native_bridge_library_filename) {
   VLOG(startup) << "Runtime::Setup native bridge library: "
       << (native_bridge_library_filename.empty() ? "(empty)" : native_bridge_library_filename);
   return android::LoadNativeBridge(native_bridge_library_filename.c_str(),
                                    &native_bridge_art_callbacks_);
 }
 
-void PreInitializeNativeBridge(std::string dir) {
+void PreInitializeNativeBridge(const std::string& dir) {
   VLOG(startup) << "Runtime::Pre-initialize native bridge";
 #ifndef __APPLE__  // Mac OS does not support CLONE_NEWNS.
   if (unshare(CLONE_NEWNS) == -1) {
diff --git a/runtime/native_bridge_art_interface.h b/runtime/native_bridge_art_interface.h
index 090cddb..c86e5da 100644
--- a/runtime/native_bridge_art_interface.h
+++ b/runtime/native_bridge_art_interface.h
@@ -26,10 +26,10 @@
 // Mirror libnativebridge interface. Done to have the ART callbacks out of line, and not require
 // the system/core header file in other files.
 
-bool LoadNativeBridge(std::string& native_bridge_library_filename);
+bool LoadNativeBridge(const std::string& native_bridge_library_filename);
 
 // This is mostly for testing purposes, as in a full system this is called by Zygote code.
-void PreInitializeNativeBridge(std::string dir);
+void PreInitializeNativeBridge(const std::string& dir);
 
 void InitializeNativeBridge(JNIEnv* env, const char* instruction_set);
 
diff --git a/runtime/native_stack_dump.cc b/runtime/native_stack_dump.cc
index c20c8b8..2376889 100644
--- a/runtime/native_stack_dump.cc
+++ b/runtime/native_stack_dump.cc
@@ -256,7 +256,7 @@
   Drain(2U, prefix, pipe, os);
 }
 
-static bool RunCommand(std::string cmd) {
+static bool RunCommand(const std::string& cmd) {
   FILE* stream = popen(cmd.c_str(), "r");
   if (stream) {
     pclose(stream);
@@ -296,10 +296,10 @@
   std::unique_ptr<Backtrace> backtrace(Backtrace::Create(BACKTRACE_CURRENT_PROCESS, tid, map));
   if (!backtrace->Unwind(0, reinterpret_cast<ucontext*>(ucontext_ptr))) {
     os << prefix << "(backtrace::Unwind failed for thread " << tid
-       << ": " <<  backtrace->GetErrorString(backtrace->GetError()) << ")\n";
+       << ": " <<  backtrace->GetErrorString(backtrace->GetError()) << ")" << std::endl;
     return;
   } else if (backtrace->NumFrames() == 0) {
-    os << prefix << "(no native stack frames for thread " << tid << ")\n";
+    os << prefix << "(no native stack frames for thread " << tid << ")" << std::endl;
     return;
   }
 
@@ -347,14 +347,14 @@
           Locks::mutator_lock_->IsSharedHeld(Thread::Current()) &&
           PcIsWithinQuickCode(current_method, it->pc)) {
         const void* start_of_code = current_method->GetEntryPointFromQuickCompiledCode();
-        os << JniLongName(current_method) << "+"
+        os << current_method->JniLongName() << "+"
            << (it->pc - reinterpret_cast<uintptr_t>(start_of_code));
       } else {
         os << "???";
       }
       os << ")";
     }
-    os << "\n";
+    os << std::endl;
     if (try_addr2line && use_addr2line) {
       Addr2line(it->map.name, it->pc - it->map.start, os, prefix, &addr2line_state);
     }
@@ -395,7 +395,7 @@
     if (include_count) {
       os << StringPrintf("#%02zd ", i);
     }
-    os << text << "\n";
+    os << text << std::endl;
   }
 }
 
diff --git a/runtime/oat.h b/runtime/oat.h
index 4d8687c..8c84d42 100644
--- a/runtime/oat.h
+++ b/runtime/oat.h
@@ -32,7 +32,7 @@
 class PACKED(4) OatHeader {
  public:
   static constexpr uint8_t kOatMagic[] = { 'o', 'a', 't', '\n' };
-  static constexpr uint8_t kOatVersion[] = { '0', '8', '9', '\0' };
+  static constexpr uint8_t kOatVersion[] = { '0', '9', '2', '\0' };
 
   static constexpr const char* kImageLocationKey = "image-location";
   static constexpr const char* kDex2OatCmdLineKey = "dex2oat-cmdline";
diff --git a/runtime/oat_file.cc b/runtime/oat_file.cc
index f164a92..c14b616 100644
--- a/runtime/oat_file.cc
+++ b/runtime/oat_file.cc
@@ -959,7 +959,7 @@
     DCHECK(!error_msg->empty());
     return false;
   }
-  bool loaded = elf_file_->Load(executable, low_4gb, error_msg);
+  bool loaded = elf_file_->Load(file, executable, low_4gb, error_msg);
   DCHECK(loaded || !error_msg->empty());
   return loaded;
 }
diff --git a/runtime/oat_file.h b/runtime/oat_file.h
index b99bcb5..63a0e14 100644
--- a/runtime/oat_file.h
+++ b/runtime/oat_file.h
@@ -301,6 +301,10 @@
   // error and sets found to false.
   static OatClass FindOatClass(const DexFile& dex_file, uint16_t class_def_idx, bool* found);
 
+  VdexFile* GetVdexFile() const {
+    return vdex_.get();
+  }
+
  protected:
   OatFile(const std::string& filename, bool executable);
 
diff --git a/runtime/oat_file_assistant.cc b/runtime/oat_file_assistant.cc
index ff00451..0679360 100644
--- a/runtime/oat_file_assistant.cc
+++ b/runtime/oat_file_assistant.cc
@@ -595,7 +595,7 @@
 
   std::vector<std::string> args;
   args.push_back("--dex-file=" + dex_location_);
-  args.push_back("--vdex-fd=" + std::to_string(vdex_file->Fd()));
+  args.push_back("--output-vdex-fd=" + std::to_string(vdex_file->Fd()));
   args.push_back("--oat-fd=" + std::to_string(oat_file->Fd()));
   args.push_back("--oat-location=" + oat_file_name);
 
diff --git a/runtime/oat_file_assistant_test.cc b/runtime/oat_file_assistant_test.cc
index d18e946..d4337b9 100644
--- a/runtime/oat_file_assistant_test.cc
+++ b/runtime/oat_file_assistant_test.cc
@@ -999,7 +999,7 @@
 
 // Turn an absolute path into a path relative to the current working
 // directory.
-static std::string MakePathRelative(std::string target) {
+static std::string MakePathRelative(const std::string& target) {
   char buf[MAXPATHLEN];
   std::string cwd = getcwd(buf, MAXPATHLEN);
 
diff --git a/runtime/oat_file_manager.cc b/runtime/oat_file_manager.cc
index 64e5a63..5641459 100644
--- a/runtime/oat_file_manager.cc
+++ b/runtime/oat_file_manager.cc
@@ -28,6 +28,7 @@
 #include "gc/scoped_gc_critical_section.h"
 #include "gc/space/image_space.h"
 #include "handle_scope-inl.h"
+#include "jni_internal.h"
 #include "mirror/class_loader.h"
 #include "oat_file_assistant.h"
 #include "obj_ptr-inl.h"
@@ -224,9 +225,10 @@
   }
 }
 
+template <typename T>
 static void IterateOverJavaDexFile(ObjPtr<mirror::Object> dex_file,
                                    ArtField* const cookie_field,
-                                   std::function<bool(const DexFile*)> fn)
+                                   const T& fn)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   if (dex_file != nullptr) {
     mirror::LongArray* long_array = cookie_field->GetObject(dex_file)->AsLongArray();
@@ -247,26 +249,27 @@
   }
 }
 
+template <typename T>
 static void IterateOverPathClassLoader(
-    ScopedObjectAccessAlreadyRunnable& soa,
     Handle<mirror::ClassLoader> class_loader,
     MutableHandle<mirror::ObjectArray<mirror::Object>> dex_elements,
-    std::function<bool(const DexFile*)> fn) REQUIRES_SHARED(Locks::mutator_lock_) {
+    const T& fn) REQUIRES_SHARED(Locks::mutator_lock_) {
   // Handle this step.
   // Handle as if this is the child PathClassLoader.
   // The class loader is a PathClassLoader which inherits from BaseDexClassLoader.
   // We need to get the DexPathList and loop through it.
-  ArtField* const cookie_field = soa.DecodeField(WellKnownClasses::dalvik_system_DexFile_cookie);
+  ArtField* const cookie_field =
+      jni::DecodeArtField(WellKnownClasses::dalvik_system_DexFile_cookie);
   ArtField* const dex_file_field =
-      soa.DecodeField(WellKnownClasses::dalvik_system_DexPathList__Element_dexFile);
+      jni::DecodeArtField(WellKnownClasses::dalvik_system_DexPathList__Element_dexFile);
   ObjPtr<mirror::Object> dex_path_list =
-      soa.DecodeField(WellKnownClasses::dalvik_system_PathClassLoader_pathList)->
-      GetObject(class_loader.Get());
+      jni::DecodeArtField(WellKnownClasses::dalvik_system_BaseDexClassLoader_pathList)->
+          GetObject(class_loader.Get());
   if (dex_path_list != nullptr && dex_file_field != nullptr && cookie_field != nullptr) {
     // DexPathList has an array dexElements of Elements[] which each contain a dex file.
     ObjPtr<mirror::Object> dex_elements_obj =
-        soa.DecodeField(WellKnownClasses::dalvik_system_DexPathList_dexElements)->
-        GetObject(dex_path_list);
+        jni::DecodeArtField(WellKnownClasses::dalvik_system_DexPathList_dexElements)->
+            GetObject(dex_path_list);
     // Loop through each dalvik.system.DexPathList$Element's dalvik.system.DexFile and look
     // at the mCookie which is a DexFile vector.
     if (dex_elements_obj != nullptr) {
@@ -297,7 +300,8 @@
   // Unsupported class-loader?
   if (soa.Decode<mirror::Class>(WellKnownClasses::dalvik_system_PathClassLoader) !=
       class_loader->GetClass()) {
-    VLOG(class_linker) << "Unsupported class-loader " << PrettyClass(class_loader->GetClass());
+    VLOG(class_linker) << "Unsupported class-loader "
+                       << mirror::Class::PrettyClass(class_loader->GetClass());
     return false;
   }
 
@@ -322,7 +326,7 @@
       hs.NewHandle<mirror::ObjectArray<mirror::Object>>(nullptr));
   Handle<mirror::ClassLoader> h_class_loader(hs.NewHandle(class_loader));
 
-  IterateOverPathClassLoader(soa, h_class_loader, dex_elements, GetDexFilesFn);
+  IterateOverPathClassLoader(h_class_loader, dex_elements, GetDexFilesFn);
 
   return true;
 }
@@ -336,9 +340,10 @@
     return;
   }
 
-  ArtField* const cookie_field = soa.DecodeField(WellKnownClasses::dalvik_system_DexFile_cookie);
+  ArtField* const cookie_field =
+      jni::DecodeArtField(WellKnownClasses::dalvik_system_DexFile_cookie);
   ArtField* const dex_file_field =
-      soa.DecodeField(WellKnownClasses::dalvik_system_DexPathList__Element_dexFile);
+      jni::DecodeArtField(WellKnownClasses::dalvik_system_DexPathList__Element_dexFile);
   ObjPtr<mirror::Class> const element_class = soa.Decode<mirror::Class>(
       WellKnownClasses::dalvik_system_DexPathList__Element);
   ObjPtr<mirror::Class> const dexfile_class = soa.Decode<mirror::Class>(
@@ -367,7 +372,8 @@
     } else if (dexfile_class == element->GetClass()) {
       dex_file = element;
     } else {
-      LOG(WARNING) << "Unsupported element in dex_elements: " << PrettyClass(element->GetClass());
+      LOG(WARNING) << "Unsupported element in dex_elements: "
+                   << mirror::Class::PrettyClass(element->GetClass());
       continue;
     }
 
@@ -375,7 +381,7 @@
   }
 }
 
-static bool AreSharedLibrariesOk(const std::string shared_libraries,
+static bool AreSharedLibrariesOk(const std::string& shared_libraries,
                                  std::priority_queue<DexFileAndClassPair>& queue) {
   if (shared_libraries.empty()) {
     if (queue.empty()) {
@@ -396,10 +402,14 @@
     while (!temp.empty() && index < shared_libraries_split.size() - 1) {
       DexFileAndClassPair pair(temp.top());
       const DexFile* dex_file = pair.GetDexFile();
-      std::string dex_filename(dex_file->GetLocation());
+      const std::string& dex_filename = dex_file->GetLocation();
+      if (dex_filename != shared_libraries_split[index]) {
+        break;
+      }
+      char* end;
+      size_t shared_lib_checksum = strtoul(shared_libraries_split[index + 1].c_str(), &end, 10);
       uint32_t dex_checksum = dex_file->GetLocationChecksum();
-      if (dex_filename != shared_libraries_split[index] ||
-          dex_checksum != std::stoul(shared_libraries_split[index + 1])) {
+      if (*end != '\0' || dex_checksum != shared_lib_checksum) {
         break;
       }
       temp.pop();
@@ -455,7 +465,7 @@
       GetDexFilesFromDexElementsArray(soa, h_dex_elements, &queue);
     } else if (h_class_loader.Get() != nullptr) {
       VLOG(class_linker) << "Something unsupported with "
-                         << PrettyClass(h_class_loader->GetClass());
+                         << mirror::Class::PrettyClass(h_class_loader->GetClass());
     }
   }
 
diff --git a/runtime/oat_quick_method_header.cc b/runtime/oat_quick_method_header.cc
index a68d9f8..9c2378d 100644
--- a/runtime/oat_quick_method_header.cc
+++ b/runtime/oat_quick_method_header.cc
@@ -56,7 +56,7 @@
            << reinterpret_cast<void*>(sought_offset)
            << "(PC " << reinterpret_cast<void*>(pc) << ", entry_point=" << entry_point
            << " current entry_point=" << method->GetEntryPointFromQuickCompiledCode()
-           << ") in " << PrettyMethod(method);
+           << ") in " << method->PrettyMethod();
   }
   return DexFile::kDexNoIndex;
 }
@@ -85,7 +85,7 @@
   if (abort_on_failure) {
     ScopedObjectAccess soa(Thread::Current());
     LOG(FATAL) << "Failed to find native offset for dex pc 0x" << std::hex << dex_pc
-               << " in " << PrettyMethod(method);
+               << " in " << method->PrettyMethod();
   }
   return UINTPTR_MAX;
 }
diff --git a/runtime/obj_ptr-inl.h b/runtime/obj_ptr-inl.h
index f0a5f6f..d0be6dc 100644
--- a/runtime/obj_ptr-inl.h
+++ b/runtime/obj_ptr-inl.h
@@ -33,7 +33,7 @@
 template<class MirrorType, bool kPoison>
 inline void ObjPtr<MirrorType, kPoison>::AssertValid() const {
   if (kPoison) {
-    CHECK(IsValid()) << "Stale object pointer " << Ptr() << " , expected cookie "
+    CHECK(IsValid()) << "Stale object pointer " << PtrUnchecked() << " , expected cookie "
         << TrimCookie(Thread::Current()->GetPoisonObjectCookie()) << " but got " << GetCookie();
   }
 }
diff --git a/runtime/obj_ptr.h b/runtime/obj_ptr.h
index beb4d33..d24c6fb 100644
--- a/runtime/obj_ptr.h
+++ b/runtime/obj_ptr.h
@@ -18,10 +18,11 @@
 #define ART_RUNTIME_OBJ_PTR_H_
 
 #include <ostream>
+#include <type_traits>
 
+#include "base/macros.h"
 #include "base/mutex.h"  // For Locks::mutator_lock_.
 #include "globals.h"
-#include "mirror/object_reference.h"
 
 namespace art {
 
@@ -31,7 +32,7 @@
 template<class MirrorType, bool kPoison = kIsDebugBuild>
 class ObjPtr {
   static constexpr size_t kCookieShift =
-      sizeof(mirror::HeapReference<mirror::Object>) * kBitsPerByte - kObjectAlignmentShift;
+      sizeof(kHeapReferenceSize) * kBitsPerByte - kObjectAlignmentShift;
   static constexpr size_t kCookieBits = sizeof(uintptr_t) * kBitsPerByte - kCookieShift;
   static constexpr uintptr_t kCookieMask = (static_cast<uintptr_t>(1u) << kCookieBits) - 1;
 
@@ -41,18 +42,36 @@
  public:
   ALWAYS_INLINE ObjPtr() REQUIRES_SHARED(Locks::mutator_lock_) : reference_(0u) {}
 
-  ALWAYS_INLINE ObjPtr(std::nullptr_t) REQUIRES_SHARED(Locks::mutator_lock_) : reference_(0u) {}
+  // Note: The following constructors allow implicit conversion. This simplifies code that uses
+  //       them, e.g., for parameter passing. However, in general, implicit-conversion constructors
+  //       are discouraged and detected by cpplint and clang-tidy. So mark these constructors
+  //       as NOLINT (without category, as the categories are different).
+
+  ALWAYS_INLINE ObjPtr(std::nullptr_t)  // NOLINT
+      REQUIRES_SHARED(Locks::mutator_lock_)
+      : reference_(0u) {}
 
   template <typename Type>
-  ALWAYS_INLINE ObjPtr(Type* ptr) REQUIRES_SHARED(Locks::mutator_lock_)
-      : reference_(Encode(static_cast<MirrorType*>(ptr))) {}
+  ALWAYS_INLINE ObjPtr(Type* ptr)  // NOLINT
+      REQUIRES_SHARED(Locks::mutator_lock_)
+      : reference_(Encode(static_cast<MirrorType*>(ptr))) {
+    static_assert(std::is_base_of<MirrorType, Type>::value,
+                  "Input type must be a subtype of the ObjPtr type");
+  }
 
   template <typename Type>
-  ALWAYS_INLINE ObjPtr(const ObjPtr<Type>& other) REQUIRES_SHARED(Locks::mutator_lock_)
-      : reference_(Encode(static_cast<MirrorType*>(other.Ptr()))) {}
+  ALWAYS_INLINE ObjPtr(const ObjPtr<Type, kPoison>& other)  // NOLINT
+      REQUIRES_SHARED(Locks::mutator_lock_)
+      : reference_(Encode(static_cast<MirrorType*>(other.Ptr()))) {
+    static_assert(std::is_base_of<MirrorType, Type>::value,
+                  "Input type must be a subtype of the ObjPtr type");
+  }
 
   template <typename Type>
-  ALWAYS_INLINE ObjPtr& operator=(const ObjPtr& other) {
+  ALWAYS_INLINE ObjPtr& operator=(const ObjPtr<Type, kPoison>& other)
+      REQUIRES_SHARED(Locks::mutator_lock_) {
+    static_assert(std::is_base_of<MirrorType, Type>::value,
+                  "Input type must be a subtype of the ObjPtr type");
     reference_ = Encode(static_cast<MirrorType*>(other.Ptr()));
     return *this;
   }
@@ -122,6 +141,14 @@
     }
   }
 
+  // Static function to be friendly with null pointers.
+  template <typename SourceType>
+  static ObjPtr<MirrorType> DownCast(ObjPtr<SourceType> ptr) REQUIRES_SHARED(Locks::mutator_lock_) {
+    static_assert(std::is_base_of<SourceType, MirrorType>::value,
+                  "Target type must be a subtype of source type");
+    return static_cast<MirrorType*>(ptr.Ptr());
+  }
+
  private:
   // Trim off high bits of thread local cookie.
   ALWAYS_INLINE static uintptr_t TrimCookie(uintptr_t cookie) {
@@ -137,6 +164,18 @@
   uintptr_t reference_;
 };
 
+static_assert(std::is_trivially_copyable<ObjPtr<void>>::value,
+              "ObjPtr should be trivially copyable");
+
+// Hash function for stl data structures.
+class HashObjPtr {
+ public:
+  template<class MirrorType, bool kPoison>
+  size_t operator()(const ObjPtr<MirrorType, kPoison>& ptr) const NO_THREAD_SAFETY_ANALYSIS {
+    return std::hash<MirrorType*>()(ptr.Ptr());
+  }
+};
+
 template<class MirrorType, bool kPoison, typename PointerType>
 ALWAYS_INLINE bool operator==(const PointerType* a, const ObjPtr<MirrorType, kPoison>& b)
     REQUIRES_SHARED(Locks::mutator_lock_) {
diff --git a/runtime/openjdkjvmti/Android.bp b/runtime/openjdkjvmti/Android.bp
index d7a6c0a..b323aef 100644
--- a/runtime/openjdkjvmti/Android.bp
+++ b/runtime/openjdkjvmti/Android.bp
@@ -17,7 +17,13 @@
     name: "libopenjdkjvmti_defaults",
     defaults: ["art_defaults"],
     host_supported: true,
-    srcs: ["OpenjdkJvmTi.cc",
+    srcs: ["events.cc",
+           "object_tagging.cc",
+           "OpenjdkJvmTi.cc",
+           "ti_class.cc",
+           "ti_heap.cc",
+           "ti_method.cc",
+           "ti_stack.cc",
            "transform.cc"],
     include_dirs: ["art/runtime"],
     shared_libs: [
diff --git a/runtime/openjdkjvmti/OpenjdkJvmTi.cc b/runtime/openjdkjvmti/OpenjdkJvmTi.cc
index a1a2361..6480843 100644
--- a/runtime/openjdkjvmti/OpenjdkJvmTi.cc
+++ b/runtime/openjdkjvmti/OpenjdkJvmTi.cc
@@ -37,8 +37,19 @@
 #include "openjdkjvmti/jvmti.h"
 
 #include "art_jvmti.h"
+#include "base/mutex.h"
+#include "events-inl.h"
 #include "jni_env_ext-inl.h"
+#include "object_tagging.h"
+#include "obj_ptr-inl.h"
 #include "runtime.h"
+#include "scoped_thread_state_change-inl.h"
+#include "thread_list.h"
+#include "thread-inl.h"
+#include "ti_class.h"
+#include "ti_heap.h"
+#include "ti_method.h"
+#include "ti_stack.h"
 #include "transform.h"
 
 // TODO Remove this at some point by annotating all the methods. It was put in to make the skeleton
@@ -47,20 +58,41 @@
 
 namespace openjdkjvmti {
 
+EventHandler gEventHandler;
+ObjectTagTable gObjectTagTable(&gEventHandler);
+
+#define ENSURE_NON_NULL(n)      \
+  do {                          \
+    if ((n) == nullptr) {       \
+      return ERR(NULL_POINTER); \
+    }                           \
+  } while (false)
+
 class JvmtiFunctions {
  private:
   static bool IsValidEnv(jvmtiEnv* env) {
     return env != nullptr;
   }
 
+#define ENSURE_VALID_ENV(env)          \
+  do {                                 \
+    if (!IsValidEnv(env)) {            \
+      return ERR(INVALID_ENVIRONMENT); \
+    }                                  \
+  } while (false)
+
+#define ENSURE_HAS_CAP(env, cap) \
+  do { \
+    ENSURE_VALID_ENV(env); \
+    if (ArtJvmTiEnv::AsArtJvmTiEnv(env)->capabilities.cap != 1) { \
+      return ERR(MUST_POSSESS_CAPABILITY); \
+    } \
+  } while (false)
+
  public:
   static jvmtiError Allocate(jvmtiEnv* env, jlong size, unsigned char** mem_ptr) {
-    if (!IsValidEnv(env)) {
-      return ERR(INVALID_ENVIRONMENT);
-    }
-    if (mem_ptr == nullptr) {
-      return ERR(NULL_POINTER);
-    }
+    ENSURE_VALID_ENV(env);
+    ENSURE_NON_NULL(mem_ptr);
     if (size < 0) {
       return ERR(ILLEGAL_ARGUMENT);
     } else if (size == 0) {
@@ -72,9 +104,7 @@
   }
 
   static jvmtiError Deallocate(jvmtiEnv* env, unsigned char* mem) {
-    if (!IsValidEnv(env)) {
-      return ERR(INVALID_ENVIRONMENT);
-    }
+    ENSURE_VALID_ENV(env);
     if (mem != nullptr) {
       free(mem);
     }
@@ -144,7 +174,7 @@
   static jvmtiError GetCurrentContendedMonitor(jvmtiEnv* env,
                                                jthread thread,
                                                jobject* monitor_ptr) {
-  return ERR(NOT_IMPLEMENTED);
+    return ERR(NOT_IMPLEMENTED);
   }
 
   static jvmtiError RunAgentThread(jvmtiEnv* env,
@@ -190,7 +220,12 @@
                                   jint max_frame_count,
                                   jvmtiFrameInfo* frame_buffer,
                                   jint* count_ptr) {
-    return ERR(NOT_IMPLEMENTED);
+    return StackUtil::GetStackTrace(env,
+                                    thread,
+                                    start_depth,
+                                    max_frame_count,
+                                    frame_buffer,
+                                    count_ptr);
   }
 
   static jvmtiError GetAllStackTraces(jvmtiEnv* env,
@@ -258,7 +293,13 @@
                                      jobject initial_object,
                                      const jvmtiHeapCallbacks* callbacks,
                                      const void* user_data) {
-    return ERR(NOT_IMPLEMENTED);
+    HeapUtil heap_util(&gObjectTagTable);
+    return heap_util.FollowReferences(env,
+                                      heap_filter,
+                                      klass,
+                                      initial_object,
+                                      callbacks,
+                                      user_data);
   }
 
   static jvmtiError IterateThroughHeap(jvmtiEnv* env,
@@ -266,15 +307,45 @@
                                        jclass klass,
                                        const jvmtiHeapCallbacks* callbacks,
                                        const void* user_data) {
-    return ERR(NOT_IMPLEMENTED);
+    ENSURE_HAS_CAP(env, can_tag_objects);
+    HeapUtil heap_util(&gObjectTagTable);
+    return heap_util.IterateThroughHeap(env, heap_filter, klass, callbacks, user_data);
   }
 
   static jvmtiError GetTag(jvmtiEnv* env, jobject object, jlong* tag_ptr) {
-    return ERR(NOT_IMPLEMENTED);
+    ENSURE_HAS_CAP(env, can_tag_objects);
+
+    JNIEnv* jni_env = GetJniEnv(env);
+    if (jni_env == nullptr) {
+      return ERR(INTERNAL);
+    }
+
+    art::ScopedObjectAccess soa(jni_env);
+    art::ObjPtr<art::mirror::Object> obj = soa.Decode<art::mirror::Object>(object);
+    if (!gObjectTagTable.GetTag(obj.Ptr(), tag_ptr)) {
+      *tag_ptr = 0;
+    }
+
+    return ERR(NONE);
   }
 
   static jvmtiError SetTag(jvmtiEnv* env, jobject object, jlong tag) {
-    return ERR(NOT_IMPLEMENTED);
+    ENSURE_HAS_CAP(env, can_tag_objects);
+
+    if (object == nullptr) {
+      return ERR(NULL_POINTER);
+    }
+
+    JNIEnv* jni_env = GetJniEnv(env);
+    if (jni_env == nullptr) {
+      return ERR(INTERNAL);
+    }
+
+    art::ScopedObjectAccess soa(jni_env);
+    art::ObjPtr<art::mirror::Object> obj = soa.Decode<art::mirror::Object>(object);
+    gObjectTagTable.Set(obj.Ptr(), tag);
+
+    return ERR(NONE);
   }
 
   static jvmtiError GetObjectsWithTags(jvmtiEnv* env,
@@ -283,11 +354,24 @@
                                        jint* count_ptr,
                                        jobject** object_result_ptr,
                                        jlong** tag_result_ptr) {
-    return ERR(NOT_IMPLEMENTED);
+    ENSURE_HAS_CAP(env, can_tag_objects);
+
+    JNIEnv* jni_env = GetJniEnv(env);
+    if (jni_env == nullptr) {
+      return ERR(INTERNAL);
+    }
+
+    art::ScopedObjectAccess soa(jni_env);
+    return gObjectTagTable.GetTaggedObjects(env,
+                                            tag_count,
+                                            tags,
+                                            count_ptr,
+                                            object_result_ptr,
+                                            tag_result_ptr);
   }
 
   static jvmtiError ForceGarbageCollection(jvmtiEnv* env) {
-    return ERR(NOT_IMPLEMENTED);
+    return HeapUtil::ForceGarbageCollection(env);
   }
 
   static jvmtiError IterateOverObjectsReachableFromObject(
@@ -433,7 +517,8 @@
   }
 
   static jvmtiError GetLoadedClasses(jvmtiEnv* env, jint* class_count_ptr, jclass** classes_ptr) {
-    return ERR(NOT_IMPLEMENTED);
+    HeapUtil heap_util(&gObjectTagTable);
+    return heap_util.GetLoadedClasses(env, class_count_ptr, classes_ptr);
   }
 
   static jvmtiError GetClassLoaderClasses(jvmtiEnv* env,
@@ -447,7 +532,7 @@
                                       jclass klass,
                                       char** signature_ptr,
                                       char** generic_ptr) {
-    return ERR(NOT_IMPLEMENTED);
+    return ClassUtil::GetClassSignature(env, klass, signature_ptr, generic_ptr);
   }
 
   static jvmtiError GetClassStatus(jvmtiEnv* env, jclass klass, jint* status_ptr) {
@@ -583,19 +668,19 @@
                                   char** name_ptr,
                                   char** signature_ptr,
                                   char** generic_ptr) {
-    return ERR(NOT_IMPLEMENTED);
+    return MethodUtil::GetMethodName(env, method, name_ptr, signature_ptr, generic_ptr);
   }
 
   static jvmtiError GetMethodDeclaringClass(jvmtiEnv* env,
                                             jmethodID method,
                                             jclass* declaring_class_ptr) {
-    return ERR(NOT_IMPLEMENTED);
+    return MethodUtil::GetMethodDeclaringClass(env, method, declaring_class_ptr);
   }
 
   static jvmtiError GetMethodModifiers(jvmtiEnv* env,
                                        jmethodID method,
                                        jint* modifiers_ptr) {
-    return ERR(NOT_IMPLEMENTED);
+    return MethodUtil::GetMethodModifiers(env, method, modifiers_ptr);
   }
 
   static jvmtiError GetMaxLocals(jvmtiEnv* env,
@@ -694,10 +779,31 @@
     return ERR(NOT_IMPLEMENTED);
   }
 
+  // TODO: This will require locking, so that an agent can't remove callbacks when we're dispatching
+  //       an event.
   static jvmtiError SetEventCallbacks(jvmtiEnv* env,
                                       const jvmtiEventCallbacks* callbacks,
                                       jint size_of_callbacks) {
-    return ERR(NOT_IMPLEMENTED);
+    ENSURE_VALID_ENV(env);
+    if (size_of_callbacks < 0) {
+      return ERR(ILLEGAL_ARGUMENT);
+    }
+
+    if (callbacks == nullptr) {
+      ArtJvmTiEnv::AsArtJvmTiEnv(env)->event_callbacks.reset();
+      return ERR(NONE);
+    }
+
+    std::unique_ptr<jvmtiEventCallbacks> tmp(new jvmtiEventCallbacks());
+    memset(tmp.get(), 0, sizeof(jvmtiEventCallbacks));
+    size_t copy_size = std::min(sizeof(jvmtiEventCallbacks),
+                                static_cast<size_t>(size_of_callbacks));
+    copy_size = art::RoundDown(copy_size, sizeof(void*));
+    memcpy(tmp.get(), callbacks, copy_size);
+
+    ArtJvmTiEnv::AsArtJvmTiEnv(env)->event_callbacks = std::move(tmp);
+
+    return ERR(NONE);
   }
 
   static jvmtiError SetEventNotificationMode(jvmtiEnv* env,
@@ -705,7 +811,23 @@
                                              jvmtiEvent event_type,
                                              jthread event_thread,
                                              ...) {
-    return ERR(NOT_IMPLEMENTED);
+    ENSURE_VALID_ENV(env);
+    // TODO: Check for capabilities.
+    art::Thread* art_thread = nullptr;
+    if (event_thread != nullptr) {
+      // TODO: Need non-aborting call here, to return JVMTI_ERROR_INVALID_THREAD.
+      art::ScopedObjectAccess soa(art::Thread::Current());
+      art::MutexLock mu(soa.Self(), *art::Locks::thread_list_lock_);
+      art_thread = art::Thread::FromManagedThread(soa, event_thread);
+
+      if (art_thread == nullptr ||  // The thread hasn't been started or is already dead.
+          art_thread->IsStillStarting()) {
+        // TODO: We may want to let the EventHandler know, so it could clean up masks, potentially.
+        return ERR(THREAD_NOT_ALIVE);
+      }
+    }
+
+    return gEventHandler.SetEvent(ArtJvmTiEnv::AsArtJvmTiEnv(env), art_thread, event_type, mode);
   }
 
   static jvmtiError GenerateEvents(jvmtiEnv* env, jvmtiEvent event_type) {
@@ -731,20 +853,136 @@
   }
 
   static jvmtiError GetPotentialCapabilities(jvmtiEnv* env, jvmtiCapabilities* capabilities_ptr) {
-    return ERR(NOT_IMPLEMENTED);
+    ENSURE_VALID_ENV(env);
+    ENSURE_NON_NULL(capabilities_ptr);
+    *capabilities_ptr = kPotentialCapabilities;
+    return OK;
   }
 
   static jvmtiError AddCapabilities(jvmtiEnv* env, const jvmtiCapabilities* capabilities_ptr) {
-    return ERR(NOT_IMPLEMENTED);
+    ENSURE_VALID_ENV(env);
+    ENSURE_NON_NULL(capabilities_ptr);
+    ArtJvmTiEnv* art_env = static_cast<ArtJvmTiEnv*>(env);
+    jvmtiError ret = OK;
+#define ADD_CAPABILITY(e) \
+    do { \
+      if (capabilities_ptr->e == 1) { \
+        if (kPotentialCapabilities.e == 1) { \
+          art_env->capabilities.e = 1;\
+        } else { \
+          ret = ERR(NOT_AVAILABLE); \
+        } \
+      } \
+    } while (false)
+
+    ADD_CAPABILITY(can_tag_objects);
+    ADD_CAPABILITY(can_generate_field_modification_events);
+    ADD_CAPABILITY(can_generate_field_access_events);
+    ADD_CAPABILITY(can_get_bytecodes);
+    ADD_CAPABILITY(can_get_synthetic_attribute);
+    ADD_CAPABILITY(can_get_owned_monitor_info);
+    ADD_CAPABILITY(can_get_current_contended_monitor);
+    ADD_CAPABILITY(can_get_monitor_info);
+    ADD_CAPABILITY(can_pop_frame);
+    ADD_CAPABILITY(can_redefine_classes);
+    ADD_CAPABILITY(can_signal_thread);
+    ADD_CAPABILITY(can_get_source_file_name);
+    ADD_CAPABILITY(can_get_line_numbers);
+    ADD_CAPABILITY(can_get_source_debug_extension);
+    ADD_CAPABILITY(can_access_local_variables);
+    ADD_CAPABILITY(can_maintain_original_method_order);
+    ADD_CAPABILITY(can_generate_single_step_events);
+    ADD_CAPABILITY(can_generate_exception_events);
+    ADD_CAPABILITY(can_generate_frame_pop_events);
+    ADD_CAPABILITY(can_generate_breakpoint_events);
+    ADD_CAPABILITY(can_suspend);
+    ADD_CAPABILITY(can_redefine_any_class);
+    ADD_CAPABILITY(can_get_current_thread_cpu_time);
+    ADD_CAPABILITY(can_get_thread_cpu_time);
+    ADD_CAPABILITY(can_generate_method_entry_events);
+    ADD_CAPABILITY(can_generate_method_exit_events);
+    ADD_CAPABILITY(can_generate_all_class_hook_events);
+    ADD_CAPABILITY(can_generate_compiled_method_load_events);
+    ADD_CAPABILITY(can_generate_monitor_events);
+    ADD_CAPABILITY(can_generate_vm_object_alloc_events);
+    ADD_CAPABILITY(can_generate_native_method_bind_events);
+    ADD_CAPABILITY(can_generate_garbage_collection_events);
+    ADD_CAPABILITY(can_generate_object_free_events);
+    ADD_CAPABILITY(can_force_early_return);
+    ADD_CAPABILITY(can_get_owned_monitor_stack_depth_info);
+    ADD_CAPABILITY(can_get_constant_pool);
+    ADD_CAPABILITY(can_set_native_method_prefix);
+    ADD_CAPABILITY(can_retransform_classes);
+    ADD_CAPABILITY(can_retransform_any_class);
+    ADD_CAPABILITY(can_generate_resource_exhaustion_heap_events);
+    ADD_CAPABILITY(can_generate_resource_exhaustion_threads_events);
+#undef ADD_CAPABILITY
+    return ret;
   }
 
   static jvmtiError RelinquishCapabilities(jvmtiEnv* env,
                                            const jvmtiCapabilities* capabilities_ptr) {
-    return ERR(NOT_IMPLEMENTED);
+    ENSURE_VALID_ENV(env);
+    ENSURE_NON_NULL(capabilities_ptr);
+    ArtJvmTiEnv* art_env = reinterpret_cast<ArtJvmTiEnv*>(env);
+#define DEL_CAPABILITY(e) \
+    do { \
+      if (capabilities_ptr->e == 1) { \
+        art_env->capabilities.e = 0;\
+      } \
+    } while (false)
+
+    DEL_CAPABILITY(can_tag_objects);
+    DEL_CAPABILITY(can_generate_field_modification_events);
+    DEL_CAPABILITY(can_generate_field_access_events);
+    DEL_CAPABILITY(can_get_bytecodes);
+    DEL_CAPABILITY(can_get_synthetic_attribute);
+    DEL_CAPABILITY(can_get_owned_monitor_info);
+    DEL_CAPABILITY(can_get_current_contended_monitor);
+    DEL_CAPABILITY(can_get_monitor_info);
+    DEL_CAPABILITY(can_pop_frame);
+    DEL_CAPABILITY(can_redefine_classes);
+    DEL_CAPABILITY(can_signal_thread);
+    DEL_CAPABILITY(can_get_source_file_name);
+    DEL_CAPABILITY(can_get_line_numbers);
+    DEL_CAPABILITY(can_get_source_debug_extension);
+    DEL_CAPABILITY(can_access_local_variables);
+    DEL_CAPABILITY(can_maintain_original_method_order);
+    DEL_CAPABILITY(can_generate_single_step_events);
+    DEL_CAPABILITY(can_generate_exception_events);
+    DEL_CAPABILITY(can_generate_frame_pop_events);
+    DEL_CAPABILITY(can_generate_breakpoint_events);
+    DEL_CAPABILITY(can_suspend);
+    DEL_CAPABILITY(can_redefine_any_class);
+    DEL_CAPABILITY(can_get_current_thread_cpu_time);
+    DEL_CAPABILITY(can_get_thread_cpu_time);
+    DEL_CAPABILITY(can_generate_method_entry_events);
+    DEL_CAPABILITY(can_generate_method_exit_events);
+    DEL_CAPABILITY(can_generate_all_class_hook_events);
+    DEL_CAPABILITY(can_generate_compiled_method_load_events);
+    DEL_CAPABILITY(can_generate_monitor_events);
+    DEL_CAPABILITY(can_generate_vm_object_alloc_events);
+    DEL_CAPABILITY(can_generate_native_method_bind_events);
+    DEL_CAPABILITY(can_generate_garbage_collection_events);
+    DEL_CAPABILITY(can_generate_object_free_events);
+    DEL_CAPABILITY(can_force_early_return);
+    DEL_CAPABILITY(can_get_owned_monitor_stack_depth_info);
+    DEL_CAPABILITY(can_get_constant_pool);
+    DEL_CAPABILITY(can_set_native_method_prefix);
+    DEL_CAPABILITY(can_retransform_classes);
+    DEL_CAPABILITY(can_retransform_any_class);
+    DEL_CAPABILITY(can_generate_resource_exhaustion_heap_events);
+    DEL_CAPABILITY(can_generate_resource_exhaustion_threads_events);
+#undef DEL_CAPABILITY
+    return OK;
   }
 
   static jvmtiError GetCapabilities(jvmtiEnv* env, jvmtiCapabilities* capabilities_ptr) {
-    return ERR(NOT_IMPLEMENTED);
+    ENSURE_VALID_ENV(env);
+    ENSURE_NON_NULL(capabilities_ptr);
+    ArtJvmTiEnv* artenv = reinterpret_cast<ArtJvmTiEnv*>(env);
+    *capabilities_ptr = artenv->capabilities;
+    return OK;
   }
 
   static jvmtiError GetCurrentThreadCpuTimerInfo(jvmtiEnv* env, jvmtiTimerInfo* info_ptr) {
@@ -800,44 +1038,31 @@
   }
 
   static jvmtiError DisposeEnvironment(jvmtiEnv* env) {
-    if (!IsValidEnv(env)) {
-      return ERR(INVALID_ENVIRONMENT);
-    }
+    ENSURE_VALID_ENV(env);
     delete env;
     return OK;
   }
 
   static jvmtiError SetEnvironmentLocalStorage(jvmtiEnv* env, const void* data) {
-    if (!IsValidEnv(env)) {
-      return ERR(INVALID_ENVIRONMENT);
-    }
+    ENSURE_VALID_ENV(env);
     reinterpret_cast<ArtJvmTiEnv*>(env)->local_data = const_cast<void*>(data);
     return OK;
   }
 
   static jvmtiError GetEnvironmentLocalStorage(jvmtiEnv* env, void** data_ptr) {
-    if (!IsValidEnv(env)) {
-      return ERR(INVALID_ENVIRONMENT);
-    }
+    ENSURE_VALID_ENV(env);
     *data_ptr = reinterpret_cast<ArtJvmTiEnv*>(env)->local_data;
     return OK;
   }
 
   static jvmtiError GetVersionNumber(jvmtiEnv* env, jint* version_ptr) {
-    if (!IsValidEnv(env)) {
-      return ERR(INVALID_ENVIRONMENT);
-    }
+    ENSURE_VALID_ENV(env);
     *version_ptr = JVMTI_VERSION;
     return OK;
   }
 
   static jvmtiError GetErrorName(jvmtiEnv* env, jvmtiError error,  char** name_ptr) {
-    if (!IsValidEnv(env)) {
-      return ERR(INVALID_ENVIRONMENT);
-    }
-    if (name_ptr == nullptr) {
-      return ERR(NULL_POINTER);
-    }
+    ENSURE_NON_NULL(name_ptr);
     switch (error) {
 #define ERROR_CASE(e) case (JVMTI_ERROR_ ## e) : do { \
           *name_ptr = const_cast<char*>("JVMTI_ERROR_"#e); \
@@ -981,6 +1206,8 @@
 static void CreateArtJvmTiEnv(art::JavaVMExt* vm, /*out*/void** new_jvmtiEnv) {
   struct ArtJvmTiEnv* env = new ArtJvmTiEnv(vm);
   *new_jvmtiEnv = env;
+
+  gEventHandler.RegisterArtJvmTiEnv(env);
 }
 
 // A hook that the runtime uses to allow plugins to handle GetEnv calls. It returns true and
@@ -998,7 +1225,9 @@
 
 // The plugin initialization function. This adds the jvmti environment.
 extern "C" bool ArtPlugin_Initialize() {
-  art::Runtime::Current()->GetJavaVM()->AddEnvironmentHook(GetEnvHandler);
+  art::Runtime* runtime = art::Runtime::Current();
+  runtime->GetJavaVM()->AddEnvironmentHook(GetEnvHandler);
+  runtime->AddSystemWeakHolder(&gObjectTagTable);
   return true;
 }
 
diff --git a/runtime/openjdkjvmti/art_jvmti.h b/runtime/openjdkjvmti/art_jvmti.h
index a2c6882..48b29a3 100644
--- a/runtime/openjdkjvmti/art_jvmti.h
+++ b/runtime/openjdkjvmti/art_jvmti.h
@@ -32,8 +32,14 @@
 #ifndef ART_RUNTIME_OPENJDKJVMTI_ART_JVMTI_H_
 #define ART_RUNTIME_OPENJDKJVMTI_ART_JVMTI_H_
 
+#include <memory>
+
 #include <jni.h>
 
+#include "base/casts.h"
+#include "base/logging.h"
+#include "base/macros.h"
+#include "events.h"
 #include "java_vm_ext.h"
 #include "jni_env_ext.h"
 #include "jvmti.h"
@@ -46,10 +52,19 @@
 struct ArtJvmTiEnv : public jvmtiEnv {
   art::JavaVMExt* art_vm;
   void* local_data;
+  jvmtiCapabilities capabilities;
 
-  explicit ArtJvmTiEnv(art::JavaVMExt* runtime) : art_vm(runtime), local_data(nullptr) {
+  EventMasks event_masks;
+  std::unique_ptr<jvmtiEventCallbacks> event_callbacks;
+
+  explicit ArtJvmTiEnv(art::JavaVMExt* runtime)
+      : art_vm(runtime), local_data(nullptr), capabilities() {
     functions = &gJvmtiInterface;
   }
+
+  static ArtJvmTiEnv* AsArtJvmTiEnv(jvmtiEnv* env) {
+    return art::down_cast<ArtJvmTiEnv*>(env);
+  }
 };
 
 // Macro and constexpr to make error values less annoying to write.
@@ -69,6 +84,89 @@
   return ret_value;
 }
 
+class JvmtiDeleter {
+ public:
+  JvmtiDeleter() : env_(nullptr) {}
+  explicit JvmtiDeleter(jvmtiEnv* env) : env_(env) {}
+
+  JvmtiDeleter(JvmtiDeleter&) = default;
+  JvmtiDeleter(JvmtiDeleter&&) = default;
+  JvmtiDeleter& operator=(const JvmtiDeleter&) = default;
+
+  void operator()(unsigned char* ptr) const {
+    CHECK(env_ != nullptr);
+    jvmtiError ret = env_->Deallocate(ptr);
+    CHECK(ret == ERR(NONE));
+  }
+
+ private:
+  mutable jvmtiEnv* env_;
+};
+
+using JvmtiUniquePtr = std::unique_ptr<unsigned char, JvmtiDeleter>;
+
+ALWAYS_INLINE
+static inline JvmtiUniquePtr MakeJvmtiUniquePtr(jvmtiEnv* env, unsigned char* mem) {
+  return JvmtiUniquePtr(mem, JvmtiDeleter(env));
+}
+
+ALWAYS_INLINE
+static inline jvmtiError CopyString(jvmtiEnv* env, const char* src, unsigned char** copy) {
+  size_t len = strlen(src) + 1;
+  unsigned char* buf;
+  jvmtiError ret = env->Allocate(len, &buf);
+  if (ret != ERR(NONE)) {
+    return ret;
+  }
+  strcpy(reinterpret_cast<char*>(buf), src);
+  *copy = buf;
+  return ret;
+}
+
+const jvmtiCapabilities kPotentialCapabilities = {
+    .can_tag_objects                                 = 1,
+    .can_generate_field_modification_events          = 0,
+    .can_generate_field_access_events                = 0,
+    .can_get_bytecodes                               = 0,
+    .can_get_synthetic_attribute                     = 0,
+    .can_get_owned_monitor_info                      = 0,
+    .can_get_current_contended_monitor               = 0,
+    .can_get_monitor_info                            = 0,
+    .can_pop_frame                                   = 0,
+    .can_redefine_classes                            = 0,
+    .can_signal_thread                               = 0,
+    .can_get_source_file_name                        = 0,
+    .can_get_line_numbers                            = 0,
+    .can_get_source_debug_extension                  = 0,
+    .can_access_local_variables                      = 0,
+    .can_maintain_original_method_order              = 0,
+    .can_generate_single_step_events                 = 0,
+    .can_generate_exception_events                   = 0,
+    .can_generate_frame_pop_events                   = 0,
+    .can_generate_breakpoint_events                  = 0,
+    .can_suspend                                     = 0,
+    .can_redefine_any_class                          = 0,
+    .can_get_current_thread_cpu_time                 = 0,
+    .can_get_thread_cpu_time                         = 0,
+    .can_generate_method_entry_events                = 0,
+    .can_generate_method_exit_events                 = 0,
+    .can_generate_all_class_hook_events              = 0,
+    .can_generate_compiled_method_load_events        = 0,
+    .can_generate_monitor_events                     = 0,
+    .can_generate_vm_object_alloc_events             = 0,
+    .can_generate_native_method_bind_events          = 0,
+    .can_generate_garbage_collection_events          = 0,
+    .can_generate_object_free_events                 = 0,
+    .can_force_early_return                          = 0,
+    .can_get_owned_monitor_stack_depth_info          = 0,
+    .can_get_constant_pool                           = 0,
+    .can_set_native_method_prefix                    = 0,
+    .can_retransform_classes                         = 0,
+    .can_retransform_any_class                       = 0,
+    .can_generate_resource_exhaustion_heap_events    = 0,
+    .can_generate_resource_exhaustion_threads_events = 0,
+};
+
 }  // namespace openjdkjvmti
 
 #endif  // ART_RUNTIME_OPENJDKJVMTI_ART_JVMTI_H_
diff --git a/runtime/openjdkjvmti/events-inl.h b/runtime/openjdkjvmti/events-inl.h
new file mode 100644
index 0000000..d027201
--- /dev/null
+++ b/runtime/openjdkjvmti/events-inl.h
@@ -0,0 +1,124 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_OPENJDKJVMTI_EVENTS_INL_H_
+#define ART_RUNTIME_OPENJDKJVMTI_EVENTS_INL_H_
+
+#include "events.h"
+
+#include "art_jvmti.h"
+
+namespace openjdkjvmti {
+
+template <typename FnType>
+ALWAYS_INLINE static inline FnType* GetCallback(ArtJvmTiEnv* env, jvmtiEvent event) {
+  if (env->event_callbacks == nullptr) {
+    return nullptr;
+  }
+
+  // TODO: Add a type check. Can be done, for example, by an explicitly instantiated template
+  //       function.
+
+  switch (event) {
+    case JVMTI_EVENT_VM_INIT:
+      return reinterpret_cast<FnType*>(env->event_callbacks->VMInit);
+    case JVMTI_EVENT_VM_DEATH:
+      return reinterpret_cast<FnType*>(env->event_callbacks->VMDeath);
+    case JVMTI_EVENT_THREAD_START:
+      return reinterpret_cast<FnType*>(env->event_callbacks->ThreadStart);
+    case JVMTI_EVENT_THREAD_END:
+      return reinterpret_cast<FnType*>(env->event_callbacks->ThreadEnd);
+    case JVMTI_EVENT_CLASS_FILE_LOAD_HOOK:
+      return reinterpret_cast<FnType*>(env->event_callbacks->ClassFileLoadHook);
+    case JVMTI_EVENT_CLASS_LOAD:
+      return reinterpret_cast<FnType*>(env->event_callbacks->ClassLoad);
+    case JVMTI_EVENT_CLASS_PREPARE:
+      return reinterpret_cast<FnType*>(env->event_callbacks->ClassPrepare);
+    case JVMTI_EVENT_VM_START:
+      return reinterpret_cast<FnType*>(env->event_callbacks->VMStart);
+    case JVMTI_EVENT_EXCEPTION:
+      return reinterpret_cast<FnType*>(env->event_callbacks->Exception);
+    case JVMTI_EVENT_EXCEPTION_CATCH:
+      return reinterpret_cast<FnType*>(env->event_callbacks->ExceptionCatch);
+    case JVMTI_EVENT_SINGLE_STEP:
+      return reinterpret_cast<FnType*>(env->event_callbacks->SingleStep);
+    case JVMTI_EVENT_FRAME_POP:
+      return reinterpret_cast<FnType*>(env->event_callbacks->FramePop);
+    case JVMTI_EVENT_BREAKPOINT:
+      return reinterpret_cast<FnType*>(env->event_callbacks->Breakpoint);
+    case JVMTI_EVENT_FIELD_ACCESS:
+      return reinterpret_cast<FnType*>(env->event_callbacks->FieldAccess);
+    case JVMTI_EVENT_FIELD_MODIFICATION:
+      return reinterpret_cast<FnType*>(env->event_callbacks->FieldModification);
+    case JVMTI_EVENT_METHOD_ENTRY:
+      return reinterpret_cast<FnType*>(env->event_callbacks->MethodEntry);
+    case JVMTI_EVENT_METHOD_EXIT:
+      return reinterpret_cast<FnType*>(env->event_callbacks->MethodExit);
+    case JVMTI_EVENT_NATIVE_METHOD_BIND:
+      return reinterpret_cast<FnType*>(env->event_callbacks->NativeMethodBind);
+    case JVMTI_EVENT_COMPILED_METHOD_LOAD:
+      return reinterpret_cast<FnType*>(env->event_callbacks->CompiledMethodLoad);
+    case JVMTI_EVENT_COMPILED_METHOD_UNLOAD:
+      return reinterpret_cast<FnType*>(env->event_callbacks->CompiledMethodUnload);
+    case JVMTI_EVENT_DYNAMIC_CODE_GENERATED:
+      return reinterpret_cast<FnType*>(env->event_callbacks->DynamicCodeGenerated);
+    case JVMTI_EVENT_DATA_DUMP_REQUEST:
+      return reinterpret_cast<FnType*>(env->event_callbacks->DataDumpRequest);
+    case JVMTI_EVENT_MONITOR_WAIT:
+      return reinterpret_cast<FnType*>(env->event_callbacks->MonitorWait);
+    case JVMTI_EVENT_MONITOR_WAITED:
+      return reinterpret_cast<FnType*>(env->event_callbacks->MonitorWaited);
+    case JVMTI_EVENT_MONITOR_CONTENDED_ENTER:
+      return reinterpret_cast<FnType*>(env->event_callbacks->MonitorContendedEnter);
+    case JVMTI_EVENT_MONITOR_CONTENDED_ENTERED:
+      return reinterpret_cast<FnType*>(env->event_callbacks->MonitorContendedEntered);
+    case JVMTI_EVENT_RESOURCE_EXHAUSTED:
+      return reinterpret_cast<FnType*>(env->event_callbacks->ResourceExhausted);
+    case JVMTI_EVENT_GARBAGE_COLLECTION_START:
+      return reinterpret_cast<FnType*>(env->event_callbacks->GarbageCollectionStart);
+    case JVMTI_EVENT_GARBAGE_COLLECTION_FINISH:
+      return reinterpret_cast<FnType*>(env->event_callbacks->GarbageCollectionFinish);
+    case JVMTI_EVENT_OBJECT_FREE:
+      return reinterpret_cast<FnType*>(env->event_callbacks->ObjectFree);
+    case JVMTI_EVENT_VM_OBJECT_ALLOC:
+      return reinterpret_cast<FnType*>(env->event_callbacks->VMObjectAlloc);
+  }
+  return nullptr;
+}
+
+template <typename ...Args>
+inline void EventHandler::DispatchEvent(art::Thread* thread, jvmtiEvent event, Args... args) {
+  using FnType = void(jvmtiEnv*, Args...);
+  for (ArtJvmTiEnv* env : envs) {
+    bool dispatch = env->event_masks.global_event_mask.Test(event);
+
+    if (!dispatch && thread != nullptr && env->event_masks.unioned_thread_event_mask.Test(event)) {
+      EventMask* mask = env->event_masks.GetEventMaskOrNull(thread);
+      dispatch = mask != nullptr && mask->Test(event);
+    }
+
+    if (dispatch) {
+      FnType* callback = GetCallback<FnType>(env, event);
+      if (callback != nullptr) {
+        (*callback)(env, args...);
+      }
+    }
+  }
+}
+
+}  // namespace openjdkjvmti
+
+#endif  // ART_RUNTIME_OPENJDKJVMTI_EVENTS_INL_H_
diff --git a/runtime/openjdkjvmti/events.cc b/runtime/openjdkjvmti/events.cc
new file mode 100644
index 0000000..12692a1
--- /dev/null
+++ b/runtime/openjdkjvmti/events.cc
@@ -0,0 +1,327 @@
+/* Copyright (C) 2016 The Android Open Source Project
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This file implements interfaces from the file jvmti.h. This implementation
+ * is licensed under the same terms as the file jvmti.h.  The
+ * copyright and license information for the file jvmti.h follows.
+ *
+ * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+#include "events-inl.h"
+
+#include "art_jvmti.h"
+#include "base/logging.h"
+#include "gc/allocation_listener.h"
+#include "gc/gc_pause_listener.h"
+#include "gc/heap.h"
+#include "instrumentation.h"
+#include "jni_env_ext-inl.h"
+#include "mirror/class.h"
+#include "mirror/object.h"
+#include "runtime.h"
+#include "ScopedLocalRef.h"
+#include "scoped_thread_state_change-inl.h"
+#include "thread-inl.h"
+
+namespace openjdkjvmti {
+
+EventMask& EventMasks::GetEventMask(art::Thread* thread) {
+  if (thread == nullptr) {
+    return global_event_mask;
+  }
+
+  for (auto& pair : thread_event_masks) {
+    const UniqueThread& unique_thread = pair.first;
+    if (unique_thread.first == thread &&
+        unique_thread.second == static_cast<uint32_t>(thread->GetTid())) {
+      return pair.second;
+    }
+  }
+
+  // TODO: Remove old UniqueThread with the same pointer, if exists.
+
+  thread_event_masks.emplace_back(UniqueThread(thread, thread->GetTid()), EventMask());
+  return thread_event_masks.back().second;
+}
+
+EventMask* EventMasks::GetEventMaskOrNull(art::Thread* thread) {
+  if (thread == nullptr) {
+    return &global_event_mask;
+  }
+
+  for (auto& pair : thread_event_masks) {
+    const UniqueThread& unique_thread = pair.first;
+    if (unique_thread.first == thread &&
+        unique_thread.second == static_cast<uint32_t>(thread->GetTid())) {
+      return &pair.second;
+    }
+  }
+
+  return nullptr;
+}
+
+
+void EventMasks::EnableEvent(art::Thread* thread, jvmtiEvent event) {
+  DCHECK(EventMask::EventIsInRange(event));
+  GetEventMask(thread).Set(event);
+  if (thread != nullptr) {
+    unioned_thread_event_mask.Set(event, true);
+  }
+}
+
+void EventMasks::DisableEvent(art::Thread* thread, jvmtiEvent event) {
+  DCHECK(EventMask::EventIsInRange(event));
+  GetEventMask(thread).Set(event, false);
+  if (thread != nullptr) {
+    // Regenerate union for the event.
+    bool union_value = false;
+    for (auto& pair : thread_event_masks) {
+      union_value |= pair.second.Test(event);
+      if (union_value) {
+        break;
+      }
+    }
+    unioned_thread_event_mask.Set(event, union_value);
+  }
+}
+
+void EventHandler::RegisterArtJvmTiEnv(ArtJvmTiEnv* env) {
+  envs.push_back(env);
+}
+
+static bool IsThreadControllable(jvmtiEvent event) {
+  switch (event) {
+    case JVMTI_EVENT_VM_INIT:
+    case JVMTI_EVENT_VM_START:
+    case JVMTI_EVENT_VM_DEATH:
+    case JVMTI_EVENT_THREAD_START:
+    case JVMTI_EVENT_COMPILED_METHOD_LOAD:
+    case JVMTI_EVENT_COMPILED_METHOD_UNLOAD:
+    case JVMTI_EVENT_DYNAMIC_CODE_GENERATED:
+    case JVMTI_EVENT_DATA_DUMP_REQUEST:
+      return false;
+
+    default:
+      return true;
+  }
+}
+
+class JvmtiAllocationListener : public art::gc::AllocationListener {
+ public:
+  explicit JvmtiAllocationListener(EventHandler* handler) : handler_(handler) {}
+
+  void ObjectAllocated(art::Thread* self, art::ObjPtr<art::mirror::Object>* obj, size_t byte_count)
+      OVERRIDE REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    DCHECK_EQ(self, art::Thread::Current());
+
+    if (handler_->IsEventEnabledAnywhere(JVMTI_EVENT_VM_OBJECT_ALLOC)) {
+      art::StackHandleScope<1> hs(self);
+      auto h = hs.NewHandleWrapper(obj);
+      // jvmtiEventVMObjectAlloc parameters:
+      //      jvmtiEnv *jvmti_env,
+      //      JNIEnv* jni_env,
+      //      jthread thread,
+      //      jobject object,
+      //      jclass object_klass,
+      //      jlong size
+      art::JNIEnvExt* jni_env = self->GetJniEnv();
+
+      jthread thread_peer;
+      if (self->IsStillStarting()) {
+        thread_peer = nullptr;
+      } else {
+        thread_peer = jni_env->AddLocalReference<jthread>(self->GetPeer());
+      }
+
+      ScopedLocalRef<jthread> thread(jni_env, thread_peer);
+      ScopedLocalRef<jobject> object(
+          jni_env, jni_env->AddLocalReference<jobject>(*obj));
+      ScopedLocalRef<jclass> klass(
+          jni_env, jni_env->AddLocalReference<jclass>(obj->Ptr()->GetClass()));
+
+      handler_->DispatchEvent(self,
+                              JVMTI_EVENT_VM_OBJECT_ALLOC,
+                              jni_env,
+                              thread.get(),
+                              object.get(),
+                              klass.get(),
+                              byte_count);
+    }
+  }
+
+ private:
+  EventHandler* handler_;
+};
+
+static void SetupObjectAllocationTracking(art::gc::AllocationListener* listener, bool enable) {
+  // We must not hold the mutator lock here, but if we're in FastJNI, for example, we might. For
+  // now, do a workaround: (possibly) acquire and release.
+  art::ScopedObjectAccess soa(art::Thread::Current());
+  art::ScopedThreadSuspension sts(soa.Self(), art::ThreadState::kSuspended);
+  if (enable) {
+    art::Runtime::Current()->GetHeap()->SetAllocationListener(listener);
+  } else {
+    art::Runtime::Current()->GetHeap()->RemoveAllocationListener();
+  }
+}
+
+// Report GC pauses (see spec) as GARBAGE_COLLECTION_START and GARBAGE_COLLECTION_END.
+class JvmtiGcPauseListener : public art::gc::GcPauseListener {
+ public:
+  explicit JvmtiGcPauseListener(EventHandler* handler)
+      : handler_(handler),
+        start_enabled_(false),
+        finish_enabled_(false) {}
+
+  void StartPause() OVERRIDE {
+    handler_->DispatchEvent(nullptr, JVMTI_EVENT_GARBAGE_COLLECTION_START);
+  }
+
+  void EndPause() OVERRIDE {
+    handler_->DispatchEvent(nullptr, JVMTI_EVENT_GARBAGE_COLLECTION_FINISH);
+  }
+
+  bool IsEnabled() {
+    return start_enabled_ || finish_enabled_;
+  }
+
+  void SetStartEnabled(bool e) {
+    start_enabled_ = e;
+  }
+
+  void SetFinishEnabled(bool e) {
+    finish_enabled_ = e;
+  }
+
+ private:
+  EventHandler* handler_;
+  bool start_enabled_;
+  bool finish_enabled_;
+};
+
+static void SetupGcPauseTracking(JvmtiGcPauseListener* listener, jvmtiEvent event, bool enable) {
+  bool old_state = listener->IsEnabled();
+
+  if (event == JVMTI_EVENT_GARBAGE_COLLECTION_START) {
+    listener->SetStartEnabled(enable);
+  } else {
+    listener->SetFinishEnabled(enable);
+  }
+
+  bool new_state = listener->IsEnabled();
+
+  if (old_state != new_state) {
+    if (new_state) {
+      art::Runtime::Current()->GetHeap()->SetGcPauseListener(listener);
+    } else {
+      art::Runtime::Current()->GetHeap()->RemoveGcPauseListener();
+    }
+  }
+}
+
+// Handle special work for the given event type, if necessary.
+void EventHandler::HandleEventType(jvmtiEvent event, bool enable) {
+  switch (event) {
+    case JVMTI_EVENT_VM_OBJECT_ALLOC:
+      SetupObjectAllocationTracking(alloc_listener_.get(), enable);
+      return;
+
+    case JVMTI_EVENT_GARBAGE_COLLECTION_START:
+    case JVMTI_EVENT_GARBAGE_COLLECTION_FINISH:
+      SetupGcPauseTracking(gc_pause_listener_.get(), event, enable);
+      return;
+
+    default:
+      break;
+  }
+}
+
+jvmtiError EventHandler::SetEvent(ArtJvmTiEnv* env,
+                                  art::Thread* thread,
+                                  jvmtiEvent event,
+                                  jvmtiEventMode mode) {
+  if (thread != nullptr) {
+    art::ThreadState state = thread->GetState();
+    if (state == art::ThreadState::kStarting ||
+        state == art::ThreadState::kTerminated ||
+        thread->IsStillStarting()) {
+      return ERR(THREAD_NOT_ALIVE);
+    }
+    if (!IsThreadControllable(event)) {
+      return ERR(ILLEGAL_ARGUMENT);
+    }
+  }
+
+  // TODO: Capability check.
+
+  if (mode != JVMTI_ENABLE && mode != JVMTI_DISABLE) {
+    return ERR(ILLEGAL_ARGUMENT);
+  }
+
+  if (!EventMask::EventIsInRange(event)) {
+    return ERR(INVALID_EVENT_TYPE);
+  }
+
+  bool old_state = global_mask.Test(event);
+
+  if (mode == JVMTI_ENABLE) {
+    env->event_masks.EnableEvent(thread, event);
+    global_mask.Set(event);
+  } else {
+    DCHECK_EQ(mode, JVMTI_DISABLE);
+
+    env->event_masks.DisableEvent(thread, event);
+
+    // Gotta recompute the global mask.
+    bool union_value = false;
+    for (const ArtJvmTiEnv* stored_env : envs) {
+      union_value |= stored_env->event_masks.global_event_mask.Test(event);
+      union_value |= stored_env->event_masks.unioned_thread_event_mask.Test(event);
+      if (union_value) {
+        break;
+      }
+    }
+    global_mask.Set(event, union_value);
+  }
+
+  bool new_state = global_mask.Test(event);
+
+  // Handle any special work required for the event type.
+  if (new_state != old_state) {
+    HandleEventType(event, mode == JVMTI_ENABLE);
+  }
+
+  return ERR(NONE);
+}
+
+EventHandler::EventHandler() {
+  alloc_listener_.reset(new JvmtiAllocationListener(this));
+  gc_pause_listener_.reset(new JvmtiGcPauseListener(this));
+}
+
+EventHandler::~EventHandler() {
+}
+
+}  // namespace openjdkjvmti
diff --git a/runtime/openjdkjvmti/events.h b/runtime/openjdkjvmti/events.h
new file mode 100644
index 0000000..07d6bfd
--- /dev/null
+++ b/runtime/openjdkjvmti/events.h
@@ -0,0 +1,112 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_OPENJDKJVMTI_EVENTS_H_
+#define ART_RUNTIME_OPENJDKJVMTI_EVENTS_H_
+
+#include <bitset>
+#include <vector>
+
+#include "base/logging.h"
+#include "jvmti.h"
+#include "thread.h"
+
+namespace openjdkjvmti {
+
+struct ArtJvmTiEnv;
+class JvmtiAllocationListener;
+class JvmtiGcPauseListener;
+
+struct EventMask {
+  static constexpr size_t kEventsSize = JVMTI_MAX_EVENT_TYPE_VAL - JVMTI_MIN_EVENT_TYPE_VAL + 1;
+  std::bitset<kEventsSize> bit_set;
+
+  static bool EventIsInRange(jvmtiEvent event) {
+    return event >= JVMTI_MIN_EVENT_TYPE_VAL && event <= JVMTI_MAX_EVENT_TYPE_VAL;
+  }
+
+  void Set(jvmtiEvent event, bool value = true) {
+    DCHECK(EventIsInRange(event));
+    bit_set.set(event - JVMTI_MIN_EVENT_TYPE_VAL, value);
+  }
+
+  bool Test(jvmtiEvent event) const {
+    DCHECK(EventIsInRange(event));
+    return bit_set.test(event - JVMTI_MIN_EVENT_TYPE_VAL);
+  }
+};
+
+struct EventMasks {
+  // The globally enabled events.
+  EventMask global_event_mask;
+
+  // The per-thread enabled events.
+
+  // It is not enough to store a Thread pointer, as these may be reused. Use the pointer and the
+  // thread id.
+  // Note: We could just use the tid like tracing does.
+  using UniqueThread = std::pair<art::Thread*, uint32_t>;
+  // TODO: Native thread objects are immovable, so we can use them as keys in an (unordered) map,
+  //       if necessary.
+  std::vector<std::pair<UniqueThread, EventMask>> thread_event_masks;
+
+  // A union of the per-thread events, for fast-pathing.
+  EventMask unioned_thread_event_mask;
+
+  EventMask& GetEventMask(art::Thread* thread);
+  EventMask* GetEventMaskOrNull(art::Thread* thread);
+  void EnableEvent(art::Thread* thread, jvmtiEvent event);
+  void DisableEvent(art::Thread* thread, jvmtiEvent event);
+};
+
+// Helper class for event handling.
+class EventHandler {
+ public:
+  EventHandler();
+  ~EventHandler();
+
+  // Register an env. It is assumed that this happens on env creation, that is, no events are
+  // enabled, yet.
+  void RegisterArtJvmTiEnv(ArtJvmTiEnv* env);
+
+  bool IsEventEnabledAnywhere(jvmtiEvent event) {
+    if (!EventMask::EventIsInRange(event)) {
+      return false;
+    }
+    return global_mask.Test(event);
+  }
+
+  jvmtiError SetEvent(ArtJvmTiEnv* env, art::Thread* thread, jvmtiEvent event, jvmtiEventMode mode);
+
+  template <typename ...Args>
+  ALWAYS_INLINE inline void DispatchEvent(art::Thread* thread, jvmtiEvent event, Args... args);
+
+ private:
+  void HandleEventType(jvmtiEvent event, bool enable);
+
+  // List of all JvmTiEnv objects that have been created, in their creation order.
+  std::vector<ArtJvmTiEnv*> envs;
+
+  // A union of all enabled events, anywhere.
+  EventMask global_mask;
+
+  std::unique_ptr<JvmtiAllocationListener> alloc_listener_;
+  std::unique_ptr<JvmtiGcPauseListener> gc_pause_listener_;
+};
+
+}  // namespace openjdkjvmti
+
+#endif  // ART_RUNTIME_OPENJDKJVMTI_EVENTS_H_
diff --git a/runtime/openjdkjvmti/jvmti_allocator.h b/runtime/openjdkjvmti/jvmti_allocator.h
new file mode 100644
index 0000000..1225c14
--- /dev/null
+++ b/runtime/openjdkjvmti/jvmti_allocator.h
@@ -0,0 +1,170 @@
+/* Copyright (C) 2016 The Android Open Source Project
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This file implements interfaces from the file jvmti.h. This implementation
+ * is licensed under the same terms as the file jvmti.h.  The
+ * copyright and license information for the file jvmti.h follows.
+ *
+ * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+#ifndef ART_RUNTIME_OPENJDKJVMTI_JVMTI_ALLOCATOR_H_
+#define ART_RUNTIME_OPENJDKJVMTI_JVMTI_ALLOCATOR_H_
+
+#include "base/logging.h"
+#include "base/macros.h"
+#include "jvmti.h"
+
+namespace openjdkjvmti {
+
+template <typename T> class JvmtiAllocator;
+
+template <>
+class JvmtiAllocator<void> {
+ public:
+  typedef void value_type;
+  typedef void* pointer;
+  typedef const void* const_pointer;
+
+  template <typename U>
+  struct rebind {
+    typedef JvmtiAllocator<U> other;
+  };
+
+  explicit JvmtiAllocator(jvmtiEnv* env) : env_(env) {}
+
+  template <typename U>
+  JvmtiAllocator(const JvmtiAllocator<U>& other)  // NOLINT, implicit
+      : env_(other.env_) {}
+
+  JvmtiAllocator(const JvmtiAllocator& other) = default;
+  JvmtiAllocator& operator=(const JvmtiAllocator& other) = default;
+  ~JvmtiAllocator() = default;
+
+ private:
+  jvmtiEnv* env_;
+
+  template <typename U>
+  friend class JvmtiAllocator;
+
+  template <typename U>
+  friend bool operator==(const JvmtiAllocator<U>& lhs, const JvmtiAllocator<U>& rhs);
+};
+
+template <typename T>
+class JvmtiAllocator {
+ public:
+  typedef T value_type;
+  typedef T* pointer;
+  typedef T& reference;
+  typedef const T* const_pointer;
+  typedef const T& const_reference;
+  typedef size_t size_type;
+  typedef ptrdiff_t difference_type;
+
+  template <typename U>
+  struct rebind {
+    typedef JvmtiAllocator<U> other;
+  };
+
+  explicit JvmtiAllocator(jvmtiEnv* env) : env_(env) {}
+
+  template <typename U>
+  JvmtiAllocator(const JvmtiAllocator<U>& other)  // NOLINT, implicit
+      : env_(other.env_) {}
+
+  JvmtiAllocator(const JvmtiAllocator& other) = default;
+  JvmtiAllocator& operator=(const JvmtiAllocator& other) = default;
+  ~JvmtiAllocator() = default;
+
+  size_type max_size() const {
+    return static_cast<size_type>(-1) / sizeof(T);
+  }
+
+  pointer address(reference x) const { return &x; }
+  const_pointer address(const_reference x) const { return &x; }
+
+  pointer allocate(size_type n, JvmtiAllocator<void>::pointer hint ATTRIBUTE_UNUSED = nullptr) {
+    DCHECK_LE(n, max_size());
+    if (env_ == nullptr) {
+      T* result = reinterpret_cast<T*>(malloc(n * sizeof(T)));
+      CHECK(result != nullptr || n == 0u);  // Abort if malloc() fails.
+      return result;
+    } else {
+      unsigned char* result;
+      jvmtiError alloc_error = env_->Allocate(n * sizeof(T), &result);
+      CHECK(alloc_error == JVMTI_ERROR_NONE);
+      return reinterpret_cast<T*>(result);
+    }
+  }
+  void deallocate(pointer p, size_type n ATTRIBUTE_UNUSED) {
+    if (env_ == nullptr) {
+      free(p);
+    } else {
+      jvmtiError dealloc_error = env_->Deallocate(reinterpret_cast<unsigned char*>(p));
+      CHECK(dealloc_error == JVMTI_ERROR_NONE);
+    }
+  }
+
+  void construct(pointer p, const_reference val) {
+    new (static_cast<void*>(p)) value_type(val);
+  }
+  template <class U, class... Args>
+  void construct(U* p, Args&&... args) {
+    ::new (static_cast<void*>(p)) U(std::forward<Args>(args)...);
+  }
+  void destroy(pointer p) {
+    p->~value_type();
+  }
+
+  inline bool operator==(JvmtiAllocator const& other) {
+    return env_ == other.env_;
+  }
+  inline bool operator!=(JvmtiAllocator const& other) {
+    return !operator==(other);
+  }
+
+ private:
+  jvmtiEnv* env_;
+
+  template <typename U>
+  friend class JvmtiAllocator;
+
+  template <typename U>
+  friend bool operator==(const JvmtiAllocator<U>& lhs, const JvmtiAllocator<U>& rhs);
+};
+
+template <typename T>
+inline bool operator==(const JvmtiAllocator<T>& lhs, const JvmtiAllocator<T>& rhs) {
+  return lhs.env_ == rhs.env_;
+}
+
+template <typename T>
+inline bool operator!=(const JvmtiAllocator<T>& lhs, const JvmtiAllocator<T>& rhs) {
+  return !(lhs == rhs);
+}
+
+}  // namespace openjdkjvmti
+
+#endif  // ART_RUNTIME_OPENJDKJVMTI_JVMTI_ALLOCATOR_H_
diff --git a/runtime/openjdkjvmti/object_tagging.cc b/runtime/openjdkjvmti/object_tagging.cc
new file mode 100644
index 0000000..b983e79
--- /dev/null
+++ b/runtime/openjdkjvmti/object_tagging.cc
@@ -0,0 +1,387 @@
+/* Copyright (C) 2016 The Android Open Source Project
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This file implements interfaces from the file jvmti.h. This implementation
+ * is licensed under the same terms as the file jvmti.h.  The
+ * copyright and license information for the file jvmti.h follows.
+ *
+ * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+#include "object_tagging.h"
+
+#include <limits>
+
+#include "art_jvmti.h"
+#include "base/logging.h"
+#include "events-inl.h"
+#include "gc/allocation_listener.h"
+#include "instrumentation.h"
+#include "jni_env_ext-inl.h"
+#include "jvmti_allocator.h"
+#include "mirror/class.h"
+#include "mirror/object.h"
+#include "runtime.h"
+#include "ScopedLocalRef.h"
+
+namespace openjdkjvmti {
+
+void ObjectTagTable::Lock() {
+  allow_disallow_lock_.ExclusiveLock(art::Thread::Current());
+}
+void ObjectTagTable::Unlock() {
+  allow_disallow_lock_.ExclusiveUnlock(art::Thread::Current());
+}
+void ObjectTagTable::AssertLocked() {
+  allow_disallow_lock_.AssertHeld(art::Thread::Current());
+}
+
+void ObjectTagTable::UpdateTableWithReadBarrier() {
+  update_since_last_sweep_ = true;
+
+  auto WithReadBarrierUpdater = [&](const art::GcRoot<art::mirror::Object>& original_root,
+                                    art::mirror::Object* original_obj ATTRIBUTE_UNUSED)
+     REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    return original_root.Read<art::kWithReadBarrier>();
+  };
+
+  UpdateTableWith<decltype(WithReadBarrierUpdater), kIgnoreNull>(WithReadBarrierUpdater);
+}
+
+bool ObjectTagTable::GetTagSlowPath(art::Thread* self, art::mirror::Object* obj, jlong* result) {
+  // Under concurrent GC, there is a window between moving objects and sweeping of system
+  // weaks in which mutators are active. We may receive a to-space object pointer in obj,
+  // but still have from-space pointers in the table. Explicitly update the table once.
+  // Note: this will keep *all* objects in the table live, but should be a rare occurrence.
+  UpdateTableWithReadBarrier();
+  return GetTagLocked(self, obj, result);
+}
+
+void ObjectTagTable::Add(art::mirror::Object* obj, jlong tag) {
+  // Same as Set(), as we don't have duplicates in an unordered_map.
+  Set(obj, tag);
+}
+
+bool ObjectTagTable::Remove(art::mirror::Object* obj, jlong* tag) {
+  art::Thread* self = art::Thread::Current();
+  art::MutexLock mu(self, allow_disallow_lock_);
+  Wait(self);
+
+  return RemoveLocked(self, obj, tag);
+}
+bool ObjectTagTable::RemoveLocked(art::mirror::Object* obj, jlong* tag) {
+  art::Thread* self = art::Thread::Current();
+  allow_disallow_lock_.AssertHeld(self);
+  Wait(self);
+
+  return RemoveLocked(self, obj, tag);
+}
+
+bool ObjectTagTable::RemoveLocked(art::Thread* self, art::mirror::Object* obj, jlong* tag) {
+  auto it = tagged_objects_.find(art::GcRoot<art::mirror::Object>(obj));
+  if (it != tagged_objects_.end()) {
+    if (tag != nullptr) {
+      *tag = it->second;
+    }
+    tagged_objects_.erase(it);
+    return true;
+  }
+
+  if (art::kUseReadBarrier && self->GetIsGcMarking() && !update_since_last_sweep_) {
+    // Under concurrent GC, there is a window between moving objects and sweeping of system
+    // weaks in which mutators are active. We may receive a to-space object pointer in obj,
+    // but still have from-space pointers in the table. Explicitly update the table once.
+    // Note: this will keep *all* objects in the table live, but should be a rare occurrence.
+
+    // Update the table.
+    UpdateTableWithReadBarrier();
+
+    // And try again.
+    return RemoveLocked(self, obj, tag);
+  }
+
+  // Not in here.
+  return false;
+}
+
+bool ObjectTagTable::Set(art::mirror::Object* obj, jlong new_tag) {
+  if (new_tag == 0) {
+    jlong tmp;
+    return Remove(obj, &tmp);
+  }
+
+  art::Thread* self = art::Thread::Current();
+  art::MutexLock mu(self, allow_disallow_lock_);
+  Wait(self);
+
+  return SetLocked(self, obj, new_tag);
+}
+bool ObjectTagTable::SetLocked(art::mirror::Object* obj, jlong new_tag) {
+  if (new_tag == 0) {
+    jlong tmp;
+    return RemoveLocked(obj, &tmp);
+  }
+
+  art::Thread* self = art::Thread::Current();
+  allow_disallow_lock_.AssertHeld(self);
+  Wait(self);
+
+  return SetLocked(self, obj, new_tag);
+}
+
+bool ObjectTagTable::SetLocked(art::Thread* self, art::mirror::Object* obj, jlong new_tag) {
+  auto it = tagged_objects_.find(art::GcRoot<art::mirror::Object>(obj));
+  if (it != tagged_objects_.end()) {
+    it->second = new_tag;
+    return true;
+  }
+
+  if (art::kUseReadBarrier && self->GetIsGcMarking() && !update_since_last_sweep_) {
+    // Under concurrent GC, there is a window between moving objects and sweeping of system
+    // weaks in which mutators are active. We may receive a to-space object pointer in obj,
+    // but still have from-space pointers in the table. Explicitly update the table once.
+    // Note: this will keep *all* objects in the table live, but should be a rare occurrence.
+
+    // Update the table.
+    UpdateTableWithReadBarrier();
+
+    // And try again.
+    return SetLocked(self, obj, new_tag);
+  }
+
+  // New element.
+  auto insert_it = tagged_objects_.emplace(art::GcRoot<art::mirror::Object>(obj), new_tag);
+  DCHECK(insert_it.second);
+  return false;
+}
+
+void ObjectTagTable::Sweep(art::IsMarkedVisitor* visitor) {
+  if (event_handler_->IsEventEnabledAnywhere(JVMTI_EVENT_OBJECT_FREE)) {
+    SweepImpl<true>(visitor);
+  } else {
+    SweepImpl<false>(visitor);
+  }
+
+  // Under concurrent GC, there is a window between moving objects and sweeping of system
+  // weaks in which mutators are active. We may receive a to-space object pointer in obj,
+  // but still have from-space pointers in the table. We explicitly update the table then
+  // to ensure we compare against to-space pointers. But we want to do this only once. Once
+  // sweeping is done, we know all objects are to-space pointers until the next GC cycle,
+  // so we re-enable the explicit update for the next marking.
+  update_since_last_sweep_ = false;
+}
+
+template <bool kHandleNull>
+void ObjectTagTable::SweepImpl(art::IsMarkedVisitor* visitor) {
+  art::Thread* self = art::Thread::Current();
+  art::MutexLock mu(self, allow_disallow_lock_);
+
+  auto IsMarkedUpdater = [&](const art::GcRoot<art::mirror::Object>& original_root ATTRIBUTE_UNUSED,
+                             art::mirror::Object* original_obj) {
+    return visitor->IsMarked(original_obj);
+  };
+
+  UpdateTableWith<decltype(IsMarkedUpdater),
+                  kHandleNull ? kCallHandleNull : kRemoveNull>(IsMarkedUpdater);
+}
+
+void ObjectTagTable::HandleNullSweep(jlong tag) {
+  event_handler_->DispatchEvent(nullptr, JVMTI_EVENT_OBJECT_FREE, tag);
+}
+
+template <typename T, ObjectTagTable::TableUpdateNullTarget kTargetNull>
+ALWAYS_INLINE inline void ObjectTagTable::UpdateTableWith(T& updater) {
+  // We optimistically hope that elements will still be well-distributed when re-inserting them.
+  // So play with the map mechanics, and postpone rehashing. This avoids the need of a side
+  // vector and two passes.
+  float original_max_load_factor = tagged_objects_.max_load_factor();
+  tagged_objects_.max_load_factor(std::numeric_limits<float>::max());
+  // For checking that a max load-factor actually does what we expect.
+  size_t original_bucket_count = tagged_objects_.bucket_count();
+
+  for (auto it = tagged_objects_.begin(); it != tagged_objects_.end();) {
+    DCHECK(!it->first.IsNull());
+    art::mirror::Object* original_obj = it->first.Read<art::kWithoutReadBarrier>();
+    art::mirror::Object* target_obj = updater(it->first, original_obj);
+    if (original_obj != target_obj) {
+      if (kTargetNull == kIgnoreNull && target_obj == nullptr) {
+        // Ignore null target, don't do anything.
+      } else {
+        jlong tag = it->second;
+        it = tagged_objects_.erase(it);
+        if (target_obj != nullptr) {
+          tagged_objects_.emplace(art::GcRoot<art::mirror::Object>(target_obj), tag);
+          DCHECK_EQ(original_bucket_count, tagged_objects_.bucket_count());
+        } else if (kTargetNull == kCallHandleNull) {
+          HandleNullSweep(tag);
+        }
+        continue;  // Iterator was implicitly updated by erase.
+      }
+    }
+    it++;
+  }
+
+  tagged_objects_.max_load_factor(original_max_load_factor);
+  // TODO: consider rehash here.
+}
+
+template <typename T, class Allocator = std::allocator<T>>
+struct ReleasableContainer {
+  using allocator_type = Allocator;
+
+  explicit ReleasableContainer(const allocator_type& alloc, size_t reserve = 10)
+      : allocator(alloc),
+        data(reserve > 0 ? allocator.allocate(reserve) : nullptr),
+        size(0),
+        capacity(reserve) {
+  }
+
+  ~ReleasableContainer() {
+    if (data != nullptr) {
+      allocator.deallocate(data, capacity);
+      capacity = 0;
+      size = 0;
+    }
+  }
+
+  T* Release() {
+    T* tmp = data;
+
+    data = nullptr;
+    size = 0;
+    capacity = 0;
+
+    return tmp;
+  }
+
+  void Resize(size_t new_capacity) {
+    CHECK_GT(new_capacity, capacity);
+
+    T* tmp = allocator.allocate(new_capacity);
+    DCHECK(tmp != nullptr);
+    if (data != nullptr) {
+      memcpy(tmp, data, sizeof(T) * size);
+    }
+    T* old = data;
+    data = tmp;
+    allocator.deallocate(old, capacity);
+    capacity = new_capacity;
+  }
+
+  void Pushback(const T& elem) {
+    if (size == capacity) {
+      size_t new_capacity = 2 * capacity + 1;
+      Resize(new_capacity);
+    }
+    data[size++] = elem;
+  }
+
+  Allocator allocator;
+  T* data;
+  size_t size;
+  size_t capacity;
+};
+
+jvmtiError ObjectTagTable::GetTaggedObjects(jvmtiEnv* jvmti_env,
+                                            jint tag_count,
+                                            const jlong* tags,
+                                            jint* count_ptr,
+                                            jobject** object_result_ptr,
+                                            jlong** tag_result_ptr) {
+  if (tag_count < 0) {
+    return ERR(ILLEGAL_ARGUMENT);
+  }
+  if (tag_count > 0) {
+    for (size_t i = 0; i != static_cast<size_t>(tag_count); ++i) {
+      if (tags[i] == 0) {
+        return ERR(ILLEGAL_ARGUMENT);
+      }
+    }
+  }
+  if (tags == nullptr) {
+    return ERR(NULL_POINTER);
+  }
+  if (count_ptr == nullptr) {
+    return ERR(NULL_POINTER);
+  }
+
+  art::Thread* self = art::Thread::Current();
+  art::MutexLock mu(self, allow_disallow_lock_);
+  Wait(self);
+
+  art::JNIEnvExt* jni_env = self->GetJniEnv();
+
+  constexpr size_t kDefaultSize = 10;
+  size_t initial_object_size;
+  size_t initial_tag_size;
+  if (tag_count == 0) {
+    initial_object_size = (object_result_ptr != nullptr) ? tagged_objects_.size() : 0;
+    initial_tag_size = (tag_result_ptr != nullptr) ? tagged_objects_.size() : 0;
+  } else {
+    initial_object_size = initial_tag_size = kDefaultSize;
+  }
+  JvmtiAllocator<void> allocator(jvmti_env);
+  ReleasableContainer<jobject, JvmtiAllocator<jobject>> selected_objects(allocator, initial_object_size);
+  ReleasableContainer<jlong, JvmtiAllocator<jlong>> selected_tags(allocator, initial_tag_size);
+
+  size_t count = 0;
+  for (auto& pair : tagged_objects_) {
+    bool select;
+    if (tag_count > 0) {
+      select = false;
+      for (size_t i = 0; i != static_cast<size_t>(tag_count); ++i) {
+        if (tags[i] == pair.second) {
+          select = true;
+          break;
+        }
+      }
+    } else {
+      select = true;
+    }
+
+    if (select) {
+      art::mirror::Object* obj = pair.first.Read<art::kWithReadBarrier>();
+      if (obj != nullptr) {
+        count++;
+        if (object_result_ptr != nullptr) {
+          selected_objects.Pushback(jni_env->AddLocalReference<jobject>(obj));
+        }
+        if (tag_result_ptr != nullptr) {
+          selected_tags.Pushback(pair.second);
+        }
+      }
+    }
+  }
+
+  if (object_result_ptr != nullptr) {
+    *object_result_ptr = selected_objects.Release();
+  }
+  if (tag_result_ptr != nullptr) {
+    *tag_result_ptr = selected_tags.Release();
+  }
+  *count_ptr = static_cast<jint>(count);
+  return ERR(NONE);
+}
+
+}  // namespace openjdkjvmti
diff --git a/runtime/openjdkjvmti/object_tagging.h b/runtime/openjdkjvmti/object_tagging.h
new file mode 100644
index 0000000..0296f1a
--- /dev/null
+++ b/runtime/openjdkjvmti/object_tagging.h
@@ -0,0 +1,201 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_OPENJDKJVMTI_OBJECT_TAGGING_H_
+#define ART_RUNTIME_OPENJDKJVMTI_OBJECT_TAGGING_H_
+
+#include <unordered_map>
+
+#include "base/mutex.h"
+#include "gc/system_weak.h"
+#include "gc_root-inl.h"
+#include "globals.h"
+#include "jvmti.h"
+#include "mirror/object.h"
+#include "thread-inl.h"
+
+namespace openjdkjvmti {
+
+class EventHandler;
+
+class ObjectTagTable : public art::gc::SystemWeakHolder {
+ public:
+  explicit ObjectTagTable(EventHandler* event_handler)
+      : art::gc::SystemWeakHolder(kTaggingLockLevel),
+        update_since_last_sweep_(false),
+        event_handler_(event_handler) {
+  }
+
+  void Add(art::mirror::Object* obj, jlong tag)
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(!allow_disallow_lock_);
+
+  bool Remove(art::mirror::Object* obj, jlong* tag)
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(!allow_disallow_lock_);
+  bool RemoveLocked(art::mirror::Object* obj, jlong* tag)
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(allow_disallow_lock_);
+
+  bool Set(art::mirror::Object* obj, jlong tag)
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(!allow_disallow_lock_);
+  bool SetLocked(art::mirror::Object* obj, jlong tag)
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(allow_disallow_lock_);
+
+  bool GetTag(art::mirror::Object* obj, jlong* result)
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(!allow_disallow_lock_) {
+    art::Thread* self = art::Thread::Current();
+    art::MutexLock mu(self, allow_disallow_lock_);
+    Wait(self);
+
+    return GetTagLocked(self, obj, result);
+  }
+  bool GetTagLocked(art::mirror::Object* obj, jlong* result)
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(allow_disallow_lock_) {
+    art::Thread* self = art::Thread::Current();
+    allow_disallow_lock_.AssertHeld(self);
+    Wait(self);
+
+    return GetTagLocked(self, obj, result);
+  }
+
+  jlong GetTagOrZero(art::mirror::Object* obj)
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(!allow_disallow_lock_) {
+    jlong tmp = 0;
+    GetTag(obj, &tmp);
+    return tmp;
+  }
+  jlong GetTagOrZeroLocked(art::mirror::Object* obj)
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(allow_disallow_lock_) {
+    jlong tmp = 0;
+    GetTagLocked(obj, &tmp);
+    return tmp;
+  }
+
+  void Sweep(art::IsMarkedVisitor* visitor)
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(!allow_disallow_lock_);
+
+  jvmtiError GetTaggedObjects(jvmtiEnv* jvmti_env,
+                              jint tag_count,
+                              const jlong* tags,
+                              jint* count_ptr,
+                              jobject** object_result_ptr,
+                              jlong** tag_result_ptr)
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(!allow_disallow_lock_);
+
+  void Lock() ACQUIRE(allow_disallow_lock_);
+  void Unlock() RELEASE(allow_disallow_lock_);
+  void AssertLocked() ASSERT_CAPABILITY(allow_disallow_lock_);
+
+ private:
+  bool SetLocked(art::Thread* self, art::mirror::Object* obj, jlong tag)
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(allow_disallow_lock_);
+
+  bool RemoveLocked(art::Thread* self, art::mirror::Object* obj, jlong* tag)
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(allow_disallow_lock_);
+
+  bool GetTagLocked(art::Thread* self, art::mirror::Object* obj, jlong* result)
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(allow_disallow_lock_) {
+    auto it = tagged_objects_.find(art::GcRoot<art::mirror::Object>(obj));
+    if (it != tagged_objects_.end()) {
+      *result = it->second;
+      return true;
+    }
+
+    if (art::kUseReadBarrier &&
+        self != nullptr &&
+        self->GetIsGcMarking() &&
+        !update_since_last_sweep_) {
+      return GetTagSlowPath(self, obj, result);
+    }
+
+    return false;
+  }
+
+  // Slow-path for GetTag. We didn't find the object, but we might be storing from-pointers and
+  // are asked to retrieve with a to-pointer.
+  bool GetTagSlowPath(art::Thread* self, art::mirror::Object* obj, jlong* result)
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(allow_disallow_lock_);
+
+  // Update the table by doing read barriers on each element, ensuring that to-space pointers
+  // are stored.
+  void UpdateTableWithReadBarrier()
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(allow_disallow_lock_);
+
+  template <bool kHandleNull>
+  void SweepImpl(art::IsMarkedVisitor* visitor)
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(!allow_disallow_lock_);
+  void HandleNullSweep(jlong tag);
+
+  enum TableUpdateNullTarget {
+    kIgnoreNull,
+    kRemoveNull,
+    kCallHandleNull
+  };
+
+  template <typename T, TableUpdateNullTarget kTargetNull>
+  void UpdateTableWith(T& updater)
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(allow_disallow_lock_);
+
+  struct HashGcRoot {
+    size_t operator()(const art::GcRoot<art::mirror::Object>& r) const
+        REQUIRES_SHARED(art::Locks::mutator_lock_) {
+      return reinterpret_cast<uintptr_t>(r.Read<art::kWithoutReadBarrier>());
+    }
+  };
+
+  struct EqGcRoot {
+    bool operator()(const art::GcRoot<art::mirror::Object>& r1,
+                    const art::GcRoot<art::mirror::Object>& r2) const
+        REQUIRES_SHARED(art::Locks::mutator_lock_) {
+      return r1.Read<art::kWithoutReadBarrier>() == r2.Read<art::kWithoutReadBarrier>();
+    }
+  };
+
+  // The tag table is used when visiting roots. So it needs to have a low lock level.
+  static constexpr art::LockLevel kTaggingLockLevel =
+      static_cast<art::LockLevel>(art::LockLevel::kAbortLock + 1);
+
+  std::unordered_map<art::GcRoot<art::mirror::Object>,
+                     jlong,
+                     HashGcRoot,
+                     EqGcRoot> tagged_objects_
+      GUARDED_BY(allow_disallow_lock_)
+      GUARDED_BY(art::Locks::mutator_lock_);
+  // To avoid repeatedly scanning the whole table, remember if we did that since the last sweep.
+  bool update_since_last_sweep_;
+
+  EventHandler* event_handler_;
+};
+
+}  // namespace openjdkjvmti
+
+#endif  // ART_RUNTIME_OPENJDKJVMTI_OBJECT_TAGGING_H_
diff --git a/runtime/openjdkjvmti/ti_class.cc b/runtime/openjdkjvmti/ti_class.cc
new file mode 100644
index 0000000..de2076a
--- /dev/null
+++ b/runtime/openjdkjvmti/ti_class.cc
@@ -0,0 +1,73 @@
+/* Copyright (C) 2016 The Android Open Source Project
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This file implements interfaces from the file jvmti.h. This implementation
+ * is licensed under the same terms as the file jvmti.h.  The
+ * copyright and license information for the file jvmti.h follows.
+ *
+ * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+#include "ti_class.h"
+
+#include "art_jvmti.h"
+#include "scoped_thread_state_change-inl.h"
+#include "thread-inl.h"
+
+namespace openjdkjvmti {
+
+jvmtiError ClassUtil::GetClassSignature(jvmtiEnv* env,
+                                         jclass jklass,
+                                         char** signature_ptr,
+                                         char** generic_ptr) {
+  art::ScopedObjectAccess soa(art::Thread::Current());
+  art::ObjPtr<art::mirror::Class> klass = soa.Decode<art::mirror::Class>(jklass);
+  if (klass == nullptr) {
+    return ERR(INVALID_CLASS);
+  }
+
+  JvmtiUniquePtr sig_copy;
+  if (signature_ptr != nullptr) {
+    std::string storage;
+    const char* descriptor = klass->GetDescriptor(&storage);
+
+    unsigned char* tmp;
+    jvmtiError ret = CopyString(env, descriptor, &tmp);
+    if (ret != ERR(NONE)) {
+      return ret;
+    }
+    sig_copy = MakeJvmtiUniquePtr(env, tmp);
+    *signature_ptr = reinterpret_cast<char*>(tmp);
+  }
+
+  // TODO: Support generic signature.
+  *generic_ptr = nullptr;
+
+  // Everything is fine, release the buffers.
+  sig_copy.release();
+
+  return ERR(NONE);
+}
+
+}  // namespace openjdkjvmti
diff --git a/runtime/openjdkjvmti/ti_class.h b/runtime/openjdkjvmti/ti_class.h
new file mode 100644
index 0000000..caa77d4
--- /dev/null
+++ b/runtime/openjdkjvmti/ti_class.h
@@ -0,0 +1,50 @@
+/* Copyright (C) 2016 The Android Open Source Project
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This file implements interfaces from the file jvmti.h. This implementation
+ * is licensed under the same terms as the file jvmti.h.  The
+ * copyright and license information for the file jvmti.h follows.
+ *
+ * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+#ifndef ART_RUNTIME_OPENJDKJVMTI_TI_CLASS_H_
+#define ART_RUNTIME_OPENJDKJVMTI_TI_CLASS_H_
+
+#include "jni.h"
+#include "jvmti.h"
+
+namespace openjdkjvmti {
+
+class ClassUtil {
+ public:
+  static jvmtiError GetClassSignature(jvmtiEnv* env,
+                                      jclass klass,
+                                      char** signature_ptr,
+                                      char** generic_ptr);
+};
+
+}  // namespace openjdkjvmti
+
+#endif  // ART_RUNTIME_OPENJDKJVMTI_TI_CLASS_H_
diff --git a/runtime/openjdkjvmti/ti_heap.cc b/runtime/openjdkjvmti/ti_heap.cc
new file mode 100644
index 0000000..0eff469
--- /dev/null
+++ b/runtime/openjdkjvmti/ti_heap.cc
@@ -0,0 +1,684 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "ti_heap.h"
+
+#include "art_field-inl.h"
+#include "art_jvmti.h"
+#include "base/macros.h"
+#include "base/mutex.h"
+#include "class_linker.h"
+#include "gc/heap.h"
+#include "gc_root-inl.h"
+#include "jni_env_ext.h"
+#include "jni_internal.h"
+#include "mirror/class.h"
+#include "mirror/object-inl.h"
+#include "mirror/object_array-inl.h"
+#include "object_callbacks.h"
+#include "object_tagging.h"
+#include "obj_ptr-inl.h"
+#include "runtime.h"
+#include "scoped_thread_state_change-inl.h"
+#include "thread-inl.h"
+#include "thread_list.h"
+
+namespace openjdkjvmti {
+
+struct IterateThroughHeapData {
+  IterateThroughHeapData(HeapUtil* _heap_util,
+                         jint heap_filter,
+                         art::ObjPtr<art::mirror::Class> klass,
+                         const jvmtiHeapCallbacks* _callbacks,
+                         const void* _user_data)
+      : heap_util(_heap_util),
+        filter_klass(klass),
+        callbacks(_callbacks),
+        user_data(_user_data),
+        filter_out_tagged((heap_filter & JVMTI_HEAP_FILTER_TAGGED) != 0),
+        filter_out_untagged((heap_filter & JVMTI_HEAP_FILTER_UNTAGGED) != 0),
+        filter_out_class_tagged((heap_filter & JVMTI_HEAP_FILTER_CLASS_TAGGED) != 0),
+        filter_out_class_untagged((heap_filter & JVMTI_HEAP_FILTER_CLASS_UNTAGGED) != 0),
+        any_filter(filter_out_tagged ||
+                   filter_out_untagged ||
+                   filter_out_class_tagged ||
+                   filter_out_class_untagged),
+        stop_reports(false) {
+  }
+
+  bool ShouldReportByHeapFilter(jlong tag, jlong class_tag) {
+    if (!any_filter) {
+      return true;
+    }
+
+    if ((tag == 0 && filter_out_untagged) || (tag != 0 && filter_out_tagged)) {
+      return false;
+    }
+
+    if ((class_tag == 0 && filter_out_class_untagged) ||
+        (class_tag != 0 && filter_out_class_tagged)) {
+      return false;
+    }
+
+    return true;
+  }
+
+  HeapUtil* heap_util;
+  art::ObjPtr<art::mirror::Class> filter_klass;
+  const jvmtiHeapCallbacks* callbacks;
+  const void* user_data;
+  const bool filter_out_tagged;
+  const bool filter_out_untagged;
+  const bool filter_out_class_tagged;
+  const bool filter_out_class_untagged;
+  const bool any_filter;
+
+  bool stop_reports;
+};
+
+static void IterateThroughHeapObjectCallback(art::mirror::Object* obj, void* arg)
+    REQUIRES_SHARED(art::Locks::mutator_lock_) {
+  IterateThroughHeapData* ithd = reinterpret_cast<IterateThroughHeapData*>(arg);
+  // Early return, as we can't really stop visiting.
+  if (ithd->stop_reports) {
+    return;
+  }
+
+  art::ScopedAssertNoThreadSuspension no_suspension("IterateThroughHeapCallback");
+
+  jlong tag = 0;
+  ithd->heap_util->GetTags()->GetTag(obj, &tag);
+
+  jlong class_tag = 0;
+  art::ObjPtr<art::mirror::Class> klass = obj->GetClass();
+  ithd->heap_util->GetTags()->GetTag(klass.Ptr(), &class_tag);
+  // For simplicity, even if we find a tag = 0, assume 0 = not tagged.
+
+  if (!ithd->ShouldReportByHeapFilter(tag, class_tag)) {
+    return;
+  }
+
+  // TODO: Handle array_primitive_value_callback.
+
+  if (ithd->filter_klass != nullptr) {
+    if (ithd->filter_klass != klass) {
+      return;
+    }
+  }
+
+  jlong size = obj->SizeOf();
+
+  jint length = -1;
+  if (obj->IsArrayInstance()) {
+    length = obj->AsArray()->GetLength();
+  }
+
+  jlong saved_tag = tag;
+  jint ret = ithd->callbacks->heap_iteration_callback(class_tag,
+                                                      size,
+                                                      &tag,
+                                                      length,
+                                                      const_cast<void*>(ithd->user_data));
+
+  if (tag != saved_tag) {
+    ithd->heap_util->GetTags()->Set(obj, tag);
+  }
+
+  ithd->stop_reports = (ret & JVMTI_VISIT_ABORT) != 0;
+
+  // TODO Implement array primitive and string primitive callback.
+  // TODO Implement primitive field callback.
+}
+
+jvmtiError HeapUtil::IterateThroughHeap(jvmtiEnv* env ATTRIBUTE_UNUSED,
+                                        jint heap_filter,
+                                        jclass klass,
+                                        const jvmtiHeapCallbacks* callbacks,
+                                        const void* user_data) {
+  if (callbacks == nullptr) {
+    return ERR(NULL_POINTER);
+  }
+
+  if (callbacks->array_primitive_value_callback != nullptr) {
+    // TODO: Implement.
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  art::Thread* self = art::Thread::Current();
+  art::ScopedObjectAccess soa(self);      // Now we know we have the shared lock.
+
+  IterateThroughHeapData ithd(this,
+                              heap_filter,
+                              soa.Decode<art::mirror::Class>(klass),
+                              callbacks,
+                              user_data);
+
+  art::Runtime::Current()->GetHeap()->VisitObjects(IterateThroughHeapObjectCallback, &ithd);
+
+  return ERR(NONE);
+}
+
+class FollowReferencesHelper FINAL {
+ public:
+  FollowReferencesHelper(HeapUtil* h,
+                         art::ObjPtr<art::mirror::Object> initial_object ATTRIBUTE_UNUSED,
+                         const jvmtiHeapCallbacks* callbacks,
+                         const void* user_data)
+      : tag_table_(h->GetTags()),
+        callbacks_(callbacks),
+        user_data_(user_data),
+        start_(0),
+        stop_reports_(false) {
+  }
+
+  void Init()
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(!*tag_table_->GetAllowDisallowLock()) {
+    CollectAndReportRootsVisitor carrv(this, tag_table_, &worklist_, &visited_);
+    art::Runtime::Current()->VisitRoots(&carrv);
+    art::Runtime::Current()->VisitImageRoots(&carrv);
+    stop_reports_ = carrv.IsStopReports();
+
+    if (stop_reports_) {
+      worklist_.clear();
+    }
+  }
+
+  void Work()
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(!*tag_table_->GetAllowDisallowLock()) {
+    // Currently implemented as a BFS. To lower overhead, we don't erase elements immediately
+    // from the head of the work list, instead postponing until there's a gap that's "large."
+    //
+    // Alternatively, we can implement a DFS and use the work list as a stack.
+    while (start_ < worklist_.size()) {
+      art::mirror::Object* cur_obj = worklist_[start_];
+      start_++;
+
+      if (start_ >= kMaxStart) {
+        worklist_.erase(worklist_.begin(), worklist_.begin() + start_);
+        start_ = 0;
+      }
+
+      VisitObject(cur_obj);
+
+      if (stop_reports_) {
+        break;
+      }
+    }
+  }
+
+ private:
+  class CollectAndReportRootsVisitor FINAL : public art::RootVisitor {
+   public:
+    CollectAndReportRootsVisitor(FollowReferencesHelper* helper,
+                                 ObjectTagTable* tag_table,
+                                 std::vector<art::mirror::Object*>* worklist,
+                                 std::unordered_set<art::mirror::Object*>* visited)
+        : helper_(helper),
+          tag_table_(tag_table),
+          worklist_(worklist),
+          visited_(visited),
+          stop_reports_(false) {}
+
+    void VisitRoots(art::mirror::Object*** roots, size_t count, const art::RootInfo& info)
+        OVERRIDE
+        REQUIRES_SHARED(art::Locks::mutator_lock_)
+        REQUIRES(!*helper_->tag_table_->GetAllowDisallowLock()) {
+      for (size_t i = 0; i != count; ++i) {
+        AddRoot(*roots[i], info);
+      }
+    }
+
+    void VisitRoots(art::mirror::CompressedReference<art::mirror::Object>** roots,
+                    size_t count,
+                    const art::RootInfo& info)
+        OVERRIDE REQUIRES_SHARED(art::Locks::mutator_lock_)
+        REQUIRES(!*helper_->tag_table_->GetAllowDisallowLock()) {
+      for (size_t i = 0; i != count; ++i) {
+        AddRoot(roots[i]->AsMirrorPtr(), info);
+      }
+    }
+
+    bool IsStopReports() {
+      return stop_reports_;
+    }
+
+   private:
+    void AddRoot(art::mirror::Object* root_obj, const art::RootInfo& info)
+        REQUIRES_SHARED(art::Locks::mutator_lock_)
+        REQUIRES(!*tag_table_->GetAllowDisallowLock()) {
+      // We use visited_ to mark roots already so we do not need another set.
+      if (visited_->find(root_obj) == visited_->end()) {
+        visited_->insert(root_obj);
+        worklist_->push_back(root_obj);
+      }
+      ReportRoot(root_obj, info);
+    }
+
+    jvmtiHeapReferenceKind GetReferenceKind(const art::RootInfo& info,
+                                            jvmtiHeapReferenceInfo* ref_info)
+        REQUIRES_SHARED(art::Locks::mutator_lock_) {
+      // TODO: Fill in ref_info.
+      memset(ref_info, 0, sizeof(jvmtiHeapReferenceInfo));
+
+      switch (info.GetType()) {
+        case art::RootType::kRootJNIGlobal:
+          return JVMTI_HEAP_REFERENCE_JNI_GLOBAL;
+
+        case art::RootType::kRootJNILocal:
+          return JVMTI_HEAP_REFERENCE_JNI_LOCAL;
+
+        case art::RootType::kRootJavaFrame:
+          return JVMTI_HEAP_REFERENCE_STACK_LOCAL;
+
+        case art::RootType::kRootNativeStack:
+        case art::RootType::kRootThreadBlock:
+        case art::RootType::kRootThreadObject:
+          return JVMTI_HEAP_REFERENCE_THREAD;
+
+        case art::RootType::kRootStickyClass:
+        case art::RootType::kRootInternedString:
+          // Note: this isn't a root in the RI.
+          return JVMTI_HEAP_REFERENCE_SYSTEM_CLASS;
+
+        case art::RootType::kRootMonitorUsed:
+        case art::RootType::kRootJNIMonitor:
+          return JVMTI_HEAP_REFERENCE_MONITOR;
+
+        case art::RootType::kRootFinalizing:
+        case art::RootType::kRootDebugger:
+        case art::RootType::kRootReferenceCleanup:
+        case art::RootType::kRootVMInternal:
+        case art::RootType::kRootUnknown:
+          return JVMTI_HEAP_REFERENCE_OTHER;
+      }
+      LOG(FATAL) << "Unreachable";
+      UNREACHABLE();
+    }
+
+    void ReportRoot(art::mirror::Object* root_obj, const art::RootInfo& info)
+        REQUIRES_SHARED(art::Locks::mutator_lock_)
+        REQUIRES(!*tag_table_->GetAllowDisallowLock()) {
+      jvmtiHeapReferenceInfo ref_info;
+      jvmtiHeapReferenceKind kind = GetReferenceKind(info, &ref_info);
+      jint result = helper_->ReportReference(kind, &ref_info, nullptr, root_obj);
+      if ((result & JVMTI_VISIT_ABORT) != 0) {
+        stop_reports_ = true;
+      }
+    }
+
+   private:
+    FollowReferencesHelper* helper_;
+    ObjectTagTable* tag_table_;
+    std::vector<art::mirror::Object*>* worklist_;
+    std::unordered_set<art::mirror::Object*>* visited_;
+    bool stop_reports_;
+  };
+
+  void VisitObject(art::mirror::Object* obj)
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(!*tag_table_->GetAllowDisallowLock()) {
+    if (obj->IsClass()) {
+      VisitClass(obj->AsClass());
+      return;
+    }
+    if (obj->IsArrayInstance()) {
+      VisitArray(obj);
+      return;
+    }
+
+    // TODO: We'll probably have to rewrite this completely with our own visiting logic, if we
+    //       want to have a chance of getting the field indices computed halfway efficiently. For
+    //       now, ignore them altogether.
+
+    struct InstanceReferenceVisitor {
+      explicit InstanceReferenceVisitor(FollowReferencesHelper* helper_)
+          : helper(helper_), stop_reports(false) {}
+
+      void operator()(art::mirror::Object* src,
+                      art::MemberOffset field_offset,
+                      bool is_static ATTRIBUTE_UNUSED) const
+          REQUIRES_SHARED(art::Locks::mutator_lock_)
+          REQUIRES(!*helper->tag_table_->GetAllowDisallowLock()) {
+        if (stop_reports) {
+          return;
+        }
+
+        art::mirror::Object* trg = src->GetFieldObjectReferenceAddr(field_offset)->AsMirrorPtr();
+        jvmtiHeapReferenceInfo reference_info;
+        memset(&reference_info, 0, sizeof(reference_info));
+
+        // TODO: Implement spec-compliant numbering.
+        reference_info.field.index = field_offset.Int32Value();
+
+        jvmtiHeapReferenceKind kind =
+            field_offset.Int32Value() == art::mirror::Object::ClassOffset().Int32Value()
+                ? JVMTI_HEAP_REFERENCE_CLASS
+                : JVMTI_HEAP_REFERENCE_FIELD;
+        const jvmtiHeapReferenceInfo* reference_info_ptr =
+            kind == JVMTI_HEAP_REFERENCE_CLASS ? nullptr : &reference_info;
+
+        stop_reports = !helper->ReportReferenceMaybeEnqueue(kind, reference_info_ptr, src, trg);
+      }
+
+      void VisitRoot(art::mirror::CompressedReference<art::mirror::Object>* root ATTRIBUTE_UNUSED)
+          const {
+        LOG(FATAL) << "Unreachable";
+      }
+      void VisitRootIfNonNull(
+          art::mirror::CompressedReference<art::mirror::Object>* root ATTRIBUTE_UNUSED) const {
+        LOG(FATAL) << "Unreachable";
+      }
+
+      // "mutable" required by the visitor API.
+      mutable FollowReferencesHelper* helper;
+      mutable bool stop_reports;
+    };
+
+    InstanceReferenceVisitor visitor(this);
+    // Visit references, not native roots.
+    obj->VisitReferences<false>(visitor, art::VoidFunctor());
+
+    stop_reports_ = visitor.stop_reports;
+  }
+
+  void VisitArray(art::mirror::Object* array)
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(!*tag_table_->GetAllowDisallowLock()) {
+    stop_reports_ = !ReportReferenceMaybeEnqueue(JVMTI_HEAP_REFERENCE_CLASS,
+                                                 nullptr,
+                                                 array,
+                                                 array->GetClass());
+    if (stop_reports_) {
+      return;
+    }
+
+    if (array->IsObjectArray()) {
+      art::mirror::ObjectArray<art::mirror::Object>* obj_array =
+          array->AsObjectArray<art::mirror::Object>();
+      int32_t length = obj_array->GetLength();
+      for (int32_t i = 0; i != length; ++i) {
+        art::mirror::Object* elem = obj_array->GetWithoutChecks(i);
+        if (elem != nullptr) {
+          jvmtiHeapReferenceInfo reference_info;
+          reference_info.array.index = i;
+          stop_reports_ = !ReportReferenceMaybeEnqueue(JVMTI_HEAP_REFERENCE_ARRAY_ELEMENT,
+                                                       &reference_info,
+                                                       array,
+                                                       elem);
+          if (stop_reports_) {
+            break;
+          }
+        }
+      }
+    }
+  }
+
+  void VisitClass(art::mirror::Class* klass)
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(!*tag_table_->GetAllowDisallowLock()) {
+    // TODO: Are erroneous classes reported? Are non-prepared ones? For now, just use resolved ones.
+    if (!klass->IsResolved()) {
+      return;
+    }
+
+    // Superclass.
+    stop_reports_ = !ReportReferenceMaybeEnqueue(JVMTI_HEAP_REFERENCE_SUPERCLASS,
+                                                 nullptr,
+                                                 klass,
+                                                 klass->GetSuperClass());
+    if (stop_reports_) {
+      return;
+    }
+
+    // Directly implemented or extended interfaces.
+    art::Thread* self = art::Thread::Current();
+    art::StackHandleScope<1> hs(self);
+    art::Handle<art::mirror::Class> h_klass(hs.NewHandle<art::mirror::Class>(klass));
+    for (size_t i = 0; i < h_klass->NumDirectInterfaces(); ++i) {
+      art::ObjPtr<art::mirror::Class> inf_klass =
+          art::mirror::Class::GetDirectInterface(self, h_klass, i);
+      if (inf_klass == nullptr) {
+        // TODO: With a resolved class this should not happen...
+        self->ClearException();
+        break;
+      }
+
+      stop_reports_ = !ReportReferenceMaybeEnqueue(JVMTI_HEAP_REFERENCE_INTERFACE,
+                                                   nullptr,
+                                                   klass,
+                                                   inf_klass.Ptr());
+      if (stop_reports_) {
+        return;
+      }
+    }
+
+    // Classloader.
+    // TODO: What about the boot classpath loader? We'll skip for now, but do we have to find the
+    //       fake BootClassLoader?
+    if (klass->GetClassLoader() != nullptr) {
+      stop_reports_ = !ReportReferenceMaybeEnqueue(JVMTI_HEAP_REFERENCE_CLASS_LOADER,
+                                                   nullptr,
+                                                   klass,
+                                                   klass->GetClassLoader());
+      if (stop_reports_) {
+        return;
+      }
+    }
+    DCHECK_EQ(h_klass.Get(), klass);
+
+    // Declared static fields.
+    for (auto& field : klass->GetSFields()) {
+      if (!field.IsPrimitiveType()) {
+        art::ObjPtr<art::mirror::Object> field_value = field.GetObject(klass);
+        if (field_value != nullptr) {
+          jvmtiHeapReferenceInfo reference_info;
+          memset(&reference_info, 0, sizeof(reference_info));
+
+          // TODO: Implement spec-compliant numbering.
+          reference_info.field.index = field.GetOffset().Int32Value();
+
+          stop_reports_ = !ReportReferenceMaybeEnqueue(JVMTI_HEAP_REFERENCE_STATIC_FIELD,
+                                                       &reference_info,
+                                                       klass,
+                                                       field_value.Ptr());
+          if (stop_reports_) {
+            return;
+          }
+        }
+      }
+    }
+  }
+
+  void MaybeEnqueue(art::mirror::Object* obj) REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    if (visited_.find(obj) == visited_.end()) {
+      worklist_.push_back(obj);
+      visited_.insert(obj);
+    }
+  }
+
+  bool ReportReferenceMaybeEnqueue(jvmtiHeapReferenceKind kind,
+                                   const jvmtiHeapReferenceInfo* reference_info,
+                                   art::mirror::Object* referree,
+                                   art::mirror::Object* referrer)
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(!*tag_table_->GetAllowDisallowLock()) {
+    jint result = ReportReference(kind, reference_info, referree, referrer);
+    if ((result & JVMTI_VISIT_ABORT) == 0) {
+      if ((result & JVMTI_VISIT_OBJECTS) != 0) {
+        MaybeEnqueue(referrer);
+      }
+      return true;
+    } else {
+      return false;
+    }
+  }
+
+  jint ReportReference(jvmtiHeapReferenceKind kind,
+                       const jvmtiHeapReferenceInfo* reference_info,
+                       art::mirror::Object* referrer,
+                       art::mirror::Object* referree)
+      REQUIRES_SHARED(art::Locks::mutator_lock_)
+      REQUIRES(!*tag_table_->GetAllowDisallowLock()) {
+    if (referree == nullptr || stop_reports_) {
+      return 0;
+    }
+
+    const jlong class_tag = tag_table_->GetTagOrZero(referree->GetClass());
+    const jlong referrer_class_tag =
+        referrer == nullptr ? 0 : tag_table_->GetTagOrZero(referrer->GetClass());
+    const jlong size = static_cast<jlong>(referree->SizeOf());
+    jlong tag = tag_table_->GetTagOrZero(referree);
+    jlong saved_tag = tag;
+    jlong referrer_tag = 0;
+    jlong saved_referrer_tag = 0;
+    jlong* referrer_tag_ptr;
+    if (referrer == nullptr) {
+      referrer_tag_ptr = nullptr;
+    } else {
+      if (referrer == referree) {
+        referrer_tag_ptr = &tag;
+      } else {
+        referrer_tag = saved_referrer_tag = tag_table_->GetTagOrZero(referrer);
+        referrer_tag_ptr = &referrer_tag;
+      }
+    }
+    jint length = -1;
+    if (referree->IsArrayInstance()) {
+      length = referree->AsArray()->GetLength();
+    }
+
+    jint result = callbacks_->heap_reference_callback(kind,
+                                                      reference_info,
+                                                      class_tag,
+                                                      referrer_class_tag,
+                                                      size,
+                                                      &tag,
+                                                      referrer_tag_ptr,
+                                                      length,
+                                                      const_cast<void*>(user_data_));
+
+    if (tag != saved_tag) {
+      tag_table_->Set(referree, tag);
+    }
+    if (referrer_tag != saved_referrer_tag) {
+      tag_table_->Set(referrer, referrer_tag);
+    }
+
+    return result;
+  }
+
+  ObjectTagTable* tag_table_;
+  const jvmtiHeapCallbacks* callbacks_;
+  const void* user_data_;
+
+  std::vector<art::mirror::Object*> worklist_;
+  size_t start_;
+  static constexpr size_t kMaxStart = 1000000U;
+
+  std::unordered_set<art::mirror::Object*> visited_;
+
+  bool stop_reports_;
+
+  friend class CollectAndReportRootsVisitor;
+};
+
+jvmtiError HeapUtil::FollowReferences(jvmtiEnv* env ATTRIBUTE_UNUSED,
+                                      jint heap_filter ATTRIBUTE_UNUSED,
+                                      jclass klass ATTRIBUTE_UNUSED,
+                                      jobject initial_object,
+                                      const jvmtiHeapCallbacks* callbacks,
+                                      const void* user_data) {
+  if (callbacks == nullptr) {
+    return ERR(NULL_POINTER);
+  }
+
+  if (callbacks->array_primitive_value_callback != nullptr) {
+    // TODO: Implement.
+    return ERR(NOT_IMPLEMENTED);
+  }
+
+  art::Thread* self = art::Thread::Current();
+  art::ScopedObjectAccess soa(self);      // Now we know we have the shared lock.
+
+  art::Runtime::Current()->GetHeap()->IncrementDisableMovingGC(self);
+  {
+    art::ObjPtr<art::mirror::Object> o_initial = soa.Decode<art::mirror::Object>(initial_object);
+
+    art::ScopedThreadSuspension sts(self, art::kWaitingForVisitObjects);
+    art::ScopedSuspendAll ssa("FollowReferences");
+
+    FollowReferencesHelper frh(this, o_initial, callbacks, user_data);
+    frh.Init();
+    frh.Work();
+  }
+  art::Runtime::Current()->GetHeap()->DecrementDisableMovingGC(self);
+
+  return ERR(NONE);
+}
+
+jvmtiError HeapUtil::GetLoadedClasses(jvmtiEnv* env,
+                                      jint* class_count_ptr,
+                                      jclass** classes_ptr) {
+  if (class_count_ptr == nullptr || classes_ptr == nullptr) {
+    return ERR(NULL_POINTER);
+  }
+
+  class ReportClassVisitor : public art::ClassVisitor {
+   public:
+    explicit ReportClassVisitor(art::Thread* self) : self_(self) {}
+
+    bool operator()(art::ObjPtr<art::mirror::Class> klass)
+        OVERRIDE REQUIRES_SHARED(art::Locks::mutator_lock_) {
+      classes_.push_back(self_->GetJniEnv()->AddLocalReference<jclass>(klass));
+      return true;
+    }
+
+    art::Thread* self_;
+    std::vector<jclass> classes_;
+  };
+
+  art::Thread* self = art::Thread::Current();
+  ReportClassVisitor rcv(self);
+  {
+    art::ScopedObjectAccess soa(self);
+    art::Runtime::Current()->GetClassLinker()->VisitClasses(&rcv);
+  }
+
+  size_t size = rcv.classes_.size();
+  jclass* classes = nullptr;
+  jvmtiError alloc_ret = env->Allocate(static_cast<jlong>(size * sizeof(jclass)),
+                                       reinterpret_cast<unsigned char**>(&classes));
+  if (alloc_ret != ERR(NONE)) {
+    return alloc_ret;
+  }
+
+  for (size_t i = 0; i < size; ++i) {
+    classes[i] = rcv.classes_[i];
+  }
+  *classes_ptr = classes;
+  *class_count_ptr = static_cast<jint>(size);
+
+  return ERR(NONE);
+}
+
+jvmtiError HeapUtil::ForceGarbageCollection(jvmtiEnv* env ATTRIBUTE_UNUSED) {
+  art::Runtime::Current()->GetHeap()->CollectGarbage(false);
+
+  return ERR(NONE);
+}
+}  // namespace openjdkjvmti
diff --git a/runtime/openjdkjvmti/ti_heap.h b/runtime/openjdkjvmti/ti_heap.h
new file mode 100644
index 0000000..72ee097
--- /dev/null
+++ b/runtime/openjdkjvmti/ti_heap.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_RUNTIME_OPENJDKJVMTI_TI_HEAP_H_
+#define ART_RUNTIME_OPENJDKJVMTI_TI_HEAP_H_
+
+#include "jvmti.h"
+
+namespace openjdkjvmti {
+
+class ObjectTagTable;
+
+class HeapUtil {
+ public:
+  explicit HeapUtil(ObjectTagTable* tags) : tags_(tags) {
+  }
+
+  jvmtiError GetLoadedClasses(jvmtiEnv* env, jint* class_count_ptr, jclass** classes_ptr);
+
+  jvmtiError IterateThroughHeap(jvmtiEnv* env,
+                                jint heap_filter,
+                                jclass klass,
+                                const jvmtiHeapCallbacks* callbacks,
+                                const void* user_data);
+
+  jvmtiError FollowReferences(jvmtiEnv* env,
+                              jint heap_filter,
+                              jclass klass,
+                              jobject initial_object,
+                              const jvmtiHeapCallbacks* callbacks,
+                              const void* user_data);
+
+  static jvmtiError ForceGarbageCollection(jvmtiEnv* env);
+
+  ObjectTagTable* GetTags() {
+    return tags_;
+  }
+
+ private:
+  ObjectTagTable* tags_;
+};
+
+}  // namespace openjdkjvmti
+
+#endif  // ART_RUNTIME_OPENJDKJVMTI_TI_HEAP_H_
diff --git a/runtime/openjdkjvmti/ti_method.cc b/runtime/openjdkjvmti/ti_method.cc
new file mode 100644
index 0000000..e391a9d
--- /dev/null
+++ b/runtime/openjdkjvmti/ti_method.cc
@@ -0,0 +1,131 @@
+/* Copyright (C) 2016 The Android Open Source Project
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This file implements interfaces from the file jvmti.h. This implementation
+ * is licensed under the same terms as the file jvmti.h.  The
+ * copyright and license information for the file jvmti.h follows.
+ *
+ * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+#include "ti_method.h"
+
+#include "art_jvmti.h"
+#include "art_method-inl.h"
+#include "base/enums.h"
+#include "jni_internal.h"
+#include "modifiers.h"
+#include "scoped_thread_state_change-inl.h"
+
+namespace openjdkjvmti {
+
+jvmtiError MethodUtil::GetMethodName(jvmtiEnv* env,
+                                     jmethodID method,
+                                     char** name_ptr,
+                                     char** signature_ptr,
+                                     char** generic_ptr) {
+  art::ScopedObjectAccess soa(art::Thread::Current());
+  art::ArtMethod* art_method = art::jni::DecodeArtMethod(method);
+  art_method = art_method->GetInterfaceMethodIfProxy(art::kRuntimePointerSize);
+
+  JvmtiUniquePtr name_copy;
+  if (name_ptr != nullptr) {
+    const char* method_name = art_method->GetName();
+    if (method_name == nullptr) {
+      method_name = "<error>";
+    }
+    unsigned char* tmp;
+    jvmtiError ret = CopyString(env, method_name, &tmp);
+    if (ret != ERR(NONE)) {
+      return ret;
+    }
+    name_copy = MakeJvmtiUniquePtr(env, tmp);
+    *name_ptr = reinterpret_cast<char*>(tmp);
+  }
+
+  JvmtiUniquePtr signature_copy;
+  if (signature_ptr != nullptr) {
+    const art::Signature sig = art_method->GetSignature();
+    std::string str = sig.ToString();
+    unsigned char* tmp;
+    jvmtiError ret = CopyString(env, str.c_str(), &tmp);
+    if (ret != ERR(NONE)) {
+      return ret;
+    }
+    signature_copy = MakeJvmtiUniquePtr(env, tmp);
+    *signature_ptr = reinterpret_cast<char*>(tmp);
+  }
+
+  // TODO: Support generic signature.
+  *generic_ptr = nullptr;
+
+  // Everything is fine, release the buffers.
+  name_copy.release();
+  signature_copy.release();
+
+  return ERR(NONE);
+}
+
+jvmtiError MethodUtil::GetMethodDeclaringClass(jvmtiEnv* env ATTRIBUTE_UNUSED,
+                                               jmethodID method,
+                                               jclass* declaring_class_ptr) {
+  if (declaring_class_ptr == nullptr) {
+    return ERR(NULL_POINTER);
+  }
+
+  art::ArtMethod* art_method = art::jni::DecodeArtMethod(method);
+  // Note: No GetInterfaceMethodIfProxy, we want to actual class.
+
+  art::ScopedObjectAccess soa(art::Thread::Current());
+  art::mirror::Class* klass = art_method->GetDeclaringClass();
+  *declaring_class_ptr = soa.AddLocalReference<jclass>(klass);
+
+  return ERR(NONE);
+}
+
+jvmtiError MethodUtil::GetMethodModifiers(jvmtiEnv* env ATTRIBUTE_UNUSED,
+                                          jmethodID method,
+                                          jint* modifiers_ptr) {
+  if (modifiers_ptr == nullptr) {
+    return ERR(NULL_POINTER);
+  }
+
+  art::ArtMethod* art_method = art::jni::DecodeArtMethod(method);
+  uint32_t modifiers = art_method->GetAccessFlags();
+
+  // Note: Keep this code in sync with Executable.fixMethodFlags.
+  if ((modifiers & art::kAccAbstract) != 0) {
+    modifiers &= ~art::kAccNative;
+  }
+  modifiers &= ~art::kAccSynchronized;
+  if ((modifiers & art::kAccDeclaredSynchronized) != 0) {
+    modifiers |= art::kAccSynchronized;
+  }
+  modifiers &= art::kAccJavaFlagsMask;
+
+  *modifiers_ptr = modifiers;
+  return ERR(NONE);
+}
+
+}  // namespace openjdkjvmti
diff --git a/runtime/openjdkjvmti/ti_method.h b/runtime/openjdkjvmti/ti_method.h
new file mode 100644
index 0000000..43f11f9
--- /dev/null
+++ b/runtime/openjdkjvmti/ti_method.h
@@ -0,0 +1,59 @@
+/* Copyright (C) 2016 The Android Open Source Project
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This file implements interfaces from the file jvmti.h. This implementation
+ * is licensed under the same terms as the file jvmti.h.  The
+ * copyright and license information for the file jvmti.h follows.
+ *
+ * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+#ifndef ART_RUNTIME_OPENJDKJVMTI_TI_METHOD_H_
+#define ART_RUNTIME_OPENJDKJVMTI_TI_METHOD_H_
+
+#include "jni.h"
+#include "jvmti.h"
+
+namespace openjdkjvmti {
+
+class MethodUtil {
+ public:
+  static jvmtiError GetMethodName(jvmtiEnv* env,
+                                  jmethodID method,
+                                  char** name_ptr,
+                                  char** signature_ptr,
+                                  char** generic_ptr);
+
+  static jvmtiError GetMethodDeclaringClass(jvmtiEnv* env,
+                                            jmethodID method,
+                                            jclass* declaring_class_ptr);
+
+  static jvmtiError GetMethodModifiers(jvmtiEnv* env,
+                                       jmethodID method,
+                                       jint* modifiers_ptr);
+};
+
+}  // namespace openjdkjvmti
+
+#endif  // ART_RUNTIME_OPENJDKJVMTI_TI_METHOD_H_
diff --git a/runtime/openjdkjvmti/ti_stack.cc b/runtime/openjdkjvmti/ti_stack.cc
new file mode 100644
index 0000000..6f8976f
--- /dev/null
+++ b/runtime/openjdkjvmti/ti_stack.cc
@@ -0,0 +1,196 @@
+/* Copyright (C) 2016 The Android Open Source Project
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This file implements interfaces from the file jvmti.h. This implementation
+ * is licensed under the same terms as the file jvmti.h.  The
+ * copyright and license information for the file jvmti.h follows.
+ *
+ * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+#include "ti_stack.h"
+
+#include "art_jvmti.h"
+#include "art_method-inl.h"
+#include "base/enums.h"
+#include "dex_file.h"
+#include "dex_file_annotations.h"
+#include "jni_env_ext.h"
+#include "jni_internal.h"
+#include "mirror/class.h"
+#include "mirror/dex_cache.h"
+#include "scoped_thread_state_change-inl.h"
+#include "stack.h"
+#include "thread.h"
+#include "thread_pool.h"
+
+namespace openjdkjvmti {
+
+struct GetStackTraceVisitor : public art::StackVisitor {
+  GetStackTraceVisitor(art::Thread* thread_in,
+                       art::ScopedObjectAccessAlreadyRunnable& soa_,
+                       size_t start_,
+                       size_t stop_)
+      : StackVisitor(thread_in, nullptr, StackVisitor::StackWalkKind::kIncludeInlinedFrames),
+        soa(soa_),
+        start(start_),
+        stop(stop_) {}
+
+  bool VisitFrame() REQUIRES_SHARED(art::Locks::mutator_lock_) {
+    art::ArtMethod* m = GetMethod();
+    if (m->IsRuntimeMethod()) {
+      return true;
+    }
+
+    if (start == 0) {
+      m = m->GetInterfaceMethodIfProxy(art::kRuntimePointerSize);
+      jmethodID id = art::jni::EncodeArtMethod(m);
+
+      art::mirror::DexCache* dex_cache = m->GetDexCache();
+      int32_t line_number = -1;
+      if (dex_cache != nullptr) {  // be tolerant of bad input
+        const art::DexFile* dex_file = dex_cache->GetDexFile();
+        line_number = art::annotations::GetLineNumFromPC(dex_file, m, GetDexPc(false));
+      }
+
+      jvmtiFrameInfo info = { id, static_cast<jlong>(line_number) };
+      frames.push_back(info);
+
+      if (stop == 1) {
+        return false;  // We're done.
+      } else if (stop > 0) {
+        stop--;
+      }
+    } else {
+      start--;
+    }
+
+    return true;
+  }
+
+  art::ScopedObjectAccessAlreadyRunnable& soa;
+  std::vector<jvmtiFrameInfo> frames;
+  size_t start;
+  size_t stop;
+};
+
+struct GetStackTraceClosure : public art::Closure {
+ public:
+  GetStackTraceClosure(size_t start, size_t stop)
+      : start_input(start),
+        stop_input(stop),
+        start_result(0),
+        stop_result(0) {}
+
+  void Run(art::Thread* self) OVERRIDE {
+    art::ScopedObjectAccess soa(art::Thread::Current());
+
+    GetStackTraceVisitor visitor(self, soa, start_input, stop_input);
+    visitor.WalkStack(false);
+
+    frames.swap(visitor.frames);
+    start_result = visitor.start;
+    stop_result = visitor.stop;
+  }
+
+  const size_t start_input;
+  const size_t stop_input;
+
+  std::vector<jvmtiFrameInfo> frames;
+  size_t start_result;
+  size_t stop_result;
+};
+
+jvmtiError StackUtil::GetStackTrace(jvmtiEnv* jvmti_env ATTRIBUTE_UNUSED,
+                                    jthread java_thread,
+                                    jint start_depth,
+                                    jint max_frame_count,
+                                    jvmtiFrameInfo* frame_buffer,
+                                    jint* count_ptr) {
+  if (java_thread == nullptr) {
+    return ERR(INVALID_THREAD);
+  }
+
+  art::Thread* thread;
+  {
+    // TODO: Need non-aborting call here, to return JVMTI_ERROR_INVALID_THREAD.
+    art::ScopedObjectAccess soa(art::Thread::Current());
+    art::MutexLock mu(soa.Self(), *art::Locks::thread_list_lock_);
+    thread = art::Thread::FromManagedThread(soa, java_thread);
+    DCHECK(thread != nullptr);
+  }
+
+  art::ThreadState state = thread->GetState();
+  if (state == art::ThreadState::kStarting ||
+      state == art::ThreadState::kTerminated ||
+      thread->IsStillStarting()) {
+    return ERR(THREAD_NOT_ALIVE);
+  }
+
+  if (max_frame_count < 0) {
+    return ERR(ILLEGAL_ARGUMENT);
+  }
+  if (frame_buffer == nullptr || count_ptr == nullptr) {
+    return ERR(NULL_POINTER);
+  }
+
+  if (max_frame_count == 0) {
+    *count_ptr = 0;
+    return ERR(NONE);
+  }
+
+  GetStackTraceClosure closure(start_depth >= 0 ? static_cast<size_t>(start_depth) : 0,
+                               start_depth >= 0 ?static_cast<size_t>(max_frame_count) : 0);
+  thread->RequestSynchronousCheckpoint(&closure);
+
+  size_t collected_frames = closure.frames.size();
+
+  // Frames from the top.
+  if (start_depth >= 0) {
+    if (closure.start_result != 0) {
+      // Not enough frames.
+      return ERR(ILLEGAL_ARGUMENT);
+    }
+    DCHECK_LE(collected_frames, static_cast<size_t>(max_frame_count));
+    if (closure.frames.size() > 0) {
+      memcpy(frame_buffer, closure.frames.data(), collected_frames * sizeof(jvmtiFrameInfo));
+    }
+    *count_ptr = static_cast<jint>(closure.frames.size());
+    return ERR(NONE);
+  }
+
+  // Frames from the bottom.
+  if (collected_frames < static_cast<size_t>(-start_depth)) {
+    return ERR(ILLEGAL_ARGUMENT);
+  }
+
+  size_t count = std::min(static_cast<size_t>(-start_depth), static_cast<size_t>(max_frame_count));
+  memcpy(frame_buffer,
+         &closure.frames.data()[collected_frames + start_depth],
+         count * sizeof(jvmtiFrameInfo));
+  *count_ptr = static_cast<jint>(count);
+  return ERR(NONE);
+}
+
+}  // namespace openjdkjvmti
diff --git a/runtime/openjdkjvmti/ti_stack.h b/runtime/openjdkjvmti/ti_stack.h
new file mode 100644
index 0000000..1931ed3
--- /dev/null
+++ b/runtime/openjdkjvmti/ti_stack.h
@@ -0,0 +1,51 @@
+/* Copyright (C) 2016 The Android Open Source Project
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This file implements interfaces from the file jvmti.h. This implementation
+ * is licensed under the same terms as the file jvmti.h.  The
+ * copyright and license information for the file jvmti.h follows.
+ *
+ * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+#ifndef ART_RUNTIME_OPENJDKJVMTI_TI_STACK_H_
+#define ART_RUNTIME_OPENJDKJVMTI_TI_STACK_H_
+
+#include "jvmti.h"
+
+namespace openjdkjvmti {
+
+class StackUtil {
+ public:
+  static jvmtiError GetStackTrace(jvmtiEnv* env,
+                                  jthread thread,
+                                  jint start_depth,
+                                  jint max_frame_count,
+                                  jvmtiFrameInfo* frame_buffer,
+                                  jint* count_ptr);
+};
+
+}  // namespace openjdkjvmti
+
+#endif  // ART_RUNTIME_OPENJDKJVMTI_TI_STACK_H_
diff --git a/runtime/openjdkjvmti/transform.cc b/runtime/openjdkjvmti/transform.cc
index f59e01e..fa2983c 100644
--- a/runtime/openjdkjvmti/transform.cc
+++ b/runtime/openjdkjvmti/transform.cc
@@ -283,7 +283,7 @@
 // Install the new dex file.
 // TODO do error checks for bad state (method in a stack, changes to number of methods/fields/etc).
 jvmtiError MoveTransformedFileIntoRuntime(jclass jklass,
-                                          std::string original_location,
+                                          const std::string& original_location,
                                           jint data_len,
                                           unsigned char* dex_data) {
   const char* dex_file_name = "Ldalvik/system/DexFile;";
@@ -327,8 +327,7 @@
         class_linker->FindClass(self, dex_file_name, null_loader)
           ->FindDeclaredInstanceField("mInternalCookie", "Ljava/lang/Object;");
     CHECK(dex_file_cookie_field != nullptr);
-    art::Handle<art::mirror::Class> klass(
-        hs.NewHandle(art::down_cast<art::mirror::Class*>(self->DecodeJObject(jklass))));
+    art::Handle<art::mirror::Class> klass(hs.NewHandle(self->DecodeJObject(jklass)->AsClass()));
     art::mirror::Object* dex_file_ptr = nullptr;
     art::mirror::ClassLoader* class_loader_ptr = nullptr;
     // Find dalvik.system.DexFile that represents the dex file we are changing.
diff --git a/runtime/openjdkjvmti/transform.h b/runtime/openjdkjvmti/transform.h
index 85bcb00..a76ed93 100644
--- a/runtime/openjdkjvmti/transform.h
+++ b/runtime/openjdkjvmti/transform.h
@@ -54,7 +54,7 @@
 
 // Install the new dex file.
 jvmtiError MoveTransformedFileIntoRuntime(jclass jklass,
-                                          std::string original_location,
+                                          const std::string& original_location,
                                           jint data_len,
                                           unsigned char* dex_data);
 
diff --git a/runtime/parsed_options.cc b/runtime/parsed_options.cc
index f937ca7..e1022b0 100644
--- a/runtime/parsed_options.cc
+++ b/runtime/parsed_options.cc
@@ -300,6 +300,8 @@
       .Define("-Xplugin:_")
           .WithType<std::vector<Plugin>>().AppendValues()
           .IntoKey(M::Plugins)
+      .Define("-Xfully-deoptable")
+          .IntoKey(M::FullyDeoptable)
       .Ignore({
           "-ea", "-da", "-enableassertions", "-disableassertions", "--runtime-arg", "-esa",
           "-dsa", "-enablesystemassertions", "-disablesystemassertions", "-Xrs", "-Xint:_",
@@ -601,7 +603,7 @@
                  << "runtime plugins.";
   } else if (!args.GetOrDefault(M::Plugins).empty()) {
     LOG(WARNING) << "Experimental runtime plugin support has not been enabled. Ignored options: ";
-    for (auto& op : args.GetOrDefault(M::Plugins)) {
+    for (const auto& op : args.GetOrDefault(M::Plugins)) {
       LOG(WARNING) << "    -plugin:" << op.GetLibrary();
     }
   }
@@ -614,14 +616,14 @@
   } else if (!args.GetOrDefault(M::AgentLib).empty() || !args.GetOrDefault(M::AgentPath).empty()) {
     LOG(WARNING) << "agent support has not been enabled. Enable experimental agent "
                  << " support with '-XExperimental:agent'. Ignored options are:";
-    for (auto op : args.GetOrDefault(M::AgentLib)) {
+    for (const auto& op : args.GetOrDefault(M::AgentLib)) {
       if (op.HasArgs()) {
         LOG(WARNING) << "    -agentlib:" << op.GetName() << "=" << op.GetArgs();
       } else {
         LOG(WARNING) << "    -agentlib:" << op.GetName();
       }
     }
-    for (auto op : args.GetOrDefault(M::AgentPath)) {
+    for (const auto& op : args.GetOrDefault(M::AgentPath)) {
       if (op.HasArgs()) {
         LOG(WARNING) << "    -agentpath:" << op.GetName() << "=" << op.GetArgs();
       } else {
diff --git a/runtime/plugin.h b/runtime/plugin.h
index 18f3977..f077aaf 100644
--- a/runtime/plugin.h
+++ b/runtime/plugin.h
@@ -34,7 +34,7 @@
 // single-threaded fashion so not much need
 class Plugin {
  public:
-  static Plugin Create(std::string lib) {
+  static Plugin Create(const std::string& lib) {
     return Plugin(lib);
   }
 
@@ -66,7 +66,7 @@
   }
 
  private:
-  explicit Plugin(std::string library) : library_(library), dlopen_handle_(nullptr) { }
+  explicit Plugin(const std::string& library) : library_(library), dlopen_handle_(nullptr) { }
 
   std::string library_;
   void* dlopen_handle_;
diff --git a/runtime/primitive.cc b/runtime/primitive.cc
index d29a060..2380284 100644
--- a/runtime/primitive.cc
+++ b/runtime/primitive.cc
@@ -31,11 +31,35 @@
   "PrimVoid",
 };
 
+static const char* kBoxedDescriptors[] = {
+  "Ljava/lang/Object;",
+  "Ljava/lang/Boolean;",
+  "Ljava/lang/Byte;",
+  "Ljava/lang/Character;",
+  "Ljava/lang/Short;",
+  "Ljava/lang/Integer;",
+  "Ljava/lang/Long;",
+  "Ljava/lang/Float;",
+  "Ljava/lang/Double;",
+  "Ljava/lang/Void;",
+};
+
+#define COUNT_OF(x) (sizeof(x) / sizeof(x[0]))
+
 const char* Primitive::PrettyDescriptor(Primitive::Type type) {
+  static_assert(COUNT_OF(kTypeNames) == static_cast<size_t>(Primitive::kPrimLast) + 1,
+                "Missing element");
   CHECK(Primitive::kPrimNot <= type && type <= Primitive::kPrimVoid) << static_cast<int>(type);
   return kTypeNames[type];
 }
 
+const char* Primitive::BoxedDescriptor(Primitive::Type type) {
+  static_assert(COUNT_OF(kBoxedDescriptors) == static_cast<size_t>(Primitive::kPrimLast) + 1,
+                "Missing element");
+  CHECK(Primitive::kPrimNot <= type && type <= Primitive::kPrimVoid) << static_cast<int>(type);
+  return kBoxedDescriptors[type];
+}
+
 std::ostream& operator<<(std::ostream& os, const Primitive::Type& type) {
   int32_t int_type = static_cast<int32_t>(type);
   if (type >= Primitive::kPrimNot && type <= Primitive::kPrimVoid) {
diff --git a/runtime/primitive.h b/runtime/primitive.h
index 18f45ff..a0edaee 100644
--- a/runtime/primitive.h
+++ b/runtime/primitive.h
@@ -138,6 +138,9 @@
 
   static const char* PrettyDescriptor(Type type);
 
+  // Returns the descriptor corresponding to the boxed type of |type|.
+  static const char* BoxedDescriptor(Type type);
+
   static bool IsFloatingPointType(Type type) {
     return type == kPrimFloat || type == kPrimDouble;
   }
@@ -158,6 +161,35 @@
     }
   }
 
+  // Return true if |type| is an numeric type.
+  static constexpr bool IsNumericType(Type type) {
+    switch (type) {
+      case Primitive::Type::kPrimNot: return false;
+      case Primitive::Type::kPrimBoolean: return false;
+      case Primitive::Type::kPrimByte: return true;
+      case Primitive::Type::kPrimChar: return false;
+      case Primitive::Type::kPrimShort: return true;
+      case Primitive::Type::kPrimInt: return true;
+      case Primitive::Type::kPrimLong: return true;
+      case Primitive::Type::kPrimFloat: return true;
+      case Primitive::Type::kPrimDouble: return true;
+      case Primitive::Type::kPrimVoid: return false;
+    }
+  }
+
+  // Returns true if it is possible to widen type |from| to type |to|. Both |from| and
+  // |to| should be numeric primitive types.
+  static bool IsWidenable(Type from, Type to) {
+    static_assert(Primitive::Type::kPrimByte < Primitive::Type::kPrimShort, "Bad ordering");
+    static_assert(Primitive::Type::kPrimShort < Primitive::Type::kPrimInt, "Bad ordering");
+    static_assert(Primitive::Type::kPrimInt < Primitive::Type::kPrimLong, "Bad ordering");
+    static_assert(Primitive::Type::kPrimLong < Primitive::Type::kPrimFloat, "Bad ordering");
+    static_assert(Primitive::Type::kPrimFloat < Primitive::Type::kPrimDouble, "Bad ordering");
+    // Widening is only applicable between numeric types, like byte
+    // and int. Non-numeric types, such as boolean, cannot be widened.
+    return IsNumericType(from) && IsNumericType(to) && from <= to;
+  }
+
   static bool IsIntOrLongType(Type type) {
     return type == kPrimInt || type == kPrimLong;
   }
diff --git a/runtime/proxy_test.cc b/runtime/proxy_test.cc
index 1119ccf..fd7e56d 100644
--- a/runtime/proxy_test.cc
+++ b/runtime/proxy_test.cc
@@ -128,8 +128,8 @@
   ASSERT_TRUE(proxy_class->IsInitialized());
 
   EXPECT_EQ(2U, proxy_class->NumDirectInterfaces());  // Interfaces$I and Interfaces$J.
-  EXPECT_EQ(I.Get(), mirror::Class::GetDirectInterface(soa.Self(), proxy_class, 0));
-  EXPECT_EQ(J.Get(), mirror::Class::GetDirectInterface(soa.Self(), proxy_class, 1));
+  EXPECT_OBJ_PTR_EQ(I.Get(), mirror::Class::GetDirectInterface(soa.Self(), proxy_class, 0));
+  EXPECT_OBJ_PTR_EQ(J.Get(), mirror::Class::GetDirectInterface(soa.Self(), proxy_class, 1));
   std::string temp;
   const char* proxy_class_descriptor = proxy_class->GetDescriptor(&temp);
   EXPECT_STREQ("L$Proxy1234;", proxy_class_descriptor);
@@ -180,7 +180,7 @@
   ArtField* field = &static_fields->At(0);
   EXPECT_STREQ("interfaces", field->GetName());
   EXPECT_STREQ("[Ljava/lang/Class;", field->GetTypeDescriptor());
-  EXPECT_OBJ_PTR_EQ(MakeObjPtr(interfacesFieldClass.Get()), field->GetType<true>());
+  EXPECT_OBJ_PTR_EQ(interfacesFieldClass.Get(), field->GetType<true>());
   std::string temp;
   EXPECT_STREQ("L$Proxy1234;", field->GetDeclaringClass()->GetDescriptor(&temp));
   EXPECT_FALSE(field->IsPrimitiveType());
@@ -189,7 +189,7 @@
   field = &static_fields->At(1);
   EXPECT_STREQ("throws", field->GetName());
   EXPECT_STREQ("[[Ljava/lang/Class;", field->GetTypeDescriptor());
-  EXPECT_OBJ_PTR_EQ(MakeObjPtr(throwsFieldClass.Get()), field->GetType<true>());
+  EXPECT_OBJ_PTR_EQ(throwsFieldClass.Get(), field->GetType<true>());
   EXPECT_STREQ("L$Proxy1234;", field->GetDeclaringClass()->GetDescriptor(&temp));
   EXPECT_FALSE(field->IsPrimitiveType());
 }
@@ -199,8 +199,6 @@
   ScopedObjectAccess soa(Thread::Current());
   jobject jclass_loader = LoadDex("Interfaces");
   StackHandleScope<7> hs(soa.Self());
-  Handle<mirror::ClassLoader> class_loader(
-      hs.NewHandle(soa.Decode<mirror::ClassLoader>(jclass_loader)));
 
   Handle<mirror::Class> proxyClass0;
   Handle<mirror::Class> proxyClass1;
@@ -224,10 +222,10 @@
   ASSERT_TRUE(static_fields1 != nullptr);
   ASSERT_EQ(2u, static_fields1->size());
 
-  EXPECT_OBJ_PTR_EQ(static_fields0->At(0).GetDeclaringClass(), MakeObjPtr(proxyClass0.Get()));
-  EXPECT_OBJ_PTR_EQ(static_fields0->At(1).GetDeclaringClass(), MakeObjPtr(proxyClass0.Get()));
-  EXPECT_OBJ_PTR_EQ(static_fields1->At(0).GetDeclaringClass(), MakeObjPtr(proxyClass1.Get()));
-  EXPECT_OBJ_PTR_EQ(static_fields1->At(1).GetDeclaringClass(), MakeObjPtr(proxyClass1.Get()));
+  EXPECT_OBJ_PTR_EQ(static_fields0->At(0).GetDeclaringClass(), proxyClass0.Get());
+  EXPECT_OBJ_PTR_EQ(static_fields0->At(1).GetDeclaringClass(), proxyClass0.Get());
+  EXPECT_OBJ_PTR_EQ(static_fields1->At(0).GetDeclaringClass(), proxyClass1.Get());
+  EXPECT_OBJ_PTR_EQ(static_fields1->At(1).GetDeclaringClass(), proxyClass1.Get());
 
   ASSERT_EQ(Runtime::Current()->GetClassLinker()->GetImagePointerSize(), kRuntimePointerSize);
   ASSERT_FALSE(Runtime::Current()->IsActiveTransaction());
diff --git a/runtime/quick_exception_handler.cc b/runtime/quick_exception_handler.cc
index 9056d96..a81458f 100644
--- a/runtime/quick_exception_handler.cc
+++ b/runtime/quick_exception_handler.cc
@@ -145,7 +145,7 @@
   if (kDebugExceptionDelivery) {
     mirror::String* msg = exception->GetDetailMessage();
     std::string str_msg(msg != nullptr ? msg->ToModifiedUtf8() : "");
-    self_->DumpStack(LOG_STREAM(INFO) << "Delivering exception: " << PrettyTypeOf(exception)
+    self_->DumpStack(LOG_STREAM(INFO) << "Delivering exception: " << exception->PrettyTypeOf()
                      << ": " << str_msg << "\n");
   }
   StackHandleScope<1> hs(self_);
@@ -162,7 +162,8 @@
     if (handler_method_ != nullptr) {
       const DexFile* dex_file = handler_method_->GetDeclaringClass()->GetDexCache()->GetDexFile();
       int line_number = annotations::GetLineNumFromPC(dex_file, handler_method_, handler_dex_pc_);
-      LOG(INFO) << "Handler: " << PrettyMethod(handler_method_) << " (line: " << line_number << ")";
+      LOG(INFO) << "Handler: " << handler_method_->PrettyMethod() << " (line: "
+                << line_number << ")";
     }
   }
   if (clear_exception_) {
@@ -262,8 +263,8 @@
                                                    vreg_kind,
                                                    &vreg_value);
     CHECK(get_vreg_success) << "VReg " << vreg << " was optimized out ("
-                            << "method=" << PrettyMethod(stack_visitor->GetMethod()) << ", "
-                            << "dex_pc=" << stack_visitor->GetDexPc() << ", "
+                            << "method=" << ArtMethod::PrettyMethod(stack_visitor->GetMethod())
+                            << ", dex_pc=" << stack_visitor->GetDexPc() << ", "
                             << "native_pc_offset=" << stack_visitor->GetNativePcOffset() << ")";
 
     // Copy value to the catch phi's stack slot.
@@ -323,7 +324,7 @@
     if (GetMethod() == nullptr) {
       exception_handler_->SetFullFragmentDone(true);
     } else {
-      CHECK(callee_method_ != nullptr) << art::PrettyMethod(GetMethod(), false);
+      CHECK(callee_method_ != nullptr) << GetMethod()->PrettyMethod(false);
       exception_handler_->SetHandlerQuickArg0(reinterpret_cast<uintptr_t>(callee_method_));
     }
   }
@@ -669,7 +670,7 @@
       return true;
     } else if (method->IsRuntimeMethod()) {
       if (show_details_) {
-        LOG(INFO) << "R  " << PrettyMethod(method, true);
+        LOG(INFO) << "R  " << method->PrettyMethod(true);
       }
       return true;
     } else {
@@ -677,7 +678,7 @@
       LOG(INFO) << (is_shadow ? "S" : "Q")
                 << ((!is_shadow && IsInInlinedFrame()) ? "i" : " ")
                 << " "
-                << PrettyMethod(method, true);
+                << method->PrettyMethod(true);
       return true;  // Go on.
     }
   }
diff --git a/runtime/read_barrier-inl.h b/runtime/read_barrier-inl.h
index 92efa21..37cf257 100644
--- a/runtime/read_barrier-inl.h
+++ b/runtime/read_barrier-inl.h
@@ -40,29 +40,28 @@
       }
     }
     if (kUseBakerReadBarrier) {
-      // The higher bits of the rb_ptr, rb_ptr_high_bits (must be zero)
-      // is used to create artificial data dependency from the is_gray
-      // load to the ref field (ptr) load to avoid needing a load-load
-      // barrier between the two.
-      uintptr_t rb_ptr_high_bits;
-      bool is_gray = HasGrayReadBarrierPointer(obj, &rb_ptr_high_bits);
+      // fake_address_dependency (must be zero) is used to create artificial data dependency from
+      // the is_gray load to the ref field (ptr) load to avoid needing a load-load barrier between
+      // the two.
+      uintptr_t fake_address_dependency;
+      bool is_gray = IsGray(obj, &fake_address_dependency);
+      if (kEnableReadBarrierInvariantChecks) {
+        CHECK_EQ(fake_address_dependency, 0U) << obj << " rb_state=" << obj->GetReadBarrierState();
+      }
       ref_addr = reinterpret_cast<mirror::HeapReference<MirrorType>*>(
-          rb_ptr_high_bits | reinterpret_cast<uintptr_t>(ref_addr));
+          fake_address_dependency | reinterpret_cast<uintptr_t>(ref_addr));
       MirrorType* ref = ref_addr->AsMirrorPtr();
       MirrorType* old_ref = ref;
       if (is_gray) {
         // Slow-path.
         ref = reinterpret_cast<MirrorType*>(Mark(ref));
         // If kAlwaysUpdateField is true, update the field atomically. This may fail if mutator
-        // updates before us, but it's ok.
+        // updates before us, but it's OK.
         if (kAlwaysUpdateField && ref != old_ref) {
           obj->CasFieldStrongRelaxedObjectWithoutWriteBarrier<false, false>(
               offset, old_ref, ref);
         }
       }
-      if (kEnableReadBarrierInvariantChecks) {
-        CHECK_EQ(rb_ptr_high_bits, 0U) << obj << " rb_ptr=" << obj->GetReadBarrierPointer();
-      }
       AssertToSpaceInvariant(obj, offset, ref);
       return ref;
     } else if (kUseBrooksReadBarrier) {
@@ -223,20 +222,14 @@
   return Runtime::Current()->GetHeap()->ConcurrentCopyingCollector()->MarkFromReadBarrier(obj);
 }
 
-inline bool ReadBarrier::HasGrayReadBarrierPointer(mirror::Object* obj,
-                                                   uintptr_t* out_rb_ptr_high_bits) {
-  mirror::Object* rb_ptr = obj->GetReadBarrierPointer();
-  uintptr_t rb_ptr_bits = reinterpret_cast<uintptr_t>(rb_ptr);
-  uintptr_t rb_ptr_low_bits = rb_ptr_bits & rb_ptr_mask_;
-  if (kEnableReadBarrierInvariantChecks) {
-    CHECK(rb_ptr_low_bits == white_ptr_ || rb_ptr_low_bits == gray_ptr_ ||
-          rb_ptr_low_bits == black_ptr_)
-        << "obj=" << obj << " rb_ptr=" << rb_ptr << " " << PrettyTypeOf(obj);
-  }
-  bool is_gray = rb_ptr_low_bits == gray_ptr_;
-  // The high bits are supposed to be zero. We check this on the caller side.
-  *out_rb_ptr_high_bits = rb_ptr_bits & ~rb_ptr_mask_;
-  return is_gray;
+inline bool ReadBarrier::IsGray(mirror::Object* obj, uintptr_t* fake_address_dependency) {
+  return obj->GetReadBarrierState(fake_address_dependency) == gray_state_;
+}
+
+inline bool ReadBarrier::IsGray(mirror::Object* obj) {
+  // Use a load-acquire to load the read barrier bit to avoid reordering with the subsequent load.
+  // GetReadBarrierStateAcquire() has load-acquire semantics.
+  return obj->GetReadBarrierStateAcquire() == gray_state_;
 }
 
 }  // namespace art
diff --git a/runtime/read_barrier.h b/runtime/read_barrier.h
index a861861..cbc2697 100644
--- a/runtime/read_barrier.h
+++ b/runtime/read_barrier.h
@@ -82,26 +82,32 @@
   // ALWAYS_INLINE on this caused a performance regression b/26744236.
   static mirror::Object* Mark(mirror::Object* obj) REQUIRES_SHARED(Locks::mutator_lock_);
 
-  static mirror::Object* WhitePtr() {
-    return reinterpret_cast<mirror::Object*>(white_ptr_);
+  static constexpr uint32_t WhiteState() {
+    return white_state_;
   }
-  static mirror::Object* GrayPtr() {
-    return reinterpret_cast<mirror::Object*>(gray_ptr_);
-  }
-  static mirror::Object* BlackPtr() {
-    return reinterpret_cast<mirror::Object*>(black_ptr_);
+  static constexpr uint32_t GrayState() {
+    return gray_state_;
   }
 
-  ALWAYS_INLINE static bool HasGrayReadBarrierPointer(mirror::Object* obj,
-                                                      uintptr_t* out_rb_ptr_high_bits)
+  // fake_address_dependency will be zero which should be bitwise-or'ed with the address of the
+  // subsequent load to prevent the reordering of the read barrier bit load and the subsequent
+  // object reference load (from one of `obj`'s fields).
+  // *fake_address_dependency will be set to 0.
+  ALWAYS_INLINE static bool IsGray(mirror::Object* obj, uintptr_t* fake_address_dependency)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  // Note: These couldn't be constexpr pointers as reinterpret_cast isn't compatible with them.
-  static constexpr uintptr_t white_ptr_ = 0x0;    // Not marked.
-  static constexpr uintptr_t gray_ptr_ = 0x1;     // Marked, but not marked through. On mark stack.
-  // TODO: black_ptr_ is unused, we should remove it.
-  static constexpr uintptr_t black_ptr_ = 0x2;    // Marked through. Used for non-moving objects.
-  static constexpr uintptr_t rb_ptr_mask_ = 0x1;  // The low bits for white|gray.
+  // This uses a load-acquire to load the read barrier bit internally to prevent the reordering of
+  // the read barrier bit load and the subsequent load.
+  ALWAYS_INLINE static bool IsGray(mirror::Object* obj)
+      REQUIRES_SHARED(Locks::mutator_lock_);
+
+  static bool IsValidReadBarrierState(uint32_t rb_state) {
+    return rb_state == white_state_ || rb_state == gray_state_;
+  }
+
+  static constexpr uint32_t white_state_ = 0x0;    // Not marked.
+  static constexpr uint32_t gray_state_ = 0x1;     // Marked, but not marked through. On mark stack.
+  static constexpr uint32_t rb_state_mask_ = 0x1;  // The low bits for white|gray.
 };
 
 }  // namespace art
diff --git a/runtime/reference_table.cc b/runtime/reference_table.cc
index 0be79ef..1c975a4 100644
--- a/runtime/reference_table.cc
+++ b/runtime/reference_table.cc
@@ -39,7 +39,7 @@
 ReferenceTable::~ReferenceTable() {
 }
 
-void ReferenceTable::Add(mirror::Object* obj) {
+void ReferenceTable::Add(ObjPtr<mirror::Object> obj) {
   DCHECK(obj != nullptr);
   VerifyObject(obj);
   if (entries_.size() >= max_size_) {
@@ -49,10 +49,10 @@
   entries_.push_back(GcRoot<mirror::Object>(obj));
 }
 
-void ReferenceTable::Remove(mirror::Object* obj) {
+void ReferenceTable::Remove(ObjPtr<mirror::Object> obj) {
   // We iterate backwards on the assumption that references are LIFO.
   for (int i = entries_.size() - 1; i >= 0; --i) {
-    mirror::Object* entry = entries_[i].Read();
+    ObjPtr<mirror::Object> entry = entries_[i].Read();
     if (entry == obj) {
       entries_.erase(entries_.begin() + i);
       return;
@@ -62,7 +62,7 @@
 
 // If "obj" is an array, return the number of elements in the array.
 // Otherwise, return zero.
-static size_t GetElementCount(mirror::Object* obj) REQUIRES_SHARED(Locks::mutator_lock_) {
+static size_t GetElementCount(ObjPtr<mirror::Object> obj) REQUIRES_SHARED(Locks::mutator_lock_) {
   // We assume the special cleared value isn't an array in the if statement below.
   DCHECK(!Runtime::Current()->GetClearedJniWeakGlobal()->IsArrayInstance());
   if (obj == nullptr || !obj->IsArrayInstance()) {
@@ -76,7 +76,7 @@
 // Pass in the number of elements in the array (or 0 if this is not an
 // array object), and the number of additional objects that are identical
 // or equivalent to the original.
-static void DumpSummaryLine(std::ostream& os, mirror::Object* obj, size_t element_count,
+static void DumpSummaryLine(std::ostream& os, ObjPtr<mirror::Object> obj, size_t element_count,
                             int identical, int equiv)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   if (obj == nullptr) {
@@ -88,7 +88,7 @@
     return;
   }
 
-  std::string className(PrettyTypeOf(obj));
+  std::string className(obj->PrettyTypeOf());
   if (obj->IsClass()) {
     // We're summarizing multiple instances, so using the exemplar
     // Class' type parameter here would be misleading.
@@ -126,8 +126,8 @@
       // are no suspend points which can happen during the sorting process. This works since
       // we are guaranteed that the addresses of obj1, obj2, obj1->GetClass, obj2->GetClass wont
       // change during the sorting process. The classes are forwarded by ref->GetClass().
-      mirror::Object* obj1 = root1.Read<kWithoutReadBarrier>();
-      mirror::Object* obj2 = root2.Read<kWithoutReadBarrier>();
+      ObjPtr<mirror::Object> obj1 = root1.Read<kWithoutReadBarrier>();
+      ObjPtr<mirror::Object> obj2 = root2.Read<kWithoutReadBarrier>();
       DCHECK(obj1 != nullptr);
       DCHECK(obj2 != nullptr);
       Runtime* runtime = Runtime::Current();
@@ -144,7 +144,7 @@
         return size1 < size2;
       }
       // ...and finally by address.
-      return obj1 < obj2;
+      return obj1.Ptr() < obj2.Ptr();
     }
   };
 
@@ -163,7 +163,7 @@
   os << "  Last " << (count - first) << " entries (of " << count << "):\n";
   Runtime* runtime = Runtime::Current();
   for (int idx = count - 1; idx >= first; --idx) {
-    mirror::Object* ref = entries[idx].Read();
+    ObjPtr<mirror::Object> ref = entries[idx].Read();
     if (ref == nullptr) {
       continue;
     }
@@ -174,18 +174,18 @@
     if (ref->GetClass() == nullptr) {
       // should only be possible right after a plain dvmMalloc().
       size_t size = ref->SizeOf();
-      os << StringPrintf("    %5d: %p (raw) (%zd bytes)\n", idx, ref, size);
+      os << StringPrintf("    %5d: %p (raw) (%zd bytes)\n", idx, ref.Ptr(), size);
       continue;
     }
 
-    std::string className(PrettyTypeOf(ref));
+    std::string className(ref->PrettyTypeOf());
 
     std::string extras;
     size_t element_count = GetElementCount(ref);
     if (element_count != 0) {
       StringAppendF(&extras, " (%zd elements)", element_count);
     } else if (ref->GetClass()->IsStringClass()) {
-      mirror::String* s = ref->AsString();
+      ObjPtr<mirror::String> s = ref->AsString();
       std::string utf8(s->ToModifiedUtf8());
       if (s->GetLength() <= 16) {
         StringAppendF(&extras, " \"%s\"", utf8.c_str());
@@ -193,11 +193,11 @@
         StringAppendF(&extras, " \"%.16s... (%d chars)", utf8.c_str(), s->GetLength());
       }
     } else if (ref->IsReferenceInstance()) {
-      mirror::Object* referent = ref->AsReference()->GetReferent();
+      ObjPtr<mirror::Object> referent = ref->AsReference()->GetReferent();
       if (referent == nullptr) {
         extras = " (referent is null)";
       } else {
-        extras = StringPrintf(" (referent is a %s)", PrettyTypeOf(referent).c_str());
+        extras = StringPrintf(" (referent is a %s)", referent->PrettyTypeOf().c_str());
       }
     }
     os << StringPrintf("    %5d: ", idx) << ref << " " << className << extras << "\n";
@@ -215,33 +215,87 @@
   }
   std::sort(sorted_entries.begin(), sorted_entries.end(), GcRootComparator());
 
+  class SummaryElement {
+   public:
+    GcRoot<mirror::Object> root;
+    size_t equiv;
+    size_t identical;
+
+    SummaryElement() : equiv(0), identical(0) {}
+    SummaryElement(SummaryElement&& ref) {
+      root = ref.root;
+      equiv = ref.equiv;
+      identical = ref.identical;
+    }
+    SummaryElement(const SummaryElement&) = default;
+    SummaryElement& operator=(SummaryElement&&) = default;
+
+    void Reset(GcRoot<mirror::Object>& _root) {
+      root = _root;
+      equiv = 0;
+      identical = 0;
+    }
+  };
+  std::vector<SummaryElement> sorted_summaries;
+  {
+    SummaryElement prev;
+
+    for (GcRoot<mirror::Object>& root : sorted_entries) {
+      ObjPtr<mirror::Object> current = root.Read<kWithoutReadBarrier>();
+
+      if (UNLIKELY(prev.root.IsNull())) {
+        prev.Reset(root);
+        continue;
+      }
+
+      ObjPtr<mirror::Object> prevObj = prev.root.Read<kWithoutReadBarrier>();
+      if (current == prevObj) {
+        // Same reference, added more than once.
+        ++prev.identical;
+      } else if (current->GetClass() == prevObj->GetClass() &&
+          GetElementCount(current) == GetElementCount(prevObj)) {
+        // Same class / element count, different object.
+        ++prev.equiv;
+      } else {
+        sorted_summaries.push_back(prev);
+        prev.Reset(root);
+      }
+      prev.root = root;
+    }
+    sorted_summaries.push_back(prev);
+
+    // Compare summary elements, first by combined count, then by identical (indicating leaks),
+    // then by class (and size and address).
+    struct SummaryElementComparator {
+      GcRootComparator gc_root_cmp;
+
+      bool operator()(SummaryElement& elem1, SummaryElement& elem2) const
+          NO_THREAD_SAFETY_ANALYSIS {
+        Locks::mutator_lock_->AssertSharedHeld(Thread::Current());
+
+        size_t count1 = elem1.equiv + elem1.identical;
+        size_t count2 = elem2.equiv + elem2.identical;
+        if (count1 != count2) {
+          return count1 > count2;
+        }
+
+        if (elem1.identical != elem2.identical) {
+          return elem1.identical > elem2.identical;
+        }
+
+        // Otherwise, compare the GC roots as before.
+        return gc_root_cmp(elem1.root, elem2.root);
+      }
+    };
+    std::sort(sorted_summaries.begin(), sorted_summaries.end(), SummaryElementComparator());
+  }
+
   // Dump a summary of the whole table.
   os << "  Summary:\n";
-  size_t equiv = 0;
-  size_t identical = 0;
-  mirror::Object* prev = nullptr;
-  for (GcRoot<mirror::Object>& root : sorted_entries) {
-    mirror::Object* current = root.Read<kWithoutReadBarrier>();
-    if (prev != nullptr) {
-      const size_t element_count = GetElementCount(prev);
-      if (current == prev) {
-        // Same reference, added more than once.
-        ++identical;
-      } else if (current->GetClass() == prev->GetClass() &&
-          GetElementCount(current) == element_count) {
-        // Same class / element count, different object.
-        ++equiv;
-      } else {
-        // Different class.
-        DumpSummaryLine(os, prev, element_count, identical, equiv);
-        equiv = 0;
-        identical = 0;
-      }
-    }
-    prev = current;
+  for (SummaryElement& elem : sorted_summaries) {
+    ObjPtr<mirror::Object> elemObj = elem.root.Read<kWithoutReadBarrier>();
+    DumpSummaryLine(os, elemObj, GetElementCount(elemObj), elem.identical, elem.equiv);
   }
-  // Handle the last entry.
-  DumpSummaryLine(os, prev, GetElementCount(prev), identical, equiv);
 }
 
 void ReferenceTable::VisitRoots(RootVisitor* visitor, const RootInfo& root_info) {
diff --git a/runtime/reference_table.h b/runtime/reference_table.h
index 992ded0..8423e04 100644
--- a/runtime/reference_table.h
+++ b/runtime/reference_table.h
@@ -25,6 +25,7 @@
 #include "base/allocator.h"
 #include "base/mutex.h"
 #include "gc_root.h"
+#include "obj_ptr.h"
 #include "object_callbacks.h"
 
 namespace art {
@@ -41,9 +42,9 @@
   ReferenceTable(const char* name, size_t initial_size, size_t max_size);
   ~ReferenceTable();
 
-  void Add(mirror::Object* obj) REQUIRES_SHARED(Locks::mutator_lock_);
+  void Add(ObjPtr<mirror::Object> obj) REQUIRES_SHARED(Locks::mutator_lock_);
 
-  void Remove(mirror::Object* obj) REQUIRES_SHARED(Locks::mutator_lock_);
+  void Remove(ObjPtr<mirror::Object> obj) REQUIRES_SHARED(Locks::mutator_lock_);
 
   size_t Size() const;
 
diff --git a/runtime/reference_table_test.cc b/runtime/reference_table_test.cc
index 489db9a..d80a9b3 100644
--- a/runtime/reference_table_test.cc
+++ b/runtime/reference_table_test.cc
@@ -166,4 +166,77 @@
   }
 }
 
+static std::vector<size_t> FindAll(const std::string& haystack, const char* needle) {
+  std::vector<size_t> res;
+  size_t start = 0;
+  do {
+    size_t pos = haystack.find(needle, start);
+    if (pos == std::string::npos) {
+      break;
+    }
+    res.push_back(pos);
+    start = pos + 1;
+  } while (start < haystack.size());
+  return res;
+}
+
+TEST_F(ReferenceTableTest, SummaryOrder) {
+  // Check that the summary statistics are sorted.
+  ScopedObjectAccess soa(Thread::Current());
+
+  ReferenceTable rt("test", 0, 20);
+
+  {
+    mirror::Object* s1 = mirror::String::AllocFromModifiedUtf8(soa.Self(), "hello");
+    mirror::Object* s2 = mirror::String::AllocFromModifiedUtf8(soa.Self(), "world");
+
+    // 3 copies of s1, 2 copies of s2, interleaved.
+    for (size_t i = 0; i != 2; ++i) {
+      rt.Add(s1);
+      rt.Add(s2);
+    }
+    rt.Add(s1);
+  }
+
+  {
+    // Differently sized byte arrays. Should be sorted by identical (non-unique cound).
+    mirror::Object* b1_1 = mirror::ByteArray::Alloc(soa.Self(), 1);
+    rt.Add(b1_1);
+    rt.Add(mirror::ByteArray::Alloc(soa.Self(), 2));
+    rt.Add(b1_1);
+    rt.Add(mirror::ByteArray::Alloc(soa.Self(), 2));
+    rt.Add(mirror::ByteArray::Alloc(soa.Self(), 1));
+    rt.Add(mirror::ByteArray::Alloc(soa.Self(), 2));
+  }
+
+  rt.Add(mirror::CharArray::Alloc(soa.Self(), 0));
+
+  // Now dump, and ensure order.
+  std::ostringstream oss;
+  rt.Dump(oss);
+
+  // Only do this on the part after Summary.
+  std::string base = oss.str();
+  size_t summary_pos = base.find("Summary:");
+  ASSERT_NE(summary_pos, std::string::npos);
+
+  std::string haystack = base.substr(summary_pos);
+
+  std::vector<size_t> strCounts = FindAll(haystack, "java.lang.String");
+  std::vector<size_t> b1Counts = FindAll(haystack, "byte[] (1 elements)");
+  std::vector<size_t> b2Counts = FindAll(haystack, "byte[] (2 elements)");
+  std::vector<size_t> cCounts = FindAll(haystack, "char[]");
+
+  // Only one each.
+  EXPECT_EQ(1u, strCounts.size());
+  EXPECT_EQ(1u, b1Counts.size());
+  EXPECT_EQ(1u, b2Counts.size());
+  EXPECT_EQ(1u, cCounts.size());
+
+  // Expect them to be in order.
+  EXPECT_LT(strCounts[0], b1Counts[0]);
+  EXPECT_LT(b1Counts[0], b2Counts[0]);
+  EXPECT_LT(b2Counts[0], cCounts[0]);
+}
+
 }  // namespace art
diff --git a/runtime/reflection-inl.h b/runtime/reflection-inl.h
index 52cdfb8..68e7a10 100644
--- a/runtime/reflection-inl.h
+++ b/runtime/reflection-inl.h
@@ -21,7 +21,7 @@
 
 #include "base/stringprintf.h"
 #include "common_throws.h"
-#include "jvalue.h"
+#include "jvalue-inl.h"
 #include "mirror/object-inl.h"
 #include "obj_ptr-inl.h"
 #include "primitive.h"
@@ -29,11 +29,10 @@
 
 namespace art {
 
-inline bool ConvertPrimitiveValue(bool unbox_for_result,
-                                  Primitive::Type srcType,
-                                  Primitive::Type dstType,
-                                  const JValue& src,
-                                  JValue* dst) {
+inline bool ConvertPrimitiveValueNoThrow(Primitive::Type srcType,
+                                         Primitive::Type dstType,
+                                         const JValue& src,
+                                         JValue* dst) {
   DCHECK(srcType != Primitive::kPrimNot && dstType != Primitive::kPrimNot);
   if (LIKELY(srcType == dstType)) {
     dst->SetJ(src.GetJ());
@@ -91,6 +90,18 @@
   default:
     break;
   }
+  return false;
+}
+
+inline bool ConvertPrimitiveValue(bool unbox_for_result,
+                                  Primitive::Type srcType,
+                                  Primitive::Type dstType,
+                                  const JValue& src,
+                                  JValue* dst) {
+  if (ConvertPrimitiveValueNoThrow(srcType, dstType, src, dst)) {
+    return true;
+  }
+
   if (!unbox_for_result) {
     ThrowIllegalArgumentException(StringPrintf("Invalid primitive conversion from %s to %s",
                                                PrettyDescriptor(srcType).c_str(),
diff --git a/runtime/reflection.cc b/runtime/reflection.cc
index de003e5..3128380 100644
--- a/runtime/reflection.cc
+++ b/runtime/reflection.cc
@@ -233,10 +233,10 @@
         if (UNLIKELY(arg == nullptr || !arg->InstanceOf(dst_class))) {
           ThrowIllegalArgumentException(
               StringPrintf("method %s argument %zd has type %s, got %s",
-                  PrettyMethod(m, false).c_str(),
+                  m->PrettyMethod(false).c_str(),
                   args_offset + 1,  // Humans don't count from 0.
-                  PrettyDescriptor(dst_class).c_str(),
-                  PrettyTypeOf(arg).c_str()).c_str());
+                  mirror::Class::PrettyDescriptor(dst_class).c_str(),
+                  mirror::Object::PrettyTypeOf(arg).c_str()).c_str());
           return false;
         }
       }
@@ -261,10 +261,10 @@
             } else { \
               ThrowIllegalArgumentException(\
                   StringPrintf("method %s argument %zd has type %s, got %s", \
-                      PrettyMethod(m, false).c_str(), \
+                      ArtMethod::PrettyMethod(m, false).c_str(), \
                       args_offset + 1, \
                       expected, \
-                      PrettyTypeOf(arg).c_str()).c_str()); \
+                      mirror::Object::PrettyTypeOf(arg).c_str()).c_str()); \
             } \
             return false; \
           } }
@@ -382,8 +382,8 @@
           (reinterpret_cast<StackReference<mirror::Object>*>(&args[i + offset]))->AsMirrorPtr();
       if (argument != nullptr && !argument->InstanceOf(param_type)) {
         LOG(ERROR) << "JNI ERROR (app bug): attempt to pass an instance of "
-                   << PrettyTypeOf(argument) << " as argument " << (i + 1)
-                   << " to " << PrettyMethod(m);
+                   << argument->PrettyTypeOf() << " as argument " << (i + 1)
+                   << " to " << m->PrettyMethod();
         ++error_count;
       }
     } else if (param_type->IsPrimitiveLong() || param_type->IsPrimitiveDouble()) {
@@ -393,25 +393,25 @@
       if (param_type->IsPrimitiveBoolean()) {
         if (arg != JNI_TRUE && arg != JNI_FALSE) {
           LOG(ERROR) << "JNI ERROR (app bug): expected jboolean (0/1) but got value of "
-              << arg << " as argument " << (i + 1) << " to " << PrettyMethod(m);
+              << arg << " as argument " << (i + 1) << " to " << m->PrettyMethod();
           ++error_count;
         }
       } else if (param_type->IsPrimitiveByte()) {
         if (arg < -128 || arg > 127) {
           LOG(ERROR) << "JNI ERROR (app bug): expected jbyte but got value of "
-              << arg << " as argument " << (i + 1) << " to " << PrettyMethod(m);
+              << arg << " as argument " << (i + 1) << " to " << m->PrettyMethod();
           ++error_count;
         }
       } else if (param_type->IsPrimitiveChar()) {
         if (args[i + offset] > 0xFFFF) {
           LOG(ERROR) << "JNI ERROR (app bug): expected jchar but got value of "
-              << arg << " as argument " << (i + 1) << " to " << PrettyMethod(m);
+              << arg << " as argument " << (i + 1) << " to " << m->PrettyMethod();
           ++error_count;
         }
       } else if (param_type->IsPrimitiveShort()) {
         if (arg < -32768 || arg > 0x7FFF) {
           LOG(ERROR) << "JNI ERROR (app bug): expected jshort but got value of "
-              << arg << " as argument " << (i + 1) << " to " << PrettyMethod(m);
+              << arg << " as argument " << (i + 1) << " to " << m->PrettyMethod();
           ++error_count;
         }
       }
@@ -421,7 +421,7 @@
     // TODO: pass the JNI function name (such as "CallVoidMethodV") through so we can call JniAbort
     // with an argument.
     vm->JniAbortF(nullptr, "bad arguments passed to %s (see above for details)",
-                  PrettyMethod(m).c_str());
+                  m->PrettyMethod().c_str());
   }
 }
 
@@ -453,7 +453,7 @@
     return JValue();
   }
 
-  ArtMethod* method = soa.DecodeMethod(mid);
+  ArtMethod* method = jni::DecodeArtMethod(mid);
   bool is_string_init = method->GetDeclaringClass()->IsStringClass() && method->IsConstructor();
   if (is_string_init) {
     // Replace calls to String.<init> with equivalent StringFactory call.
@@ -484,7 +484,7 @@
     return JValue();
   }
 
-  ArtMethod* method = soa.DecodeMethod(mid);
+  ArtMethod* method = jni::DecodeArtMethod(mid);
   bool is_string_init = method->GetDeclaringClass()->IsStringClass() && method->IsConstructor();
   if (is_string_init) {
     // Replace calls to String.<init> with equivalent StringFactory call.
@@ -516,7 +516,7 @@
   }
 
   ObjPtr<mirror::Object> receiver = soa.Decode<mirror::Object>(obj);
-  ArtMethod* method = FindVirtualMethod(receiver, soa.DecodeMethod(mid));
+  ArtMethod* method = FindVirtualMethod(receiver, jni::DecodeArtMethod(mid));
   bool is_string_init = method->GetDeclaringClass()->IsStringClass() && method->IsConstructor();
   if (is_string_init) {
     // Replace calls to String.<init> with equivalent StringFactory call.
@@ -548,7 +548,7 @@
   }
 
   ObjPtr<mirror::Object> receiver = soa.Decode<mirror::Object>(obj);
-  ArtMethod* method = FindVirtualMethod(receiver, soa.DecodeMethod(mid));
+  ArtMethod* method = FindVirtualMethod(receiver, jni::DecodeArtMethod(mid));
   bool is_string_init = method->GetDeclaringClass()->IsStringClass() && method->IsConstructor();
   if (is_string_init) {
     // Replace calls to String.<init> with equivalent StringFactory call.
@@ -634,11 +634,11 @@
                                    num_frames)) {
     ThrowIllegalAccessException(
         StringPrintf("Class %s cannot access %s method %s of class %s",
-            calling_class == nullptr ? "null" : PrettyClass(calling_class).c_str(),
+            calling_class == nullptr ? "null" : calling_class->PrettyClass().c_str(),
             PrettyJavaAccessFlags(m->GetAccessFlags()).c_str(),
-            PrettyMethod(m).c_str(),
+            m->PrettyMethod().c_str(),
             m->GetDeclaringClass() == nullptr ? "null" :
-                PrettyClass(m->GetDeclaringClass()).c_str()).c_str());
+                m->GetDeclaringClass()->PrettyClass().c_str()).c_str());
     return nullptr;
   }
 
@@ -676,8 +676,7 @@
   }
 
   // Box if necessary and return.
-  return soa.AddLocalReference<jobject>(
-      BoxPrimitive(Primitive::GetType(shorty[0]), result).Ptr());
+  return soa.AddLocalReference<jobject>(BoxPrimitive(Primitive::GetType(shorty[0]), result));
 }
 
 ObjPtr<mirror::Object> BoxPrimitive(Primitive::Type src_class, const JValue& value) {
@@ -740,15 +739,18 @@
     arg_array.Append(value.GetI());
   }
 
-  soa.DecodeMethod(m)->Invoke(soa.Self(), arg_array.GetArray(), arg_array.GetNumBytes(),
-                              &result, shorty);
+  jni::DecodeArtMethod(m)->Invoke(soa.Self(),
+                                  arg_array.GetArray(),
+                                  arg_array.GetNumBytes(),
+                                  &result,
+                                  shorty);
   return result.GetL();
 }
 
 static std::string UnboxingFailureKind(ArtField* f)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   if (f != nullptr) {
-    return "field " + PrettyField(f, false);
+    return "field " + f->PrettyField(false);
   }
   return "result";
 }
@@ -762,18 +764,20 @@
   if (!dst_class->IsPrimitive()) {
     if (UNLIKELY(o != nullptr && !o->InstanceOf(dst_class))) {
       if (!unbox_for_result) {
-        ThrowIllegalArgumentException(StringPrintf("%s has type %s, got %s",
-                                                   UnboxingFailureKind(f).c_str(),
-                                                   PrettyDescriptor(dst_class).c_str(),
-                                                   PrettyTypeOf(o).c_str()).c_str());
+        ThrowIllegalArgumentException(
+            StringPrintf("%s has type %s, got %s",
+                         UnboxingFailureKind(f).c_str(),
+                         dst_class->PrettyDescriptor().c_str(),
+                         o->PrettyTypeOf().c_str()).c_str());
       } else {
-        ThrowClassCastException(StringPrintf("Couldn't convert result of type %s to %s",
-                                             PrettyTypeOf(o).c_str(),
-                                             PrettyDescriptor(dst_class).c_str()).c_str());
+        ThrowClassCastException(
+            StringPrintf("Couldn't convert result of type %s to %s",
+                         o->PrettyTypeOf().c_str(),
+                         dst_class->PrettyDescriptor().c_str()).c_str());
       }
       return false;
     }
-    unboxed_value->SetL(o.Ptr());
+    unboxed_value->SetL(o);
     return true;
   }
   if (UNLIKELY(dst_class->GetPrimitiveType() == Primitive::kPrimVoid)) {
@@ -783,13 +787,14 @@
   }
   if (UNLIKELY(o == nullptr)) {
     if (!unbox_for_result) {
-      ThrowIllegalArgumentException(StringPrintf("%s has type %s, got null",
-                                                 UnboxingFailureKind(f).c_str(),
-                                                 PrettyDescriptor(dst_class).c_str()).c_str());
+      ThrowIllegalArgumentException(
+          StringPrintf("%s has type %s, got null",
+                       UnboxingFailureKind(f).c_str(),
+                       dst_class->PrettyDescriptor().c_str()).c_str());
     } else {
       ThrowNullPointerException(
           StringPrintf("Expected to unbox a '%s' primitive type but was returned null",
-                       PrettyDescriptor(dst_class).c_str()).c_str());
+                       dst_class->PrettyDescriptor().c_str()).c_str());
     }
     return false;
   }
@@ -827,7 +832,7 @@
     std::string temp;
     ThrowIllegalArgumentException(
         StringPrintf("%s has type %s, got %s", UnboxingFailureKind(f).c_str(),
-            PrettyDescriptor(dst_class).c_str(),
+            dst_class->PrettyDescriptor().c_str(),
             PrettyDescriptor(o->GetClass()->GetDescriptor(&temp)).c_str()).c_str());
     return false;
   }
@@ -898,8 +903,8 @@
 }
 
 void InvalidReceiverError(ObjPtr<mirror::Object> o, ObjPtr<mirror::Class> c) {
-  std::string expected_class_name(PrettyDescriptor(c));
-  std::string actual_class_name(PrettyTypeOf(o));
+  std::string expected_class_name(mirror::Class::PrettyDescriptor(c));
+  std::string actual_class_name(mirror::Object::PrettyTypeOf(o));
   ThrowIllegalArgumentException(StringPrintf("Expected receiver of type %s, but got %s",
                                              expected_class_name.c_str(),
                                              actual_class_name.c_str()).c_str());
@@ -909,16 +914,16 @@
 // Will need to be fixed if there's cases where it's not.
 void UpdateReference(Thread* self, jobject obj, ObjPtr<mirror::Object> result) {
   IndirectRef ref = reinterpret_cast<IndirectRef>(obj);
-  IndirectRefKind kind = GetIndirectRefKind(ref);
+  IndirectRefKind kind = IndirectReferenceTable::GetIndirectRefKind(ref);
   if (kind == kLocal) {
-    self->GetJniEnv()->locals.Update(obj, result.Ptr());
+    self->GetJniEnv()->locals.Update(obj, result);
   } else if (kind == kHandleScopeOrInvalid) {
     LOG(FATAL) << "Unsupported UpdateReference for kind kHandleScopeOrInvalid";
   } else if (kind == kGlobal) {
-    self->GetJniEnv()->vm->UpdateGlobal(self, ref, result.Ptr());
+    self->GetJniEnv()->vm->UpdateGlobal(self, ref, result);
   } else {
     DCHECK_EQ(kind, kWeakGlobal);
-    self->GetJniEnv()->vm->UpdateWeakGlobal(self, ref, result.Ptr());
+    self->GetJniEnv()->vm->UpdateWeakGlobal(self, ref, result);
   }
 }
 
diff --git a/runtime/reflection.h b/runtime/reflection.h
index 6e5ef71..f2652fd 100644
--- a/runtime/reflection.h
+++ b/runtime/reflection.h
@@ -47,6 +47,12 @@
                              JValue* unboxed_value)
     REQUIRES_SHARED(Locks::mutator_lock_);
 
+ALWAYS_INLINE bool ConvertPrimitiveValueNoThrow(Primitive::Type src_class,
+                                                Primitive::Type dst_class,
+                                                const JValue& src,
+                                                JValue* dst)
+    REQUIRES_SHARED(Locks::mutator_lock_);
+
 ALWAYS_INLINE bool ConvertPrimitiveValue(bool unbox_for_result,
                                          Primitive::Type src_class,
                                          Primitive::Type dst_class,
diff --git a/runtime/reflection_test.cc b/runtime/reflection_test.cc
index 189ed03..e254dfe 100644
--- a/runtime/reflection_test.cc
+++ b/runtime/reflection_test.cc
@@ -23,6 +23,7 @@
 #include "art_method-inl.h"
 #include "base/enums.h"
 #include "common_compiler_test.h"
+#include "jni_internal.h"
 #include "scoped_thread_state_change-inl.h"
 
 namespace art {
@@ -83,14 +84,15 @@
   }
 
   void ReflectionTestMakeExecutable(ArtMethod** method,
-                                    mirror::Object** receiver,
-                                    bool is_static, const char* method_name,
+                                    ObjPtr<mirror::Object>* receiver,
+                                    bool is_static,
+                                    const char* method_name,
                                     const char* method_signature)
       REQUIRES_SHARED(Locks::mutator_lock_) {
     const char* class_name = is_static ? "StaticLeafMethods" : "NonStaticLeafMethods";
     jobject jclass_loader(LoadDex(class_name));
     Thread* self = Thread::Current();
-    StackHandleScope<2> hs(self);
+    StackHandleScope<3> hs(self);
     Handle<mirror::ClassLoader> class_loader(
         hs.NewHandle(
             ScopedObjectAccessUnchecked(self).Decode<mirror::ClassLoader>(jclass_loader)));
@@ -100,8 +102,9 @@
     }
     MakeExecutable(class_loader.Get(), class_name);
 
-    mirror::Class* c = class_linker_->FindClass(self, DotToDescriptor(class_name).c_str(),
-                                                class_loader);
+    ObjPtr<mirror::Class> c = class_linker_->FindClass(self,
+                                                       DotToDescriptor(class_name).c_str(),
+                                                       class_loader);
     CHECK(c != nullptr);
 
     *method = is_static ? c->FindDirectMethod(method_name, method_signature, kRuntimePointerSize)
@@ -112,14 +115,17 @@
       *receiver = nullptr;
     } else {
       // Ensure class is initialized before allocating object
-      StackHandleScope<1> hs2(self);
-      Handle<mirror::Class> h_class(hs2.NewHandle(c));
-      bool initialized = class_linker_->EnsureInitialized(self, h_class, true, true);
-      CHECK(initialized);
+      {
+        StackHandleScope<1> hs2(self);
+        HandleWrapperObjPtr<mirror::Class> h_class(hs2.NewHandleWrapper(&c));
+        bool initialized = class_linker_->EnsureInitialized(self, h_class, true, true);
+        CHECK(initialized);
+      }
       *receiver = c->AllocObject(self);
     }
 
     // Start runtime.
+    HandleWrapperObjPtr<mirror::Object> h(hs.NewHandleWrapper(receiver));
     bool started = runtime_->Start();
     CHECK(started);
     self->TransitionFromSuspendedToRunnable();
@@ -128,121 +134,121 @@
   void InvokeNopMethod(bool is_static) {
     ScopedObjectAccess soa(env_);
     ArtMethod* method;
-    mirror::Object* receiver;
+    ObjPtr<mirror::Object> receiver;
     ReflectionTestMakeExecutable(&method, &receiver, is_static, "nop", "()V");
     ScopedLocalRef<jobject> receiver_ref(soa.Env(), soa.AddLocalReference<jobject>(receiver));
-    InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), nullptr);
+    InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), nullptr);
   }
 
   void InvokeIdentityByteMethod(bool is_static) {
     ScopedObjectAccess soa(env_);
     ArtMethod* method;
-    mirror::Object* receiver;
+    ObjPtr<mirror::Object> receiver;
     ReflectionTestMakeExecutable(&method, &receiver, is_static, "identity", "(B)B");
     ScopedLocalRef<jobject> receiver_ref(soa.Env(), soa.AddLocalReference<jobject>(receiver));
     jvalue args[1];
 
     args[0].b = 0;
-    JValue result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    JValue result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(0, result.GetB());
 
     args[0].b = -1;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(-1, result.GetB());
 
     args[0].b = SCHAR_MAX;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(SCHAR_MAX, result.GetB());
 
     static_assert(SCHAR_MIN == -128, "SCHAR_MIN unexpected");
     args[0].b = SCHAR_MIN;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(SCHAR_MIN, result.GetB());
   }
 
   void InvokeIdentityIntMethod(bool is_static) {
     ScopedObjectAccess soa(env_);
     ArtMethod* method;
-    mirror::Object* receiver;
+    ObjPtr<mirror::Object> receiver;
     ReflectionTestMakeExecutable(&method, &receiver, is_static, "identity", "(I)I");
     ScopedLocalRef<jobject> receiver_ref(soa.Env(), soa.AddLocalReference<jobject>(receiver));
     jvalue args[1];
 
     args[0].i = 0;
-    JValue result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    JValue result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(0, result.GetI());
 
     args[0].i = -1;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(-1, result.GetI());
 
     args[0].i = INT_MAX;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(INT_MAX, result.GetI());
 
     args[0].i = INT_MIN;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(INT_MIN, result.GetI());
   }
 
   void InvokeIdentityDoubleMethod(bool is_static) {
     ScopedObjectAccess soa(env_);
     ArtMethod* method;
-    mirror::Object* receiver;
+    ObjPtr<mirror::Object> receiver;
     ReflectionTestMakeExecutable(&method, &receiver, is_static, "identity", "(D)D");
     ScopedLocalRef<jobject> receiver_ref(soa.Env(), soa.AddLocalReference<jobject>(receiver));
     jvalue args[1];
 
     args[0].d = 0.0;
-    JValue result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    JValue result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_DOUBLE_EQ(0.0, result.GetD());
 
     args[0].d = -1.0;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_DOUBLE_EQ(-1.0, result.GetD());
 
     args[0].d = DBL_MAX;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_DOUBLE_EQ(DBL_MAX, result.GetD());
 
     args[0].d = DBL_MIN;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_DOUBLE_EQ(DBL_MIN, result.GetD());
   }
 
   void InvokeSumIntIntMethod(bool is_static) {
     ScopedObjectAccess soa(env_);
     ArtMethod* method;
-    mirror::Object* receiver;
+    ObjPtr<mirror::Object> receiver;
     ReflectionTestMakeExecutable(&method, &receiver, is_static, "sum", "(II)I");
     ScopedLocalRef<jobject> receiver_ref(soa.Env(), soa.AddLocalReference<jobject>(receiver));
     jvalue args[2];
 
     args[0].i = 1;
     args[1].i = 2;
-    JValue result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    JValue result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(3, result.GetI());
 
     args[0].i = -2;
     args[1].i = 5;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(3, result.GetI());
 
     args[0].i = INT_MAX;
     args[1].i = INT_MIN;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(-1, result.GetI());
 
     args[0].i = INT_MAX;
     args[1].i = INT_MAX;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(-2, result.GetI());
   }
 
   void InvokeSumIntIntIntMethod(bool is_static) {
     ScopedObjectAccess soa(env_);
     ArtMethod* method;
-    mirror::Object* receiver;
+    ObjPtr<mirror::Object> receiver;
     ReflectionTestMakeExecutable(&method, &receiver, is_static, "sum", "(III)I");
     ScopedLocalRef<jobject> receiver_ref(soa.Env(), soa.AddLocalReference<jobject>(receiver));
     jvalue args[3];
@@ -250,38 +256,38 @@
     args[0].i = 0;
     args[1].i = 0;
     args[2].i = 0;
-    JValue result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    JValue result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(0, result.GetI());
 
     args[0].i = 1;
     args[1].i = 2;
     args[2].i = 3;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(6, result.GetI());
 
     args[0].i = -1;
     args[1].i = 2;
     args[2].i = -3;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(-2, result.GetI());
 
     args[0].i = INT_MAX;
     args[1].i = INT_MIN;
     args[2].i = INT_MAX;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(2147483646, result.GetI());
 
     args[0].i = INT_MAX;
     args[1].i = INT_MAX;
     args[2].i = INT_MAX;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(2147483645, result.GetI());
   }
 
   void InvokeSumIntIntIntIntMethod(bool is_static) {
     ScopedObjectAccess soa(env_);
     ArtMethod* method;
-    mirror::Object* receiver;
+    ObjPtr<mirror::Object> receiver;
     ReflectionTestMakeExecutable(&method, &receiver, is_static, "sum", "(IIII)I");
     ScopedLocalRef<jobject> receiver_ref(soa.Env(), soa.AddLocalReference<jobject>(receiver));
     jvalue args[4];
@@ -290,42 +296,42 @@
     args[1].i = 0;
     args[2].i = 0;
     args[3].i = 0;
-    JValue result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    JValue result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(0, result.GetI());
 
     args[0].i = 1;
     args[1].i = 2;
     args[2].i = 3;
     args[3].i = 4;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(10, result.GetI());
 
     args[0].i = -1;
     args[1].i = 2;
     args[2].i = -3;
     args[3].i = 4;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(2, result.GetI());
 
     args[0].i = INT_MAX;
     args[1].i = INT_MIN;
     args[2].i = INT_MAX;
     args[3].i = INT_MIN;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(-2, result.GetI());
 
     args[0].i = INT_MAX;
     args[1].i = INT_MAX;
     args[2].i = INT_MAX;
     args[3].i = INT_MAX;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(-4, result.GetI());
   }
 
   void InvokeSumIntIntIntIntIntMethod(bool is_static) {
     ScopedObjectAccess soa(env_);
     ArtMethod* method;
-    mirror::Object* receiver;
+    ObjPtr<mirror::Object> receiver;
     ReflectionTestMakeExecutable(&method, &receiver, is_static, "sum", "(IIIII)I");
     ScopedLocalRef<jobject> receiver_ref(soa.Env(), soa.AddLocalReference<jobject>(receiver));
     jvalue args[5];
@@ -335,7 +341,7 @@
     args[2].i = 0;
     args[3].i = 0;
     args[4].i = 0;
-    JValue result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    JValue result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(0, result.GetI());
 
     args[0].i = 1;
@@ -343,7 +349,7 @@
     args[2].i = 3;
     args[3].i = 4;
     args[4].i = 5;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(15, result.GetI());
 
     args[0].i = -1;
@@ -351,7 +357,7 @@
     args[2].i = -3;
     args[3].i = 4;
     args[4].i = -5;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(-3, result.GetI());
 
     args[0].i = INT_MAX;
@@ -359,7 +365,7 @@
     args[2].i = INT_MAX;
     args[3].i = INT_MIN;
     args[4].i = INT_MAX;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(2147483645, result.GetI());
 
     args[0].i = INT_MAX;
@@ -367,48 +373,48 @@
     args[2].i = INT_MAX;
     args[3].i = INT_MAX;
     args[4].i = INT_MAX;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_EQ(2147483643, result.GetI());
   }
 
   void InvokeSumDoubleDoubleMethod(bool is_static) {
     ScopedObjectAccess soa(env_);
     ArtMethod* method;
-    mirror::Object* receiver;
+    ObjPtr<mirror::Object> receiver;
     ReflectionTestMakeExecutable(&method, &receiver, is_static, "sum", "(DD)D");
     ScopedLocalRef<jobject> receiver_ref(soa.Env(), soa.AddLocalReference<jobject>(receiver));
     jvalue args[2];
 
     args[0].d = 0.0;
     args[1].d = 0.0;
-    JValue result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    JValue result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_DOUBLE_EQ(0.0, result.GetD());
 
     args[0].d = 1.0;
     args[1].d = 2.0;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_DOUBLE_EQ(3.0, result.GetD());
 
     args[0].d = 1.0;
     args[1].d = -2.0;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_DOUBLE_EQ(-1.0, result.GetD());
 
     args[0].d = DBL_MAX;
     args[1].d = DBL_MIN;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_DOUBLE_EQ(1.7976931348623157e308, result.GetD());
 
     args[0].d = DBL_MAX;
     args[1].d = DBL_MAX;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_DOUBLE_EQ(INFINITY, result.GetD());
   }
 
   void InvokeSumDoubleDoubleDoubleMethod(bool is_static) {
     ScopedObjectAccess soa(env_);
     ArtMethod* method;
-    mirror::Object* receiver;
+    ObjPtr<mirror::Object> receiver;
     ReflectionTestMakeExecutable(&method, &receiver, is_static, "sum", "(DDD)D");
     ScopedLocalRef<jobject> receiver_ref(soa.Env(), soa.AddLocalReference<jobject>(receiver));
     jvalue args[3];
@@ -416,26 +422,26 @@
     args[0].d = 0.0;
     args[1].d = 0.0;
     args[2].d = 0.0;
-    JValue result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    JValue result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_DOUBLE_EQ(0.0, result.GetD());
 
     args[0].d = 1.0;
     args[1].d = 2.0;
     args[2].d = 3.0;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_DOUBLE_EQ(6.0, result.GetD());
 
     args[0].d = 1.0;
     args[1].d = -2.0;
     args[2].d = 3.0;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_DOUBLE_EQ(2.0, result.GetD());
   }
 
   void InvokeSumDoubleDoubleDoubleDoubleMethod(bool is_static) {
     ScopedObjectAccess soa(env_);
     ArtMethod* method;
-    mirror::Object* receiver;
+    ObjPtr<mirror::Object> receiver;
     ReflectionTestMakeExecutable(&method, &receiver, is_static, "sum", "(DDDD)D");
     ScopedLocalRef<jobject> receiver_ref(soa.Env(), soa.AddLocalReference<jobject>(receiver));
     jvalue args[4];
@@ -444,28 +450,28 @@
     args[1].d = 0.0;
     args[2].d = 0.0;
     args[3].d = 0.0;
-    JValue result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    JValue result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_DOUBLE_EQ(0.0, result.GetD());
 
     args[0].d = 1.0;
     args[1].d = 2.0;
     args[2].d = 3.0;
     args[3].d = 4.0;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_DOUBLE_EQ(10.0, result.GetD());
 
     args[0].d = 1.0;
     args[1].d = -2.0;
     args[2].d = 3.0;
     args[3].d = -4.0;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_DOUBLE_EQ(-2.0, result.GetD());
   }
 
   void InvokeSumDoubleDoubleDoubleDoubleDoubleMethod(bool is_static) {
     ScopedObjectAccess soa(env_);
     ArtMethod* method;
-    mirror::Object* receiver;
+    ObjPtr<mirror::Object> receiver;
     ReflectionTestMakeExecutable(&method, &receiver, is_static, "sum", "(DDDDD)D");
     ScopedLocalRef<jobject> receiver_ref(soa.Env(), soa.AddLocalReference<jobject>(receiver));
     jvalue args[5];
@@ -475,7 +481,7 @@
     args[2].d = 0.0;
     args[3].d = 0.0;
     args[4].d = 0.0;
-    JValue result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    JValue result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_DOUBLE_EQ(0.0, result.GetD());
 
     args[0].d = 1.0;
@@ -483,7 +489,7 @@
     args[2].d = 3.0;
     args[3].d = 4.0;
     args[4].d = 5.0;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_DOUBLE_EQ(15.0, result.GetD());
 
     args[0].d = 1.0;
@@ -491,7 +497,7 @@
     args[2].d = 3.0;
     args[3].d = -4.0;
     args[4].d = 5.0;
-    result = InvokeWithJValues(soa, receiver_ref.get(), soa.EncodeMethod(method), args);
+    result = InvokeWithJValues(soa, receiver_ref.get(), jni::EncodeArtMethod(method), args);
     EXPECT_DOUBLE_EQ(3.0, result.GetD());
   }
 
@@ -526,7 +532,7 @@
 
   jvalue args[1];
   args[0].l = nullptr;
-  InvokeWithJValues(soa, nullptr, soa.EncodeMethod(method), args);
+  InvokeWithJValues(soa, nullptr, jni::EncodeArtMethod(method), args);
 }
 
 TEST_F(ReflectionTest, StaticNopMethod) {
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index 7d9d506..09a0462 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -85,7 +85,9 @@
 #include "linear_alloc.h"
 #include "mirror/array.h"
 #include "mirror/class-inl.h"
+#include "mirror/class_ext.h"
 #include "mirror/class_loader.h"
+#include "mirror/emulated_stack_frame.h"
 #include "mirror/field.h"
 #include "mirror/method.h"
 #include "mirror/method_handle_impl.h"
@@ -147,6 +149,10 @@
 #include "verifier/method_verifier.h"
 #include "well_known_classes.h"
 
+#ifdef ART_TARGET_ANDROID
+#include <android/set_abort_message.h>
+#endif
+
 namespace art {
 
 // If a signal isn't handled properly, enable a handler that attempts to dump the Java stack.
@@ -233,6 +239,7 @@
       force_native_bridge_(false),
       is_native_bridge_loaded_(false),
       is_native_debuggable_(false),
+      is_fully_deoptable_(false),
       zygote_max_failed_boots_(0),
       experimental_flags_(ExperimentalFlags::kNone),
       oat_file_manager_(nullptr),
@@ -343,13 +350,13 @@
   delete class_linker_;
   delete heap_;
   delete intern_table_;
-  delete java_vm_;
   delete oat_file_manager_;
   Thread::Shutdown();
   QuasiAtomic::Shutdown();
   verifier::MethodVerifier::Shutdown();
 
   // Destroy allocators before shutting down the MemMap because they may use it.
+  java_vm_.reset();
   linear_alloc_.reset();
   low_4gb_arena_pool_.reset();
   arena_pool_.reset();
@@ -495,7 +502,7 @@
 bool Runtime::ParseOptions(const RuntimeOptions& raw_options,
                            bool ignore_unrecognized,
                            RuntimeArgumentMap* runtime_options) {
-  InitLogging(/* argv */ nullptr);  // Calls Locks::Init() as a side effect.
+  InitLogging(/* argv */ nullptr, Aborter);  // Calls Locks::Init() as a side effect.
   bool parsed = ParsedOptions::Parse(raw_options, ignore_unrecognized, runtime_options);
   if (!parsed) {
     LOG(ERROR) << "Failed to parse options";
@@ -504,12 +511,21 @@
   return true;
 }
 
+// Callback to check whether it is safe to call Abort (e.g., to use a call to
+// LOG(FATAL)).  It is only safe to call Abort if the runtime has been created,
+// properly initialized, and has not shut down.
+static bool IsSafeToCallAbort() NO_THREAD_SAFETY_ANALYSIS {
+  Runtime* runtime = Runtime::Current();
+  return runtime != nullptr && runtime->IsStarted() && !runtime->IsShuttingDownLocked();
+}
+
 bool Runtime::Create(RuntimeArgumentMap&& runtime_options) {
   // TODO: acquire a static mutex on Runtime to avoid racing.
   if (Runtime::instance_ != nullptr) {
     return false;
   }
   instance_ = new Runtime;
+  Locks::SetClientCallback(IsSafeToCallAbort);
   if (!instance_->Init(std::move(runtime_options))) {
     // TODO: Currently deleting the instance will abort the runtime on destruction. Now This will
     // leak memory, instead. Fix the destructor. b/19100793.
@@ -544,7 +560,10 @@
       "getSystemClassLoader", "()Ljava/lang/ClassLoader;", pointer_size);
   CHECK(getSystemClassLoader != nullptr);
 
-  JValue result = InvokeWithJValues(soa, nullptr, soa.EncodeMethod(getSystemClassLoader), nullptr);
+  JValue result = InvokeWithJValues(soa,
+                                    nullptr,
+                                    jni::EncodeArtMethod(getSystemClassLoader),
+                                    nullptr);
   JNIEnv* env = soa.Self()->GetJniEnv();
   ScopedLocalRef<jobject> system_class_loader(env, soa.AddLocalReference<jobject>(result.GetL()));
   CHECK(system_class_loader.get() != nullptr);
@@ -746,6 +765,9 @@
 }
 
 bool Runtime::IsDebuggable() const {
+  if (IsFullyDeoptable()) {
+    return true;
+  }
   const OatFile* oat_file = GetOatFileManager().GetPrimaryOatFile();
   return oat_file != nullptr && oat_file->IsDebuggable();
 }
@@ -829,7 +851,7 @@
     if (file.get() == nullptr) {
       return false;
     }
-    std::unique_ptr<ElfFile> elf_file(ElfFile::Open(file.release(),
+    std::unique_ptr<ElfFile> elf_file(ElfFile::Open(file.get(),
                                                     false /* writable */,
                                                     false /* program_header_only */,
                                                     false /* low_4gb */,
@@ -969,6 +991,8 @@
   verify_ = runtime_options.GetOrDefault(Opt::Verify);
   allow_dex_file_fallback_ = !runtime_options.Exists(Opt::NoDexFileFallback);
 
+  is_fully_deoptable_ = runtime_options.Exists(Opt::FullyDeoptable);
+
   no_sig_chain_ = runtime_options.Exists(Opt::NoSigChain);
   force_native_bridge_ = runtime_options.Exists(Opt::ForceNativeBridge);
 
@@ -1005,8 +1029,10 @@
                        runtime_options.GetOrDefault(Opt::NonMovingSpaceCapacity),
                        runtime_options.GetOrDefault(Opt::Image),
                        runtime_options.GetOrDefault(Opt::ImageInstructionSet),
-                       xgc_option.collector_type_,
-                       runtime_options.GetOrDefault(Opt::BackgroundGc),
+                       // Override the collector type to CC if the read barrier config.
+                       kUseReadBarrier ? gc::kCollectorTypeCC : xgc_option.collector_type_,
+                       kUseReadBarrier ? BackgroundGcOption(gc::kCollectorTypeCCBackground)
+                                       : runtime_options.GetOrDefault(Opt::BackgroundGc),
                        runtime_options.GetOrDefault(Opt::LargeObjectSpace),
                        runtime_options.GetOrDefault(Opt::LargeObjectThreshold),
                        runtime_options.GetOrDefault(Opt::ParallelGCThreads),
@@ -1116,7 +1142,12 @@
     }
   }
 
-  java_vm_ = new JavaVMExt(this, runtime_options);
+  std::string error_msg;
+  java_vm_ = JavaVMExt::Create(this, runtime_options, &error_msg);
+  if (java_vm_.get() == nullptr) {
+    LOG(ERROR) << "Could not initialize JavaVMExt: " << error_msg;
+    return false;
+  }
 
   // Add the JniEnv handler.
   // TODO Refactor this stuff.
@@ -1140,7 +1171,6 @@
   CHECK_GE(GetHeap()->GetContinuousSpaces().size(), 1U);
   class_linker_ = new ClassLinker(intern_table_);
   if (GetHeap()->HasBootImageSpace()) {
-    std::string error_msg;
     bool result = class_linker_->InitFromBootImage(&error_msg);
     if (!result) {
       LOG(ERROR) << "Could not initialize from image: " << error_msg;
@@ -1165,10 +1195,6 @@
       ScopedTrace trace2("AddImageStringsToTable");
       GetInternTable()->AddImagesStringsToTable(heap_->GetBootImageSpaces());
     }
-    {
-      ScopedTrace trace2("MoveImageClassesToClassTable");
-      GetClassLinker()->AddBootImageClassesToClassTable();
-    }
   } else {
     std::vector<std::string> dex_filenames;
     Split(boot_class_path_string_, ':', &dex_filenames);
@@ -1191,7 +1217,6 @@
                    &boot_class_path);
     }
     instruction_set_ = runtime_options.GetOrDefault(Opt::ImageInstructionSet);
-    std::string error_msg;
     if (!class_linker_->InitWithoutImage(std::move(boot_class_path), &error_msg)) {
       LOG(ERROR) << "Could not initialize without image: " << error_msg;
       return false;
@@ -1299,6 +1324,28 @@
   return true;
 }
 
+// Attach a new agent and add it to the list of runtime agents
+//
+// TODO: once we decide on the threading model for agents,
+//   revisit this and make sure we're doing this on the right thread
+//   (and we synchronize access to any shared data structures like "agents_")
+//
+void Runtime::AttachAgent(const std::string& agent_arg) {
+  ti::Agent agent(agent_arg);
+
+  int res = 0;
+  std::string err;
+  ti::Agent::LoadError result = agent.Attach(&res, &err);
+
+  if (result == ti::Agent::kNoError) {
+    agents_.push_back(std::move(agent));
+  } else {
+    LOG(ERROR) << "Agent attach failed (result=" << result << ") : " << err;
+    ScopedObjectAccess soa(Thread::Current());
+    ThrowWrappedIOException("%s", err.c_str());
+  }
+}
+
 void Runtime::InitNativeMethods() {
   VLOG(startup) << "Runtime::InitNativeMethods entering";
   Thread* self = Thread::Current();
@@ -1559,6 +1606,8 @@
   mirror::Field::VisitRoots(visitor);
   mirror::MethodType::VisitRoots(visitor);
   mirror::MethodHandleImpl::VisitRoots(visitor);
+  mirror::EmulatedStackFrame::VisitRoots(visitor);
+  mirror::ClassExt::VisitRoots(visitor);
   // Visit all the primitive array types classes.
   mirror::PrimitiveArray<uint8_t>::VisitRoots(visitor);   // BooleanArray
   mirror::PrimitiveArray<int8_t>::VisitRoots(visitor);    // ByteArray
@@ -1720,10 +1769,10 @@
   }
 }
 
-void Runtime::BroadcastForNewSystemWeaks() {
+void Runtime::BroadcastForNewSystemWeaks(bool broadcast_for_checkpoint) {
   // This is used for the read barrier case that uses the thread-local
-  // Thread::GetWeakRefAccessEnabled() flag.
-  CHECK(kUseReadBarrier);
+  // Thread::GetWeakRefAccessEnabled() flag and the checkpoint while weak ref access is disabled
+  // (see ThreadList::RunCheckpoint).
   monitor_list_->BroadcastForNewMonitors();
   intern_table_->BroadcastForNewInterns();
   java_vm_->BroadcastForNewWeakGlobals();
@@ -1731,7 +1780,7 @@
 
   // All other generic system-weak holders.
   for (gc::AbstractSystemWeakHolder* holder : system_weak_holders_) {
-    holder->Broadcast();
+    holder->Broadcast(broadcast_for_checkpoint);
   }
 }
 
@@ -1899,11 +1948,16 @@
   preinitialization_transaction_->RecordWriteField64(obj, field_offset, value, is_volatile);
 }
 
-void Runtime::RecordWriteFieldReference(mirror::Object* obj, MemberOffset field_offset,
-                                        mirror::Object* value, bool is_volatile) const {
+void Runtime::RecordWriteFieldReference(mirror::Object* obj,
+                                        MemberOffset field_offset,
+                                        ObjPtr<mirror::Object> value,
+                                        bool is_volatile) const {
   DCHECK(IsAotCompiler());
   DCHECK(IsActiveTransaction());
-  preinitialization_transaction_->RecordWriteFieldReference(obj, field_offset, value, is_volatile);
+  preinitialization_transaction_->RecordWriteFieldReference(obj,
+                                                            field_offset,
+                                                            value.Ptr(),
+                                                            is_volatile);
 }
 
 void Runtime::RecordWriteArray(mirror::Array* array, size_t index, uint64_t value) const {
@@ -1912,31 +1966,31 @@
   preinitialization_transaction_->RecordWriteArray(array, index, value);
 }
 
-void Runtime::RecordStrongStringInsertion(mirror::String* s) const {
+void Runtime::RecordStrongStringInsertion(ObjPtr<mirror::String> s) const {
   DCHECK(IsAotCompiler());
   DCHECK(IsActiveTransaction());
   preinitialization_transaction_->RecordStrongStringInsertion(s);
 }
 
-void Runtime::RecordWeakStringInsertion(mirror::String* s) const {
+void Runtime::RecordWeakStringInsertion(ObjPtr<mirror::String> s) const {
   DCHECK(IsAotCompiler());
   DCHECK(IsActiveTransaction());
   preinitialization_transaction_->RecordWeakStringInsertion(s);
 }
 
-void Runtime::RecordStrongStringRemoval(mirror::String* s) const {
+void Runtime::RecordStrongStringRemoval(ObjPtr<mirror::String> s) const {
   DCHECK(IsAotCompiler());
   DCHECK(IsActiveTransaction());
   preinitialization_transaction_->RecordStrongStringRemoval(s);
 }
 
-void Runtime::RecordWeakStringRemoval(mirror::String* s) const {
+void Runtime::RecordWeakStringRemoval(ObjPtr<mirror::String> s) const {
   DCHECK(IsAotCompiler());
   DCHECK(IsActiveTransaction());
   preinitialization_transaction_->RecordWeakStringRemoval(s);
 }
 
-void Runtime::RecordResolveString(mirror::DexCache* dex_cache, uint32_t string_idx) const {
+void Runtime::RecordResolveString(ObjPtr<mirror::DexCache> dex_cache, uint32_t string_idx) const {
   DCHECK(IsAotCompiler());
   DCHECK(IsActiveTransaction());
   preinitialization_transaction_->RecordResolveString(dex_cache, string_idx);
@@ -2094,4 +2148,12 @@
   }
 }
 
+NO_RETURN
+void Runtime::Aborter(const char* abort_message) {
+#ifdef __ANDROID__
+  android_set_abort_message(abort_message);
+#endif
+  Runtime::Abort(abort_message);
+}
+
 }  // namespace art
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 5a95f78..de5a356 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -107,9 +107,7 @@
   kVisitRootFlagStartLoggingNewRoots = 0x4,
   kVisitRootFlagStopLoggingNewRoots = 0x8,
   kVisitRootFlagClearRootLog = 0x10,
-  // Non moving means we can have optimizations where we don't visit some roots if they are
-  // definitely reachable from another location. E.g. ArtMethod and ArtField roots.
-  kVisitRootFlagNonMoving = 0x20,
+  kVisitRootFlagClassLoader = 0x20,
 };
 
 class Runtime {
@@ -182,7 +180,7 @@
     return compiler_options_;
   }
 
-  void AddCompilerOption(std::string option) {
+  void AddCompilerOption(const std::string& option) {
     compiler_options_.push_back(option);
   }
 
@@ -277,7 +275,7 @@
   }
 
   JavaVMExt* GetJavaVM() const {
-    return java_vm_;
+    return java_vm_.get();
   }
 
   size_t GetMaxSpinsBeforeThinkLockInflation() const {
@@ -321,11 +319,15 @@
 
   void DisallowNewSystemWeaks() REQUIRES_SHARED(Locks::mutator_lock_);
   void AllowNewSystemWeaks() REQUIRES_SHARED(Locks::mutator_lock_);
-  void BroadcastForNewSystemWeaks() REQUIRES_SHARED(Locks::mutator_lock_);
+  // broadcast_for_checkpoint is true when we broadcast for making blocking threads to respond to
+  // checkpoint requests. It's false when we broadcast to unblock blocking threads after system weak
+  // access is reenabled.
+  void BroadcastForNewSystemWeaks(bool broadcast_for_checkpoint = false);
 
   // Visit all the roots. If only_dirty is true then non-dirty roots won't be visited. If
   // clean_dirty is true then dirty roots will be marked as non-dirty after visiting.
   void VisitRoots(RootVisitor* visitor, VisitRootFlags flags = kVisitRootFlagAllRoots)
+      REQUIRES(!Locks::classlinker_classes_lock_, !Locks::trace_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Visit image roots, only used for hprof since the GC uses the image space mod union table
@@ -335,6 +337,7 @@
   // Visit all of the roots we can do safely do concurrently.
   void VisitConcurrentRoots(RootVisitor* visitor,
                             VisitRootFlags flags = kVisitRootFlagAllRoots)
+      REQUIRES(!Locks::classlinker_classes_lock_, !Locks::trace_lock_)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Visit all of the non thread roots, we can do this with mutators unpaused.
@@ -502,19 +505,22 @@
                           bool is_volatile) const;
   void RecordWriteField64(mirror::Object* obj, MemberOffset field_offset, uint64_t value,
                           bool is_volatile) const;
-  void RecordWriteFieldReference(mirror::Object* obj, MemberOffset field_offset,
-                                 mirror::Object* value, bool is_volatile) const;
+  void RecordWriteFieldReference(mirror::Object* obj,
+                                 MemberOffset field_offset,
+                                 ObjPtr<mirror::Object> value,
+                                 bool is_volatile) const
+      REQUIRES_SHARED(Locks::mutator_lock_);
   void RecordWriteArray(mirror::Array* array, size_t index, uint64_t value) const
       REQUIRES_SHARED(Locks::mutator_lock_);
-  void RecordStrongStringInsertion(mirror::String* s) const
+  void RecordStrongStringInsertion(ObjPtr<mirror::String> s) const
       REQUIRES(Locks::intern_table_lock_);
-  void RecordWeakStringInsertion(mirror::String* s) const
+  void RecordWeakStringInsertion(ObjPtr<mirror::String> s) const
       REQUIRES(Locks::intern_table_lock_);
-  void RecordStrongStringRemoval(mirror::String* s) const
+  void RecordStrongStringRemoval(ObjPtr<mirror::String> s) const
       REQUIRES(Locks::intern_table_lock_);
-  void RecordWeakStringRemoval(mirror::String* s) const
+  void RecordWeakStringRemoval(ObjPtr<mirror::String> s) const
       REQUIRES(Locks::intern_table_lock_);
-  void RecordResolveString(mirror::DexCache* dex_cache, uint32_t string_idx) const
+  void RecordResolveString(ObjPtr<mirror::DexCache> dex_cache, uint32_t string_idx) const
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   void SetFaultMessage(const std::string& message) REQUIRES(!fault_message_lock_);
@@ -586,6 +592,14 @@
 
   bool IsDebuggable() const;
 
+  bool IsFullyDeoptable() const {
+    return is_fully_deoptable_;
+  }
+
+  void SetFullyDeoptable(bool value) {
+    is_fully_deoptable_ = value;
+  }
+
   bool IsNativeDebuggable() const {
     return is_native_debuggable_;
   }
@@ -659,6 +673,11 @@
   void AddSystemWeakHolder(gc::AbstractSystemWeakHolder* holder);
   void RemoveSystemWeakHolder(gc::AbstractSystemWeakHolder* holder);
 
+  NO_RETURN
+  static void Aborter(const char* abort_message);
+
+  void AttachAgent(const std::string& agent_arg);
+
  private:
   static void InitPlatformSignalHandlers();
 
@@ -751,7 +770,7 @@
   SignalCatcher* signal_catcher_;
   std::string stack_trace_file_;
 
-  JavaVMExt* java_vm_;
+  std::unique_ptr<JavaVMExt> java_vm_;
 
   std::unique_ptr<jit::Jit> jit_;
   std::unique_ptr<jit::JitOptions> jit_options_;
@@ -846,6 +865,9 @@
   // Whether we are running under native debugger.
   bool is_native_debuggable_;
 
+  // Whether we are expected to be deoptable at all points.
+  bool is_fully_deoptable_;
+
   // The maximum number of failed boots we allow before pruning the dalvik cache
   // and trying again. This option is only inspected when we're running as a
   // zygote.
diff --git a/runtime/runtime_android.cc b/runtime/runtime_android.cc
index aed6a2b..be97860 100644
--- a/runtime/runtime_android.cc
+++ b/runtime/runtime_android.cc
@@ -27,7 +27,6 @@
 
 namespace art {
 
-static constexpr bool kDumpHeapObjectOnSigsevg = false;
 static constexpr bool kUseSignalHandler = false;
 
 struct sigaction old_action;
@@ -48,11 +47,6 @@
   if (runtime != nullptr) {
     // Print this out first in case DumpObject faults.
     LOG(FATAL_WITHOUT_ABORT) << "Fault message: " << runtime->GetFaultMessage();
-    gc::Heap* heap = runtime->GetHeap();
-    if (kDumpHeapObjectOnSigsevg && heap != nullptr && info != nullptr) {
-      LOG(FATAL_WITHOUT_ABORT) << "Dump heap object at fault address: ";
-      heap->DumpObject(LOG_STREAM(FATAL_WITHOUT_ABORT), reinterpret_cast<mirror::Object*>(info->si_addr));
-    }
   }
   // Run the old signal handler.
   old_action.sa_sigaction(signal_number, info, raw_context);
diff --git a/runtime/runtime_linux.cc b/runtime/runtime_linux.cc
index cee73e1..93704a9 100644
--- a/runtime/runtime_linux.cc
+++ b/runtime/runtime_linux.cc
@@ -21,6 +21,7 @@
 #include <sys/utsname.h>
 #include <inttypes.h>
 
+#include <iostream>
 #include <sstream>
 
 #include "base/dumpable.h"
@@ -35,7 +36,6 @@
 
 namespace art {
 
-static constexpr bool kDumpHeapObjectOnSigsevg = false;
 static constexpr bool kUseSigRTTimeout = true;
 static constexpr bool kDumpNativeStackOnTimeout = true;
 
@@ -337,17 +337,21 @@
   UContext thread_context(raw_context);
   Backtrace thread_backtrace(raw_context);
 
-  LOG(FATAL_WITHOUT_ABORT) << "*** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***\n"
-                      << StringPrintf("Fatal signal %d (%s), code %d (%s)",
-                                      signal_number, GetSignalName(signal_number),
-                                      info->si_code,
-                                      GetSignalCodeName(signal_number, info->si_code))
-                      << (has_address ? StringPrintf(" fault addr %p", info->si_addr) : "") << "\n"
-                      << "OS: " << Dumpable<OsInfo>(os_info) << "\n"
-                      << "Cmdline: " << cmd_line << "\n"
-                      << "Thread: " << tid << " \"" << thread_name << "\"\n"
-                      << "Registers:\n" << Dumpable<UContext>(thread_context) << "\n"
-                      << "Backtrace:\n" << Dumpable<Backtrace>(thread_backtrace);
+  // Note: We are using cerr directly instead of LOG macros to ensure even just partial output
+  //       makes it out. That means we lose the "dalvikvm..." prefix, but that is acceptable
+  //       considering this is an abort situation.
+
+  std::cerr << "*** *** *** *** *** *** *** *** *** *** *** *** *** *** *** ***\n"
+            << StringPrintf("Fatal signal %d (%s), code %d (%s)",
+                            signal_number, GetSignalName(signal_number),
+                            info->si_code,
+                            GetSignalCodeName(signal_number, info->si_code))
+            << (has_address ? StringPrintf(" fault addr %p", info->si_addr) : "") << std::endl
+            << "OS: " << Dumpable<OsInfo>(os_info) << std::endl
+            << "Cmdline: " << cmd_line << std::endl
+            << "Thread: " << tid << " \"" << thread_name << "\"" << std::endl
+            << "Registers:\n" << Dumpable<UContext>(thread_context) << std::endl
+            << "Backtrace:\n" << Dumpable<Backtrace>(thread_backtrace) << std::endl;
   if (kIsDebugBuild && signal_number == SIGSEGV) {
     PrintFileToLog("/proc/self/maps", LogSeverity::FATAL_WITHOUT_ABORT);
   }
@@ -357,23 +361,20 @@
       // Special timeout signal. Try to dump all threads.
       // Note: Do not use DumpForSigQuit, as that might disable native unwind, but the native parts
       //       are of value here.
-      runtime->GetThreadList()->Dump(LOG_STREAM(FATAL_WITHOUT_ABORT), kDumpNativeStackOnTimeout);
+      runtime->GetThreadList()->Dump(std::cerr, kDumpNativeStackOnTimeout);
+      std::cerr << std::endl;
     }
-    gc::Heap* heap = runtime->GetHeap();
-    LOG(FATAL_WITHOUT_ABORT) << "Fault message: " << runtime->GetFaultMessage();
-    if (kDumpHeapObjectOnSigsevg && heap != nullptr && info != nullptr) {
-      LOG(FATAL_WITHOUT_ABORT) << "Dump heap object at fault address: ";
-      heap->DumpObject(LOG_STREAM(FATAL_WITHOUT_ABORT), reinterpret_cast<mirror::Object*>(info->si_addr));
-    }
+    std::cerr << "Fault message: " << runtime->GetFaultMessage() << std::endl;
   }
   if (getenv("debug_db_uid") != nullptr || getenv("art_wait_for_gdb_on_crash") != nullptr) {
-    LOG(FATAL_WITHOUT_ABORT) << "********************************************************\n"
-                        << "* Process " << getpid() << " thread " << tid << " \"" << thread_name
-                        << "\""
-                        << " has been suspended while crashing.\n"
-                        << "* Attach gdb:\n"
-                        << "*     gdb -p " << tid << "\n"
-                        << "********************************************************\n";
+    std::cerr << "********************************************************\n"
+              << "* Process " << getpid() << " thread " << tid << " \"" << thread_name
+              << "\""
+              << " has been suspended while crashing.\n"
+              << "* Attach gdb:\n"
+              << "*     gdb -p " << tid << "\n"
+              << "********************************************************"
+              << std::endl;
     // Wait for debugger to attach.
     while (true) {
     }
diff --git a/runtime/runtime_options.def b/runtime/runtime_options.def
index b01a570..d1970fe 100644
--- a/runtime/runtime_options.def
+++ b/runtime/runtime_options.def
@@ -121,6 +121,7 @@
 RUNTIME_OPTIONS_KEY (std::vector<ti::Agent>,         AgentLib)  // -agentlib:<libname>=<options>, Requires -Xexperimental:agents
 RUNTIME_OPTIONS_KEY (std::vector<ti::Agent>,         AgentPath)  // -agentpath:<libname>=<options>, Requires -Xexperimental:agents
 RUNTIME_OPTIONS_KEY (std::vector<Plugin>,            Plugins)  // -Xplugin:<library> Requires -Xexperimental:runtime-plugins
+RUNTIME_OPTIONS_KEY (Unit,                           FullyDeoptable)  // -Xfully-deoptable
 
 // Not parse-able from command line, but can be provided explicitly.
 // (Do not add anything here that is defined in ParsedOptions::MakeParser)
diff --git a/runtime/scoped_thread_state_change-inl.h b/runtime/scoped_thread_state_change-inl.h
index ac25757..d4469f4 100644
--- a/runtime/scoped_thread_state_change-inl.h
+++ b/runtime/scoped_thread_state_change-inl.h
@@ -83,31 +83,7 @@
 inline ObjPtr<T, kPoison> ScopedObjectAccessAlreadyRunnable::Decode(jobject obj) const {
   Locks::mutator_lock_->AssertSharedHeld(Self());
   DCHECK(IsRunnable());  // Don't work with raw objects in non-runnable states.
-  return down_cast<T*>(Self()->DecodeJObject(obj));
-}
-
-inline ArtField* ScopedObjectAccessAlreadyRunnable::DecodeField(jfieldID fid) const {
-  Locks::mutator_lock_->AssertSharedHeld(Self());
-  DCHECK(IsRunnable());  // Don't work with raw objects in non-runnable states.
-  return reinterpret_cast<ArtField*>(fid);
-}
-
-inline jfieldID ScopedObjectAccessAlreadyRunnable::EncodeField(ArtField* field) const {
-  Locks::mutator_lock_->AssertSharedHeld(Self());
-  DCHECK(IsRunnable());  // Don't work with raw objects in non-runnable states.
-  return reinterpret_cast<jfieldID>(field);
-}
-
-inline ArtMethod* ScopedObjectAccessAlreadyRunnable::DecodeMethod(jmethodID mid) const {
-  Locks::mutator_lock_->AssertSharedHeld(Self());
-  DCHECK(IsRunnable());  // Don't work with raw objects in non-runnable states.
-  return reinterpret_cast<ArtMethod*>(mid);
-}
-
-inline jmethodID ScopedObjectAccessAlreadyRunnable::EncodeMethod(ArtMethod* method) const {
-  Locks::mutator_lock_->AssertSharedHeld(Self());
-  DCHECK(IsRunnable());  // Don't work with raw objects in non-runnable states.
-  return reinterpret_cast<jmethodID>(method);
+  return ObjPtr<T, kPoison>::DownCast(Self()->DecodeJObject(obj));
 }
 
 inline bool ScopedObjectAccessAlreadyRunnable::IsRunnable() const {
diff --git a/runtime/scoped_thread_state_change.h b/runtime/scoped_thread_state_change.h
index 04fd914..b499258 100644
--- a/runtime/scoped_thread_state_change.h
+++ b/runtime/scoped_thread_state_change.h
@@ -94,14 +94,6 @@
   template<typename T, bool kPoison = kIsDebugBuild>
   ObjPtr<T, kPoison> Decode(jobject obj) const REQUIRES_SHARED(Locks::mutator_lock_);
 
-  ArtField* DecodeField(jfieldID fid) const REQUIRES_SHARED(Locks::mutator_lock_);
-
-  jfieldID EncodeField(ArtField* field) const REQUIRES_SHARED(Locks::mutator_lock_);
-
-  ArtMethod* DecodeMethod(jmethodID mid) const REQUIRES_SHARED(Locks::mutator_lock_);
-
-  jmethodID EncodeMethod(ArtMethod* method) const REQUIRES_SHARED(Locks::mutator_lock_);
-
   ALWAYS_INLINE bool IsRunnable() const;
 
  protected:
diff --git a/runtime/stack.cc b/runtime/stack.cc
index 3b5360c..167a30b 100644
--- a/runtime/stack.cc
+++ b/runtime/stack.cc
@@ -49,7 +49,7 @@
     return GetVRegReference(0);
   } else {
     const DexFile::CodeItem* code_item = m->GetCodeItem();
-    CHECK(code_item != nullptr) << PrettyMethod(m);
+    CHECK(code_item != nullptr) << ArtMethod::PrettyMethod(m);
     uint16_t reg = code_item->registers_size_ - code_item->ins_size_;
     return GetVRegReference(reg);
   }
@@ -190,14 +190,14 @@
     const DexFile::CodeItem* code_item = m->GetCodeItem();
     if (code_item == nullptr) {
       UNIMPLEMENTED(ERROR) << "Failed to determine this object of abstract or proxy method: "
-          << PrettyMethod(m);
+          << ArtMethod::PrettyMethod(m);
       return nullptr;
     } else {
       uint16_t reg = code_item->registers_size_ - code_item->ins_size_;
       uint32_t value = 0;
       bool success = GetVReg(m, reg, kReferenceVReg, &value);
       // We currently always guarantee the `this` object is live throughout the method.
-      CHECK(success) << "Failed to read the this object in " << PrettyMethod(m);
+      CHECK(success) << "Failed to read the this object in " << ArtMethod::PrettyMethod(m);
       return reinterpret_cast<mirror::Object*>(value);
     }
   }
@@ -257,8 +257,8 @@
                                             uint32_t* val) const {
   DCHECK_EQ(m, GetMethod());
   const DexFile::CodeItem* code_item = m->GetCodeItem();
-  DCHECK(code_item != nullptr) << PrettyMethod(m);  // Can't be null or how would we compile
-                                                    // its instructions?
+  DCHECK(code_item != nullptr) << m->PrettyMethod();  // Can't be null or how would we compile
+                                                      // its instructions?
   uint16_t number_of_dex_registers = code_item->registers_size_;
   DCHECK_LT(vreg, code_item->registers_size_);
   const OatQuickMethodHeader* method_header = GetCurrentOatQuickMethodHeader();
@@ -606,7 +606,7 @@
   if (m == nullptr) {
     return "upcall";
   }
-  result += PrettyMethod(m);
+  result += m->PrettyMethod();
   result += StringPrintf("' at dex PC 0x%04x", GetDexPc());
   if (!IsShadowFrame()) {
     result += StringPrintf(" (native PC %p)", reinterpret_cast<void*>(GetCurrentQuickFramePc()));
@@ -651,7 +651,7 @@
   uint32_t code_size = OatQuickMethodHeader::FromEntryPoint(code)->code_size_;
   uintptr_t code_start = reinterpret_cast<uintptr_t>(code);
   CHECK(code_start <= pc && pc <= (code_start + code_size))
-      << PrettyMethod(method)
+      << method->PrettyMethod()
       << " pc=" << std::hex << pc
       << " code_start=" << code_start
       << " code_size=" << code_size;
@@ -693,7 +693,7 @@
             }
           }
         }
-        CHECK(in_image) << PrettyMethod(method) << " not in linear alloc or image";
+        CHECK(in_image) << method->PrettyMethod() << " not in linear alloc or image";
       }
     }
     if (cur_quick_frame_ != nullptr) {
@@ -709,7 +709,7 @@
       // TODO: 083-compiler-regressions ManyFloatArgs shows this estimate is wrong.
       // const size_t kMaxExpectedFrameSize = (256 + 2 + 3 + 3) * sizeof(word);
       const size_t kMaxExpectedFrameSize = 2 * KB;
-      CHECK_LE(frame_size, kMaxExpectedFrameSize) << PrettyMethod(method);
+      CHECK_LE(frame_size, kMaxExpectedFrameSize) << method->PrettyMethod();
       size_t return_pc_offset = GetCurrentQuickFrameInfo().GetReturnPcOffset();
       CHECK_LT(return_pc_offset, frame_size);
     }
@@ -762,7 +762,7 @@
   ClassLinker* class_linker = runtime->GetClassLinker();
   const void* entry_point = runtime->GetInstrumentation()->GetQuickCodeFor(method,
                                                                            kRuntimePointerSize);
-  DCHECK(class_linker->IsQuickGenericJniStub(entry_point)) << PrettyMethod(method);
+  DCHECK(class_linker->IsQuickGenericJniStub(entry_point)) << method->PrettyMethod();
   // Generic JNI frame.
   uint32_t handle_refs = GetNumberOfReferenceArgsWithoutReceiver(method) + 1;
   size_t scope_size = HandleScope::SizeOf(handle_refs);
@@ -851,12 +851,12 @@
             } else if (instrumentation_frame.interpreter_entry_) {
               ArtMethod* callee =
                   Runtime::Current()->GetCalleeSaveMethod(Runtime::kSaveRefsAndArgs);
-              CHECK_EQ(GetMethod(), callee) << "Expected: " << PrettyMethod(callee) << " Found: "
-                                            << PrettyMethod(GetMethod());
+              CHECK_EQ(GetMethod(), callee) << "Expected: " << ArtMethod::PrettyMethod(callee)
+                                            << " Found: " << ArtMethod::PrettyMethod(GetMethod());
             } else {
               CHECK_EQ(instrumentation_frame.method_, GetMethod())
-                  << "Expected: " << PrettyMethod(instrumentation_frame.method_)
-                  << " Found: " << PrettyMethod(GetMethod());
+                  << "Expected: " << ArtMethod::PrettyMethod(instrumentation_frame.method_)
+                  << " Found: " << ArtMethod::PrettyMethod(GetMethod());
             }
             if (num_frames_ != 0) {
               // Check agreement of frame Ids only if num_frames_ is computed to avoid infinite
@@ -876,7 +876,7 @@
         cur_quick_frame_ = reinterpret_cast<ArtMethod**>(next_frame);
 
         if (kDebugStackWalk) {
-          LOG(INFO) << PrettyMethod(method) << "@" << method << " size=" << frame_size
+          LOG(INFO) << ArtMethod::PrettyMethod(method) << "@" << method << " size=" << frame_size
               << std::boolalpha
               << " optimized=" << (cur_oat_quick_method_header_ != nullptr &&
                                    cur_oat_quick_method_header_->IsOptimized())
@@ -999,7 +999,7 @@
     self->ClearException();
     self->ThrowNewExceptionF("Ljava/lang/IllegalMonitorStateException;",
                              "did not lock monitor on object of type '%s' before unlocking",
-                             PrettyTypeOf(const_cast<mirror::Object*>(obj)).c_str());
+                             const_cast<mirror::Object*>(obj)->PrettyTypeOf().c_str());
   }
 }
 
@@ -1033,7 +1033,7 @@
       mirror::Object* first = (*monitors_)[0];
       self->ThrowNewExceptionF("Ljava/lang/IllegalMonitorStateException;",
                                "did not unlock monitor on object of type '%s'",
-                               PrettyTypeOf(first).c_str());
+                               mirror::Object::PrettyTypeOf(first).c_str());
 
       // To make sure this path is not triggered again, clean out the monitors.
       monitors_->clear();
diff --git a/runtime/stack.h b/runtime/stack.h
index e9ed497..992bda5 100644
--- a/runtime/stack.h
+++ b/runtime/stack.h
@@ -469,14 +469,21 @@
   }
 };
 
-class JavaFrameRootInfo : public RootInfo {
+class JavaFrameRootInfo FINAL : public RootInfo {
  public:
   JavaFrameRootInfo(uint32_t thread_id, const StackVisitor* stack_visitor, size_t vreg)
      : RootInfo(kRootJavaFrame, thread_id), stack_visitor_(stack_visitor), vreg_(vreg) {
   }
-  virtual void Describe(std::ostream& os) const OVERRIDE
+  void Describe(std::ostream& os) const OVERRIDE
       REQUIRES_SHARED(Locks::mutator_lock_);
 
+  size_t GetVReg() const {
+    return vreg_;
+  }
+  const StackVisitor* GetVisitor() const {
+    return stack_visitor_;
+  }
+
  private:
   const StackVisitor* const stack_visitor_;
   const size_t vreg_;
@@ -572,8 +579,7 @@
   };
 
  protected:
-  StackVisitor(Thread* thread, Context* context, StackWalkKind walk_kind)
-      REQUIRES_SHARED(Locks::mutator_lock_);
+  StackVisitor(Thread* thread, Context* context, StackWalkKind walk_kind);
 
   bool GetRegisterIfAccessible(uint32_t reg, VRegKind kind, uint32_t* val) const
       REQUIRES_SHARED(Locks::mutator_lock_);
@@ -624,7 +630,7 @@
     return num_frames_;
   }
 
-  size_t GetFrameDepth() REQUIRES_SHARED(Locks::mutator_lock_) {
+  size_t GetFrameDepth() const REQUIRES_SHARED(Locks::mutator_lock_) {
     return cur_depth_;
   }
 
diff --git a/runtime/thread-inl.h b/runtime/thread-inl.h
index 6c3811b..c92305f 100644
--- a/runtime/thread-inl.h
+++ b/runtime/thread-inl.h
@@ -29,6 +29,7 @@
 #include "base/mutex-inl.h"
 #include "gc/heap.h"
 #include "jni_env_ext.h"
+#include "runtime.h"
 #include "thread_pool.h"
 
 namespace art {
@@ -71,6 +72,19 @@
       RunCheckpointFunction();
     } else if (ReadFlag(kSuspendRequest)) {
       FullSuspendCheck();
+    } else if (ReadFlag(kEmptyCheckpointRequest)) {
+      RunEmptyCheckpoint();
+    } else {
+      break;
+    }
+  }
+}
+
+inline void Thread::CheckEmptyCheckpoint() {
+  DCHECK_EQ(Thread::Current(), this);
+  for (;;) {
+    if (ReadFlag(kEmptyCheckpointRequest)) {
+      RunEmptyCheckpoint();
     } else {
       break;
     }
@@ -144,8 +158,13 @@
       RunCheckpointFunction();
       continue;
     }
+    if (UNLIKELY((old_state_and_flags.as_struct.flags & kEmptyCheckpointRequest) != 0)) {
+      RunEmptyCheckpoint();
+      continue;
+    }
     // Change the state but keep the current flags (kCheckpointRequest is clear).
     DCHECK_EQ((old_state_and_flags.as_struct.flags & kCheckpointRequest), 0);
+    DCHECK_EQ((old_state_and_flags.as_struct.flags & kEmptyCheckpointRequest), 0);
     new_state_and_flags.as_struct.flags = old_state_and_flags.as_struct.flags;
     new_state_and_flags.as_struct.state = new_state;
 
@@ -162,7 +181,8 @@
 inline void Thread::PassActiveSuspendBarriers() {
   while (true) {
     uint16_t current_flags = tls32_.state_and_flags.as_struct.flags;
-    if (LIKELY((current_flags & (kCheckpointRequest | kActiveSuspendBarrier)) == 0)) {
+    if (LIKELY((current_flags &
+                (kCheckpointRequest | kEmptyCheckpointRequest | kActiveSuspendBarrier)) == 0)) {
       break;
     } else if ((current_flags & kActiveSuspendBarrier) != 0) {
       PassActiveSuspendBarriers(this);
@@ -210,7 +230,8 @@
       }
     } else if ((old_state_and_flags.as_struct.flags & kActiveSuspendBarrier) != 0) {
       PassActiveSuspendBarriers(this);
-    } else if ((old_state_and_flags.as_struct.flags & kCheckpointRequest) != 0) {
+    } else if ((old_state_and_flags.as_struct.flags &
+                (kCheckpointRequest | kEmptyCheckpointRequest)) != 0) {
       // Impossible
       LOG(FATAL) << "Transitioning to runnable with checkpoint flag, "
                  << " flags=" << old_state_and_flags.as_struct.flags
diff --git a/runtime/thread.cc b/runtime/thread.cc
index 7335e40..b99df26 100644
--- a/runtime/thread.cc
+++ b/runtime/thread.cc
@@ -116,6 +116,13 @@
 }
 
 void InitEntryPoints(JniEntryPoints* jpoints, QuickEntryPoints* qpoints);
+void UpdateReadBarrierEntrypoints(QuickEntryPoints* qpoints, bool is_marking);
+
+void Thread::SetIsGcMarkingAndUpdateEntrypoints(bool is_marking) {
+  CHECK(kUseReadBarrier);
+  tls32_.is_gc_marking = is_marking;
+  UpdateReadBarrierEntrypoints(&tlsPtr_.quick_entrypoints, is_marking);
+}
 
 void Thread::InitTlsEntryPoints() {
   // Insert a placeholder so we can easily tell if we call an unimplemented entry point.
@@ -332,7 +339,7 @@
   if (shadow_frame != nullptr) {
     return shadow_frame;
   }
-  VLOG(deopt) << "Create pre-deopted ShadowFrame for " << PrettyMethod(method);
+  VLOG(deopt) << "Create pre-deopted ShadowFrame for " << ArtMethod::PrettyMethod(method);
   shadow_frame = ShadowFrame::CreateDeoptimizedFrame(num_vregs, nullptr, method, dex_pc);
   FrameIdToShadowFrame* record = FrameIdToShadowFrame::Create(frame_id,
                                                               shadow_frame,
@@ -410,9 +417,9 @@
     self->tlsPtr_.opeer = soa.Decode<mirror::Object>(self->tlsPtr_.jpeer).Ptr();
     self->GetJniEnv()->DeleteGlobalRef(self->tlsPtr_.jpeer);
     self->tlsPtr_.jpeer = nullptr;
-    self->SetThreadName(self->GetThreadName(soa)->ToModifiedUtf8().c_str());
+    self->SetThreadName(self->GetThreadName()->ToModifiedUtf8().c_str());
 
-    ArtField* priorityField = soa.DecodeField(WellKnownClasses::java_lang_Thread_priority);
+    ArtField* priorityField = jni::DecodeArtField(WellKnownClasses::java_lang_Thread_priority);
     self->SetNativePriority(priorityField->GetInt(self->tlsPtr_.opeer));
     Dbg::PostThreadStart(self);
 
@@ -430,7 +437,7 @@
 
 Thread* Thread::FromManagedThread(const ScopedObjectAccessAlreadyRunnable& soa,
                                   mirror::Object* thread_peer) {
-  ArtField* f = soa.DecodeField(WellKnownClasses::java_lang_Thread_nativePeer);
+  ArtField* f = jni::DecodeArtField(WellKnownClasses::java_lang_Thread_nativePeer);
   Thread* result = reinterpret_cast<Thread*>(static_cast<uintptr_t>(f->GetLong(thread_peer)));
   // Sanity check that if we have a result it is either suspended or we hold the thread_list_lock_
   // to stop it from going away.
@@ -562,7 +569,7 @@
   if (VLOG_IS_ON(threads)) {
     ScopedObjectAccess soa(env);
 
-    ArtField* f = soa.DecodeField(WellKnownClasses::java_lang_Thread_name);
+    ArtField* f = jni::DecodeArtField(WellKnownClasses::java_lang_Thread_name);
     ObjPtr<mirror::String> java_name =
         f->GetObject(soa.Decode<mirror::Object>(java_peer))->AsString();
     std::string thread_name;
@@ -606,8 +613,9 @@
 
   // Try to allocate a JNIEnvExt for the thread. We do this here as we might be out of memory and
   // do not have a good way to report this on the child's side.
+  std::string error_msg;
   std::unique_ptr<JNIEnvExt> child_jni_env_ext(
-      JNIEnvExt::Create(child_thread, Runtime::Current()->GetJavaVM()));
+      JNIEnvExt::Create(child_thread, Runtime::Current()->GetJavaVM(), &error_msg));
 
   int pthread_create_result = 0;
   if (child_jni_env_ext.get() != nullptr) {
@@ -648,7 +656,7 @@
   env->SetLongField(java_peer, WellKnownClasses::java_lang_Thread_nativePeer, 0);
   {
     std::string msg(child_jni_env_ext.get() == nullptr ?
-        "Could not allocate JNI Env" :
+        StringPrintf("Could not allocate JNI Env: %s", error_msg.c_str()) :
         StringPrintf("pthread_create (%s stack) failed: %s",
                                  PrettySize(stack_size).c_str(), strerror(pthread_create_result)));
     ScopedObjectAccess soa(env);
@@ -693,8 +701,10 @@
     DCHECK_EQ(jni_env_ext->self, this);
     tlsPtr_.jni_env = jni_env_ext;
   } else {
-    tlsPtr_.jni_env = JNIEnvExt::Create(this, java_vm);
+    std::string error_msg;
+    tlsPtr_.jni_env = JNIEnvExt::Create(this, java_vm, &error_msg);
     if (tlsPtr_.jni_env == nullptr) {
+      LOG(ERROR) << "Failed to create JNIEnvExt: " << error_msg;
       return false;
     }
   }
@@ -820,7 +830,7 @@
 
   ScopedObjectAccess soa(self);
   StackHandleScope<1> hs(self);
-  MutableHandle<mirror::String> peer_thread_name(hs.NewHandle(GetThreadName(soa)));
+  MutableHandle<mirror::String> peer_thread_name(hs.NewHandle(GetThreadName()));
   if (peer_thread_name.Get() == nullptr) {
     // The Thread constructor should have set the Thread.name to a
     // non-null value. However, because we can run without code
@@ -831,7 +841,7 @@
     } else {
       InitPeer<false>(soa, thread_is_daemon, thread_group, thread_name.get(), thread_priority);
     }
-    peer_thread_name.Assign(GetThreadName(soa));
+    peer_thread_name.Assign(GetThreadName());
   }
   // 'thread_name' may have been null, so don't trust 'peer_thread_name' to be non-null.
   if (peer_thread_name.Get() != nullptr) {
@@ -842,13 +852,13 @@
 template<bool kTransactionActive>
 void Thread::InitPeer(ScopedObjectAccess& soa, jboolean thread_is_daemon, jobject thread_group,
                       jobject thread_name, jint thread_priority) {
-  soa.DecodeField(WellKnownClasses::java_lang_Thread_daemon)->
+  jni::DecodeArtField(WellKnownClasses::java_lang_Thread_daemon)->
       SetBoolean<kTransactionActive>(tlsPtr_.opeer, thread_is_daemon);
-  soa.DecodeField(WellKnownClasses::java_lang_Thread_group)->
+  jni::DecodeArtField(WellKnownClasses::java_lang_Thread_group)->
       SetObject<kTransactionActive>(tlsPtr_.opeer, soa.Decode<mirror::Object>(thread_group));
-  soa.DecodeField(WellKnownClasses::java_lang_Thread_name)->
+  jni::DecodeArtField(WellKnownClasses::java_lang_Thread_name)->
       SetObject<kTransactionActive>(tlsPtr_.opeer, soa.Decode<mirror::Object>(thread_name));
-  soa.DecodeField(WellKnownClasses::java_lang_Thread_priority)->
+  jni::DecodeArtField(WellKnownClasses::java_lang_Thread_priority)->
       SetInt<kTransactionActive>(tlsPtr_.opeer, thread_priority);
 }
 
@@ -944,8 +954,8 @@
   DumpStack(os, dump_native_stack, backtrace_map);
 }
 
-mirror::String* Thread::GetThreadName(const ScopedObjectAccessAlreadyRunnable& soa) const {
-  ArtField* f = soa.DecodeField(WellKnownClasses::java_lang_Thread_name);
+mirror::String* Thread::GetThreadName() const {
+  ArtField* f = jni::DecodeArtField(WellKnownClasses::java_lang_Thread_name);
   if (tlsPtr_.opeer == nullptr) {
     return nullptr;
   }
@@ -1145,6 +1155,12 @@
   } while (!done);
 }
 
+void Thread::RunEmptyCheckpoint() {
+  DCHECK_EQ(Thread::Current(), this);
+  AtomicClearFlag(kEmptyCheckpointRequest);
+  Runtime::Current()->GetThreadList()->EmptyCheckpointBarrier()->Pass(this);
+}
+
 bool Thread::RequestCheckpoint(Closure* function) {
   union StateAndFlags old_state_and_flags;
   old_state_and_flags.as_int = tls32_.state_and_flags.as_int;
@@ -1172,6 +1188,107 @@
   return success;
 }
 
+bool Thread::RequestEmptyCheckpoint() {
+  union StateAndFlags old_state_and_flags;
+  old_state_and_flags.as_int = tls32_.state_and_flags.as_int;
+  if (old_state_and_flags.as_struct.state != kRunnable) {
+    // If it's not runnable, we don't need to do anything because it won't be in the middle of a
+    // heap access (eg. the read barrier).
+    return false;
+  }
+
+  // We must be runnable to request a checkpoint.
+  DCHECK_EQ(old_state_and_flags.as_struct.state, kRunnable);
+  union StateAndFlags new_state_and_flags;
+  new_state_and_flags.as_int = old_state_and_flags.as_int;
+  new_state_and_flags.as_struct.flags |= kEmptyCheckpointRequest;
+  bool success = tls32_.state_and_flags.as_atomic_int.CompareExchangeStrongSequentiallyConsistent(
+      old_state_and_flags.as_int, new_state_and_flags.as_int);
+  if (success) {
+    TriggerSuspend();
+  }
+  return success;
+}
+
+class BarrierClosure : public Closure {
+ public:
+  explicit BarrierClosure(Closure* wrapped) : wrapped_(wrapped), barrier_(0) {}
+
+  void Run(Thread* self) OVERRIDE {
+    wrapped_->Run(self);
+    barrier_.Pass(self);
+  }
+
+  void Wait(Thread* self) {
+    barrier_.Increment(self, 1);
+  }
+
+ private:
+  Closure* wrapped_;
+  Barrier barrier_;
+};
+
+void Thread::RequestSynchronousCheckpoint(Closure* function) {
+  if (this == Thread::Current()) {
+    // Asked to run on this thread. Just run.
+    function->Run(this);
+    return;
+  }
+  Thread* self = Thread::Current();
+
+  // The current thread is not this thread.
+
+  for (;;) {
+    // If this thread is runnable, try to schedule a checkpoint. Do some gymnastics to not hold the
+    // suspend-count lock for too long.
+    if (GetState() == ThreadState::kRunnable) {
+      BarrierClosure barrier_closure(function);
+      bool installed = false;
+      {
+        MutexLock mu(self, *Locks::thread_suspend_count_lock_);
+        installed = RequestCheckpoint(&barrier_closure);
+      }
+      if (installed) {
+        barrier_closure.Wait(self);
+        return;
+      }
+      // Fall-through.
+    }
+
+    // This thread is not runnable, make sure we stay suspended, then run the checkpoint.
+    // Note: ModifySuspendCountInternal also expects the thread_list_lock to be held in
+    //       certain situations.
+    {
+      MutexLock mu(self, *Locks::thread_list_lock_);
+      MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
+
+      if (!ModifySuspendCount(self, +1, nullptr, false)) {
+        // Just retry the loop.
+        sched_yield();
+        continue;
+      }
+    }
+
+    while (GetState() == ThreadState::kRunnable) {
+      // We became runnable again. Wait till the suspend triggered in ModifySuspendCount
+      // moves us to suspended.
+      sched_yield();
+    }
+
+    function->Run(this);
+
+    {
+      MutexLock mu(self, *Locks::thread_list_lock_);
+      MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
+
+      DCHECK_NE(GetState(), ThreadState::kRunnable);
+      CHECK(ModifySuspendCount(self, -1, nullptr, false));
+    }
+
+    return;  // We're done, break out of the loop.
+  }
+}
+
 Closure* Thread::GetFlipFunction() {
   Atomic<Closure*>* atomic_func = reinterpret_cast<Atomic<Closure*>*>(&tlsPtr_.flip_function);
   Closure* func;
@@ -1224,17 +1341,18 @@
   // cause ScopedObjectAccessUnchecked to deadlock.
   if (gAborting == 0 && self != nullptr && thread != nullptr && thread->tlsPtr_.opeer != nullptr) {
     ScopedObjectAccessUnchecked soa(self);
-    priority = soa.DecodeField(WellKnownClasses::java_lang_Thread_priority)
+    priority = jni::DecodeArtField(WellKnownClasses::java_lang_Thread_priority)
         ->GetInt(thread->tlsPtr_.opeer);
-    is_daemon = soa.DecodeField(WellKnownClasses::java_lang_Thread_daemon)
+    is_daemon = jni::DecodeArtField(WellKnownClasses::java_lang_Thread_daemon)
         ->GetBoolean(thread->tlsPtr_.opeer);
 
     ObjPtr<mirror::Object> thread_group =
-        soa.DecodeField(WellKnownClasses::java_lang_Thread_group)->GetObject(thread->tlsPtr_.opeer);
+        jni::DecodeArtField(WellKnownClasses::java_lang_Thread_group)
+            ->GetObject(thread->tlsPtr_.opeer);
 
     if (thread_group != nullptr) {
       ArtField* group_name_field =
-          soa.DecodeField(WellKnownClasses::java_lang_ThreadGroup_name);
+          jni::DecodeArtField(WellKnownClasses::java_lang_ThreadGroup_name);
       ObjPtr<mirror::String> group_name_string =
           group_name_field->GetObject(thread_group)->AsString();
       group_name = (group_name_string != nullptr) ? group_name_string->ToModifiedUtf8() : "<null>";
@@ -1380,7 +1498,7 @@
       last_method = m;
     }
     if (repetition_count < kMaxRepetition) {
-      os << "  at " << PrettyMethod(m, false);
+      os << "  at " << m->PrettyMethod(false);
       if (m->IsNative()) {
         os << "(Native method)";
       } else {
@@ -1420,11 +1538,11 @@
         // Getting the identity hashcode here would result in lock inflation and suspension of the
         // current thread, which isn't safe if this is the only runnable thread.
         os << StringPrintf("<@addr=0x%" PRIxPTR "> (a %s)", reinterpret_cast<intptr_t>(o),
-                           PrettyTypeOf(o).c_str());
+                           o->PrettyTypeOf().c_str());
       } else {
         // IdentityHashCode can cause thread suspension, which would invalidate o if it moved. So
         // we get the pretty type beofre we call IdentityHashCode.
-        const std::string pretty_type(PrettyTypeOf(o));
+        const std::string pretty_type(o->PrettyTypeOf());
         os << StringPrintf("<0x%08x> (a %s)", o->IdentityHashCode(), pretty_type.c_str());
       }
     }
@@ -1668,7 +1786,7 @@
       OVERRIDE NO_THREAD_SAFETY_ANALYSIS {
     if (self_->HoldsLock(entered_monitor)) {
       LOG(WARNING) << "Calling MonitorExit on object "
-                   << entered_monitor << " (" << PrettyTypeOf(entered_monitor) << ")"
+                   << entered_monitor << " (" << entered_monitor->PrettyTypeOf() << ")"
                    << " left locked by native thread "
                    << *Thread::Current() << " which is detaching";
       entered_monitor->MonitorExit(self_);
@@ -1710,10 +1828,10 @@
 
     // this.nativePeer = 0;
     if (Runtime::Current()->IsActiveTransaction()) {
-      soa.DecodeField(WellKnownClasses::java_lang_Thread_nativePeer)
+      jni::DecodeArtField(WellKnownClasses::java_lang_Thread_nativePeer)
           ->SetLong<true>(tlsPtr_.opeer, 0);
     } else {
-      soa.DecodeField(WellKnownClasses::java_lang_Thread_nativePeer)
+      jni::DecodeArtField(WellKnownClasses::java_lang_Thread_nativePeer)
           ->SetLong<false>(tlsPtr_.opeer, 0);
     }
     Dbg::PostThreadDeath(self);
@@ -1721,7 +1839,7 @@
     // Thread.join() is implemented as an Object.wait() on the Thread.lock object. Signal anyone
     // who is waiting.
     ObjPtr<mirror::Object> lock =
-        soa.DecodeField(WellKnownClasses::java_lang_Thread_lock)->GetObject(tlsPtr_.opeer);
+        jni::DecodeArtField(WellKnownClasses::java_lang_Thread_lock)->GetObject(tlsPtr_.opeer);
     // (This conditional is only needed for tests, where Thread.lock won't have been set.)
     if (lock != nullptr) {
       StackHandleScope<1> hs(self);
@@ -1751,7 +1869,8 @@
     tlsPtr_.jni_env = nullptr;
   }
   CHECK_NE(GetState(), kRunnable);
-  CHECK_NE(ReadFlag(kCheckpointRequest), true);
+  CHECK(!ReadFlag(kCheckpointRequest));
+  CHECK(!ReadFlag(kEmptyCheckpointRequest));
   CHECK(tlsPtr_.checkpoint_function == nullptr);
   CHECK_EQ(checkpoint_overflow_.size(), 0u);
   CHECK(tlsPtr_.flip_function == nullptr);
@@ -1812,7 +1931,7 @@
 void Thread::RemoveFromThreadGroup(ScopedObjectAccess& soa) {
   // this.group.removeThread(this);
   // group can be null if we're in the compiler or a test.
-  ObjPtr<mirror::Object> ogroup = soa.DecodeField(WellKnownClasses::java_lang_Thread_group)
+  ObjPtr<mirror::Object> ogroup = jni::DecodeArtField(WellKnownClasses::java_lang_Thread_group)
       ->GetObject(tlsPtr_.opeer);
   if (ogroup != nullptr) {
     ScopedLocalRef<jobject> group(soa.Env(), soa.AddLocalReference<jobject>(ogroup));
@@ -1824,18 +1943,10 @@
   }
 }
 
-size_t Thread::NumHandleReferences() {
-  size_t count = 0;
-  for (HandleScope* cur = tlsPtr_.top_handle_scope; cur != nullptr; cur = cur->GetLink()) {
-    count += cur->NumberOfReferences();
-  }
-  return count;
-}
-
 bool Thread::HandleScopeContains(jobject obj) const {
   StackReference<mirror::Object>* hs_entry =
       reinterpret_cast<StackReference<mirror::Object>*>(obj);
-  for (HandleScope* cur = tlsPtr_.top_handle_scope; cur!= nullptr; cur = cur->GetLink()) {
+  for (BaseHandleScope* cur = tlsPtr_.top_handle_scope; cur!= nullptr; cur = cur->GetLink()) {
     if (cur->Contains(hs_entry)) {
       return true;
     }
@@ -1847,21 +1958,17 @@
 void Thread::HandleScopeVisitRoots(RootVisitor* visitor, uint32_t thread_id) {
   BufferedRootVisitor<kDefaultBufferedRootCount> buffered_visitor(
       visitor, RootInfo(kRootNativeStack, thread_id));
-  for (HandleScope* cur = tlsPtr_.top_handle_scope; cur; cur = cur->GetLink()) {
-    for (size_t j = 0, count = cur->NumberOfReferences(); j < count; ++j) {
-      // GetReference returns a pointer to the stack reference within the handle scope. If this
-      // needs to be updated, it will be done by the root visitor.
-      buffered_visitor.VisitRootIfNonNull(cur->GetHandle(j).GetReference());
-    }
+  for (BaseHandleScope* cur = tlsPtr_.top_handle_scope; cur; cur = cur->GetLink()) {
+    cur->VisitRoots(buffered_visitor);
   }
 }
 
-mirror::Object* Thread::DecodeJObject(jobject obj) const {
+ObjPtr<mirror::Object> Thread::DecodeJObject(jobject obj) const {
   if (obj == nullptr) {
     return nullptr;
   }
   IndirectRef ref = reinterpret_cast<IndirectRef>(obj);
-  IndirectRefKind kind = GetIndirectRefKind(ref);
+  IndirectRefKind kind = IndirectReferenceTable::GetIndirectRefKind(ref);
   ObjPtr<mirror::Object> result;
   bool expect_null = false;
   // The "kinds" below are sorted by the frequency we expect to encounter them.
@@ -1875,7 +1982,7 @@
     if (LIKELY(HandleScopeContains(obj))) {
       // Read from handle scope.
       result = reinterpret_cast<StackReference<mirror::Object>*>(obj)->AsMirrorPtr();
-      VerifyObject(result.Ptr());
+      VerifyObject(result);
     } else {
       tlsPtr_.jni_env->vm->JniAbortF(nullptr, "use of invalid jobject %p", obj);
       expect_null = true;
@@ -1897,13 +2004,13 @@
     tlsPtr_.jni_env->vm->JniAbortF(nullptr, "use of deleted %s %p",
                                    ToStr<IndirectRefKind>(kind).c_str(), obj);
   }
-  return result.Ptr();
+  return result;
 }
 
 bool Thread::IsJWeakCleared(jweak obj) const {
   CHECK(obj != nullptr);
   IndirectRef ref = reinterpret_cast<IndirectRef>(obj);
-  IndirectRefKind kind = GetIndirectRefKind(ref);
+  IndirectRefKind kind = IndirectReferenceTable::GetIndirectRefKind(ref);
   CHECK_EQ(kind, kWeakGlobal);
   return tlsPtr_.jni_env->vm->IsWeakGlobalCleared(const_cast<Thread*>(this), ref);
 }
@@ -2276,7 +2383,7 @@
   }
   DCHECK(!runtime->IsStarted() || exception_class->IsThrowableClass());
   Handle<mirror::Throwable> exception(
-      hs.NewHandle(down_cast<mirror::Throwable*>(exception_class->AllocObject(this))));
+      hs.NewHandle(ObjPtr<mirror::Throwable>::DownCast(exception_class->AllocObject(this))));
 
   // If we couldn't allocate the exception, throw the pre-allocated out of memory exception.
   if (exception.Get() == nullptr) {
@@ -2318,17 +2425,17 @@
     // case in the compiler. We won't be able to invoke the constructor of the exception, so set
     // the exception fields directly.
     if (msg != nullptr) {
-      exception->SetDetailMessage(down_cast<mirror::String*>(DecodeJObject(msg_string.get())));
+      exception->SetDetailMessage(DecodeJObject(msg_string.get())->AsString());
     }
     if (cause.get() != nullptr) {
-      exception->SetCause(down_cast<mirror::Throwable*>(DecodeJObject(cause.get())));
+      exception->SetCause(DecodeJObject(cause.get())->AsThrowable());
     }
     ScopedLocalRef<jobject> trace(GetJniEnv(),
                                   Runtime::Current()->IsActiveTransaction()
                                       ? CreateInternalStackTrace<true>(soa)
                                       : CreateInternalStackTrace<false>(soa));
     if (trace.get() != nullptr) {
-      exception->SetStackState(down_cast<mirror::Throwable*>(DecodeJObject(trace.get())));
+      exception->SetStackState(DecodeJObject(trace.get()).Ptr());
     }
     SetException(exception.Get());
   } else {
@@ -2344,7 +2451,7 @@
       ++i;
     }
     ScopedLocalRef<jobject> ref(soa.Env(), soa.AddLocalReference<jobject>(exception.Get()));
-    InvokeWithJValues(soa, ref.get(), soa.EncodeMethod(exception_init_method), jv_args);
+    InvokeWithJValues(soa, ref.get(), jni::EncodeArtMethod(exception_init_method), jv_args);
     if (LIKELY(!IsExceptionPending())) {
       SetException(exception.Get());
     }
@@ -2433,7 +2540,7 @@
   QUICK_ENTRY_POINT_INFO(pAllocStringFromChars)
   QUICK_ENTRY_POINT_INFO(pAllocStringFromString)
   QUICK_ENTRY_POINT_INFO(pInstanceofNonTrivial)
-  QUICK_ENTRY_POINT_INFO(pCheckCast)
+  QUICK_ENTRY_POINT_INFO(pCheckInstanceOf)
   QUICK_ENTRY_POINT_INFO(pInitializeStaticStorage)
   QUICK_ENTRY_POINT_INFO(pInitializeTypeAndVerifyAccess)
   QUICK_ENTRY_POINT_INFO(pInitializeType)
@@ -2701,7 +2808,7 @@
 
   bool VisitFrame() REQUIRES_SHARED(Locks::mutator_lock_) {
     if (false) {
-      LOG(INFO) << "Visiting stack roots in " << PrettyMethod(GetMethod())
+      LOG(INFO) << "Visiting stack roots in " << ArtMethod::PrettyMethod(GetMethod())
                 << StringPrintf("@ PC:%04x", GetDexPc());
     }
     ShadowFrame* shadow_frame = GetCurrentShadowFrame();
@@ -2763,7 +2870,8 @@
             LOG(FATAL_WITHOUT_ABORT) << "Method@" << method->GetDexMethodIndex() << ":" << method
                                      << " klass@" << klass;
             // Pretty info last in case it crashes.
-            LOG(FATAL) << "Method " << PrettyMethod(method) << " klass " << PrettyClass(klass);
+            LOG(FATAL) << "Method " << method->PrettyMethod() << " klass "
+                       << klass->PrettyClass();
           }
         }
       }
diff --git a/runtime/thread.h b/runtime/thread.h
index 97053de..b2983cc 100644
--- a/runtime/thread.h
+++ b/runtime/thread.h
@@ -105,7 +105,8 @@
   kSuspendRequest   = 1,  // If set implies that suspend_count_ > 0 and the Thread should enter the
                           // safepoint handler.
   kCheckpointRequest = 2,  // Request that the thread do some checkpoint work and then continue.
-  kActiveSuspendBarrier = 4  // Register that at least 1 suspend barrier needs to be passed.
+  kEmptyCheckpointRequest = 4,  // Request that the thread do empty checkpoint and then continue.
+  kActiveSuspendBarrier = 8,  // Register that at least 1 suspend barrier needs to be passed.
 };
 
 enum class StackedShadowFrameType {
@@ -171,6 +172,9 @@
   // Process pending thread suspension request and handle if pending.
   void CheckSuspend() REQUIRES_SHARED(Locks::mutator_lock_);
 
+  // Process a pending empty checkpoint if pending.
+  void CheckEmptyCheckpoint() REQUIRES_SHARED(Locks::mutator_lock_);
+
   static Thread* FromManagedThread(const ScopedObjectAccessAlreadyRunnable& ts,
                                    mirror::Object* thread_peer)
       REQUIRES(Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_)
@@ -237,6 +241,10 @@
 
   bool RequestCheckpoint(Closure* function)
       REQUIRES(Locks::thread_suspend_count_lock_);
+  void RequestSynchronousCheckpoint(Closure* function)
+      REQUIRES(!Locks::thread_suspend_count_lock_, !Locks::thread_list_lock_);
+  bool RequestEmptyCheckpoint()
+      REQUIRES(Locks::thread_suspend_count_lock_);
 
   void SetFlipFunction(Closure* function);
   Closure* GetFlipFunction();
@@ -331,8 +339,7 @@
   }
 
   // Returns the java.lang.Thread's name, or null if this Thread* doesn't have a peer.
-  mirror::String* GetThreadName(const ScopedObjectAccessAlreadyRunnable& ts) const
-      REQUIRES_SHARED(Locks::mutator_lock_);
+  mirror::String* GetThreadName() const REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Sets 'name' to the java.lang.Thread's name. This requires no transition to managed code,
   // allocation, or locking.
@@ -452,7 +459,7 @@
   }
 
   // Convert a jobject into a Object*
-  mirror::Object* DecodeJObject(jobject obj) const REQUIRES_SHARED(Locks::mutator_lock_);
+  ObjPtr<mirror::Object> DecodeJObject(jobject obj) const REQUIRES_SHARED(Locks::mutator_lock_);
   // Checks if the weak global ref has been cleared by the GC without decoding it.
   bool IsJWeakCleared(jweak obj) const REQUIRES_SHARED(Locks::mutator_lock_);
 
@@ -570,6 +577,10 @@
         OFFSETOF_MEMBER(tls_32bit_sized_values, is_gc_marking));
   }
 
+  static constexpr size_t IsGcMarkingSize() {
+    return sizeof(tls32_.is_gc_marking);
+  }
+
   // Deoptimize the Java stack.
   void DeoptimizeWithDeoptimizationException(JValue* result) REQUIRES_SHARED(Locks::mutator_lock_);
 
@@ -727,9 +738,6 @@
     tlsPtr_.stack_end = tlsPtr_.stack_begin + GetStackOverflowReservedBytes(kRuntimeISA);
   }
 
-  // Install the protected region for implicit stack checks.
-  void InstallImplicitProtection();
-
   bool IsHandlingStackOverflow() const {
     return tlsPtr_.stack_end == tlsPtr_.stack_begin;
   }
@@ -780,36 +788,23 @@
         ManagedStack::TopShadowFrameOffset());
   }
 
-  // Number of references allocated in JNI ShadowFrames on this thread.
-  size_t NumJniShadowFrameReferences() const REQUIRES_SHARED(Locks::mutator_lock_) {
-    return tlsPtr_.managed_stack.NumJniShadowFrameReferences();
-  }
-
-  // Number of references in handle scope on this thread.
-  size_t NumHandleReferences();
-
-  // Number of references allocated in handle scopes & JNI shadow frames on this thread.
-  size_t NumStackReferences() REQUIRES_SHARED(Locks::mutator_lock_) {
-    return NumHandleReferences() + NumJniShadowFrameReferences();
-  }
-
   // Is the given obj in this thread's stack indirect reference table?
   bool HandleScopeContains(jobject obj) const;
 
   void HandleScopeVisitRoots(RootVisitor* visitor, uint32_t thread_id)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
-  HandleScope* GetTopHandleScope() {
+  BaseHandleScope* GetTopHandleScope() {
     return tlsPtr_.top_handle_scope;
   }
 
-  void PushHandleScope(HandleScope* handle_scope) {
+  void PushHandleScope(BaseHandleScope* handle_scope) {
     DCHECK_EQ(handle_scope->GetLink(), tlsPtr_.top_handle_scope);
     tlsPtr_.top_handle_scope = handle_scope;
   }
 
-  HandleScope* PopHandleScope() {
-    HandleScope* handle_scope = tlsPtr_.top_handle_scope;
+  BaseHandleScope* PopHandleScope() {
+    BaseHandleScope* handle_scope = tlsPtr_.top_handle_scope;
     DCHECK(handle_scope != nullptr);
     tlsPtr_.top_handle_scope = tlsPtr_.top_handle_scope->GetLink();
     return handle_scope;
@@ -856,10 +851,7 @@
     return tls32_.is_gc_marking;
   }
 
-  void SetIsGcMarking(bool is_marking) {
-    CHECK(kUseReadBarrier);
-    tls32_.is_gc_marking = is_marking;
-  }
+  void SetIsGcMarkingAndUpdateEntrypoints(bool is_marking);
 
   bool GetWeakRefAccessEnabled() const {
     CHECK(kUseReadBarrier);
@@ -978,11 +970,6 @@
     tlsPtr_.held_mutexes[level] = mutex;
   }
 
-  void RunCheckpointFunction();
-
-  bool PassActiveSuspendBarriers(Thread* self)
-      REQUIRES(!Locks::thread_suspend_count_lock_);
-
   void ClearSuspendBarrier(AtomicInteger* target)
       REQUIRES(Locks::thread_suspend_count_lock_);
 
@@ -1232,6 +1219,15 @@
                                   bool for_debugger)
       REQUIRES(Locks::thread_suspend_count_lock_);
 
+  void RunCheckpointFunction();
+  void RunEmptyCheckpoint();
+
+  bool PassActiveSuspendBarriers(Thread* self)
+      REQUIRES(!Locks::thread_suspend_count_lock_);
+
+  // Install the protected region for implicit stack checks.
+  void InstallImplicitProtection();
+
   // 32 bits of atomically changed state and flags. Keeping as 32 bits allows and atomic CAS to
   // change from being Suspended to Runnable without a suspend request occurring.
   union PACKED(4) StateAndFlags {
@@ -1388,7 +1384,7 @@
       stacked_shadow_frame_record(nullptr), deoptimization_context_stack(nullptr),
       frame_id_to_shadow_frame(nullptr), name(nullptr), pthread_self(0),
       last_no_thread_suspension_cause(nullptr), checkpoint_function(nullptr),
-      thread_local_start(nullptr), thread_local_pos(nullptr), thread_local_end(nullptr),
+      thread_local_pos(nullptr), thread_local_end(nullptr), thread_local_start(nullptr),
       thread_local_objects(0), mterp_current_ibase(nullptr), mterp_default_ibase(nullptr),
       mterp_alt_ibase(nullptr), thread_local_alloc_stack_top(nullptr),
       thread_local_alloc_stack_end(nullptr), nested_signal_state(nullptr),
@@ -1446,7 +1442,7 @@
     mirror::Object* monitor_enter_object;
 
     // Top of linked list of handle scopes or null for none.
-    HandleScope* top_handle_scope;
+    BaseHandleScope* top_handle_scope;
 
     // Needed to get the right ClassLoader in JNI_OnLoad, but also
     // useful for testing.
@@ -1502,12 +1498,13 @@
     JniEntryPoints jni_entrypoints;
     QuickEntryPoints quick_entrypoints;
 
-    // Thread-local allocation pointer.
-    uint8_t* thread_local_start;
     // thread_local_pos and thread_local_end must be consecutive for ldrd and are 8 byte aligned for
     // potentially better performance.
     uint8_t* thread_local_pos;
     uint8_t* thread_local_end;
+    // Thread-local allocation pointer.
+    uint8_t* thread_local_start;
+
     size_t thread_local_objects;
 
     // Mterp jump table bases.
@@ -1573,7 +1570,8 @@
 
 class SCOPED_CAPABILITY ScopedAssertNoThreadSuspension {
  public:
-  ALWAYS_INLINE ScopedAssertNoThreadSuspension(const char* cause) ACQUIRE(Roles::uninterruptible_) {
+  ALWAYS_INLINE explicit ScopedAssertNoThreadSuspension(const char* cause)
+      ACQUIRE(Roles::uninterruptible_) {
     if (kIsDebugBuild) {
       self_ = Thread::Current();
       old_cause_ = self_->StartAssertNoThreadSuspension(cause);
diff --git a/runtime/thread_list.cc b/runtime/thread_list.cc
index f40f347..27fb37a 100644
--- a/runtime/thread_list.cc
+++ b/runtime/thread_list.cc
@@ -32,6 +32,7 @@
 #include "base/timing_logger.h"
 #include "debugger.h"
 #include "gc/collector/concurrent_copying.h"
+#include "gc/reference_processor.h"
 #include "jni_internal.h"
 #include "lock_word.h"
 #include "monitor.h"
@@ -61,14 +62,15 @@
 // Whether we should try to dump the native stack of unattached threads. See commit ed8b723 for
 // some history.
 // Turned off again. b/29248079
-static constexpr bool kDumpUnattachedThreadNativeStack = false;
+static constexpr bool kDumpUnattachedThreadNativeStackForSigQuit = false;
 
 ThreadList::ThreadList()
     : suspend_all_count_(0),
       debug_suspend_all_count_(0),
       unregistering_count_(0),
       suspend_all_historam_("suspend all histogram", 16, 64),
-      long_suspend_(false) {
+      long_suspend_(false),
+      empty_checkpoint_barrier_(new Barrier(0)) {
   CHECK(Monitor::IsValidLockWord(LockWord::FromThinLockId(kMaxThreadId, 1, 0U)));
 }
 
@@ -137,7 +139,7 @@
   }
   bool dump_native_stack = Runtime::Current()->GetDumpNativeStackOnSigQuit();
   Dump(os, dump_native_stack);
-  DumpUnattachedThreads(os, dump_native_stack);
+  DumpUnattachedThreads(os, dump_native_stack && kDumpUnattachedThreadNativeStackForSigQuit);
 }
 
 static void DumpUnattachedThread(std::ostream& os, pid_t tid, bool dump_native_stack)
@@ -146,7 +148,7 @@
   // refactor DumpState to avoid skipping analysis.
   Thread::DumpState(os, nullptr, tid);
   DumpKernelStack(os, tid, "  kernel: ", false);
-  if (dump_native_stack && kDumpUnattachedThreadNativeStack) {
+  if (dump_native_stack) {
     DumpNativeStack(os, tid, nullptr, "  native: ");
   }
   os << "\n";
@@ -194,6 +196,7 @@
     // Note thread and self may not be equal if thread was already suspended at the point of the
     // request.
     Thread* self = Thread::Current();
+    CHECK(self != nullptr);
     std::ostringstream local_os;
     {
       ScopedObjectAccess soa(self);
@@ -231,19 +234,24 @@
 };
 
 void ThreadList::Dump(std::ostream& os, bool dump_native_stack) {
+  Thread* self = Thread::Current();
   {
-    MutexLock mu(Thread::Current(), *Locks::thread_list_lock_);
+    MutexLock mu(self, *Locks::thread_list_lock_);
     os << "DALVIK THREADS (" << list_.size() << "):\n";
   }
-  DumpCheckpoint checkpoint(&os, dump_native_stack);
-  size_t threads_running_checkpoint;
-  {
-    // Use SOA to prevent deadlocks if multiple threads are calling Dump() at the same time.
-    ScopedObjectAccess soa(Thread::Current());
-    threads_running_checkpoint = RunCheckpoint(&checkpoint);
-  }
-  if (threads_running_checkpoint != 0) {
-    checkpoint.WaitForThreadsToRunThroughCheckpoint(threads_running_checkpoint);
+  if (self != nullptr) {
+    DumpCheckpoint checkpoint(&os, dump_native_stack);
+    size_t threads_running_checkpoint;
+    {
+      // Use SOA to prevent deadlocks if multiple threads are calling Dump() at the same time.
+      ScopedObjectAccess soa(self);
+      threads_running_checkpoint = RunCheckpoint(&checkpoint);
+    }
+    if (threads_running_checkpoint != 0) {
+      checkpoint.WaitForThreadsToRunThroughCheckpoint(threads_running_checkpoint);
+    }
+  } else {
+    DumpUnattachedThreads(os, dump_native_stack);
   }
 }
 
@@ -367,6 +375,43 @@
   return count;
 }
 
+size_t ThreadList::RunEmptyCheckpoint() {
+  Thread* self = Thread::Current();
+  Locks::mutator_lock_->AssertNotExclusiveHeld(self);
+  Locks::thread_list_lock_->AssertNotHeld(self);
+  Locks::thread_suspend_count_lock_->AssertNotHeld(self);
+
+  size_t count = 0;
+  {
+    MutexLock mu(self, *Locks::thread_list_lock_);
+    MutexLock mu2(self, *Locks::thread_suspend_count_lock_);
+    for (Thread* thread : list_) {
+      if (thread != self) {
+        while (true) {
+          if (thread->RequestEmptyCheckpoint()) {
+            // This thread will run an empty checkpoint (decrement the empty checkpoint barrier)
+            // some time in the near future.
+            ++count;
+            break;
+          }
+          if (thread->GetState() != kRunnable) {
+            // It's seen suspended, we are done because it must not be in the middle of a mutator
+            // heap access.
+            break;
+          }
+        }
+      }
+    }
+  }
+
+  // Wake up the threads blocking for weak ref access so that they will respond to the empty
+  // checkpoint request. Otherwise we will hang as they are blocking in the kRunnable state.
+  Runtime::Current()->GetHeap()->GetReferenceProcessor()->BroadcastForSlowPath(self);
+  Runtime::Current()->BroadcastForNewSystemWeaks(/*broadcast_for_checkpoint*/true);
+
+  return count;
+}
+
 // Request that a checkpoint function be run on all active (non-suspended)
 // threads.  Returns the number of successful requests.
 size_t ThreadList::RunCheckpointOnRunnableThreads(Closure* checkpoint_function) {
@@ -1236,7 +1281,7 @@
     // Initialize according to the state of the CC collector.
     bool is_gc_marking =
         Runtime::Current()->GetHeap()->ConcurrentCopyingCollector()->IsMarking();
-    self->SetIsGcMarking(is_gc_marking);
+    self->SetIsGcMarkingAndUpdateEntrypoints(is_gc_marking);
     bool weak_ref_access_enabled =
         Runtime::Current()->GetHeap()->ConcurrentCopyingCollector()->IsWeakRefAccessEnabled();
     self->SetWeakRefAccessEnabled(weak_ref_access_enabled);
diff --git a/runtime/thread_list.h b/runtime/thread_list.h
index b455e31..133d430 100644
--- a/runtime/thread_list.h
+++ b/runtime/thread_list.h
@@ -17,6 +17,7 @@
 #ifndef ART_RUNTIME_THREAD_LIST_H_
 #define ART_RUNTIME_THREAD_LIST_H_
 
+#include "barrier.h"
 #include "base/histogram.h"
 #include "base/mutex.h"
 #include "base/value_object.h"
@@ -100,6 +101,14 @@
   size_t RunCheckpoint(Closure* checkpoint_function, Closure* callback = nullptr)
       REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_);
 
+  // Run an empty checkpoint on threads. Wait until threads pass the next suspend point or are
+  // suspended. This is used to ensure that the threads finish or aren't in the middle of an
+  // in-flight mutator heap access (eg. a read barrier.) Runnable threads will respond by
+  // decrementing the empty checkpoint barrier count. This works even when the weak ref access is
+  // disabled. Only one concurrent use is currently supported.
+  size_t RunEmptyCheckpoint()
+      REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_);
+
   size_t RunCheckpointOnRunnableThreads(Closure* checkpoint_function)
       REQUIRES(!Locks::thread_list_lock_, !Locks::thread_suspend_count_lock_);
 
@@ -158,6 +167,10 @@
   void DumpNativeStacks(std::ostream& os)
       REQUIRES(!Locks::thread_list_lock_);
 
+  Barrier* EmptyCheckpointBarrier() {
+    return empty_checkpoint_barrier_.get();
+  }
+
  private:
   uint32_t AllocThreadId(Thread* self);
   void ReleaseThreadId(Thread* self, uint32_t id) REQUIRES(!Locks::allocated_thread_ids_lock_);
@@ -203,6 +216,8 @@
   // Whether or not the current thread suspension is long.
   bool long_suspend_;
 
+  std::unique_ptr<Barrier> empty_checkpoint_barrier_;
+
   friend class Thread;
 
   DISALLOW_COPY_AND_ASSIGN(ThreadList);
diff --git a/runtime/ti/agent.cc b/runtime/ti/agent.cc
index 7c0ea64..d21ff77 100644
--- a/runtime/ti/agent.cc
+++ b/runtime/ti/agent.cc
@@ -25,17 +25,10 @@
 const char* AGENT_ON_ATTACH_FUNCTION_NAME = "Agent_OnAttach";
 const char* AGENT_ON_UNLOAD_FUNCTION_NAME = "Agent_OnUnload";
 
-Agent Agent::Create(std::string arg) {
-  size_t eq = arg.find_first_of('=');
-  if (eq == std::string::npos) {
-    return Agent(arg, "");
-  } else {
-    return Agent(arg.substr(0, eq), arg.substr(eq + 1, arg.length()));
-  }
-}
-
 // TODO We need to acquire some locks probably.
-Agent::LoadError Agent::Load(/*out*/jint* call_res, /*out*/ std::string* error_msg) {
+Agent::LoadError Agent::DoLoadHelper(bool attaching,
+                                     /*out*/jint* call_res,
+                                     /*out*/std::string* error_msg) {
   DCHECK(call_res != nullptr);
   DCHECK(error_msg != nullptr);
 
@@ -49,8 +42,10 @@
     VLOG(agents) << "err: " << *error_msg;
     return err;
   }
-  if (onload_ == nullptr) {
-    *error_msg = StringPrintf("Unable to start agent %s: No Agent_OnLoad function found",
+  AgentOnLoadFunction callback = attaching ? onattach_ : onload_;
+  if (callback == nullptr) {
+    *error_msg = StringPrintf("Unable to start agent %s: No %s callback found",
+                              (attaching ? "attach" : "load"),
                               name_.c_str());
     VLOG(agents) << "err: " << *error_msg;
     return kLoadingError;
@@ -59,9 +54,9 @@
   std::unique_ptr<char[]> copied_args(new char[args_.size() + 1]);
   strcpy(copied_args.get(), args_.c_str());
   // TODO Need to do some checks that we are at a good spot etc.
-  *call_res = onload_(static_cast<JavaVM*>(Runtime::Current()->GetJavaVM()),
-                      copied_args.get(),
-                      nullptr);
+  *call_res = callback(Runtime::Current()->GetJavaVM(),
+                       copied_args.get(),
+                       nullptr);
   if (*call_res != 0) {
     *error_msg = StringPrintf("Initialization of %s returned non-zero value of %d",
                               name_.c_str(), *call_res);
@@ -74,6 +69,12 @@
 
 Agent::LoadError Agent::DoDlOpen(/*out*/std::string* error_msg) {
   DCHECK(error_msg != nullptr);
+
+  DCHECK(dlopen_handle_ == nullptr);
+  DCHECK(onload_ == nullptr);
+  DCHECK(onattach_ == nullptr);
+  DCHECK(onunload_ == nullptr);
+
   dlopen_handle_ = dlopen(name_.c_str(), RTLD_LAZY);
   if (dlopen_handle_ == nullptr) {
     *error_msg = StringPrintf("Unable to dlopen %s: %s", name_.c_str(), dlerror());
@@ -85,7 +86,7 @@
   if (onload_ == nullptr) {
     VLOG(agents) << "Unable to find 'Agent_OnLoad' symbol in " << this;
   }
-  onattach_ = reinterpret_cast<AgentOnAttachFunction>(dlsym(dlopen_handle_,
+  onattach_ = reinterpret_cast<AgentOnLoadFunction>(dlsym(dlopen_handle_,
                                                             AGENT_ON_ATTACH_FUNCTION_NAME));
   if (onattach_ == nullptr) {
     VLOG(agents) << "Unable to find 'Agent_OnAttach' symbol in " << this;
@@ -106,23 +107,93 @@
     }
     dlclose(dlopen_handle_);
     dlopen_handle_ = nullptr;
+    onload_ = nullptr;
+    onattach_ = nullptr;
+    onunload_ = nullptr;
   } else {
     VLOG(agents) << this << " is not currently loaded!";
   }
 }
 
-Agent::Agent(const Agent& other)
-  : name_(other.name_),
-    args_(other.args_),
-    dlopen_handle_(other.dlopen_handle_),
-    onload_(other.onload_),
-    onattach_(other.onattach_),
-    onunload_(other.onunload_) {
-  if (other.dlopen_handle_ != nullptr) {
-    dlopen(other.name_.c_str(), 0);
+Agent::Agent(std::string arg)
+    : dlopen_handle_(nullptr),
+      onload_(nullptr),
+      onattach_(nullptr),
+      onunload_(nullptr) {
+  size_t eq = arg.find_first_of('=');
+  if (eq == std::string::npos) {
+    name_ = arg;
+  } else {
+    name_ = arg.substr(0, eq);
+    args_ = arg.substr(eq + 1, arg.length());
   }
 }
 
+Agent::Agent(const Agent& other)
+    : dlopen_handle_(nullptr),
+      onload_(nullptr),
+      onattach_(nullptr),
+      onunload_(nullptr) {
+  *this = other;
+}
+
+// Attempting to copy to/from loaded/started agents is a fatal error
+Agent& Agent::operator=(const Agent& other) {
+  if (this != &other) {
+    if (other.dlopen_handle_ != nullptr) {
+      LOG(FATAL) << "Attempting to copy a loaded agent!";
+    }
+
+    if (dlopen_handle_ != nullptr) {
+      LOG(FATAL) << "Attempting to assign into a loaded agent!";
+    }
+
+    DCHECK(other.onload_ == nullptr);
+    DCHECK(other.onattach_ == nullptr);
+    DCHECK(other.onunload_ == nullptr);
+
+    DCHECK(onload_ == nullptr);
+    DCHECK(onattach_ == nullptr);
+    DCHECK(onunload_ == nullptr);
+
+    name_ = other.name_;
+    args_ = other.args_;
+
+    dlopen_handle_ = nullptr;
+    onload_ = nullptr;
+    onattach_ = nullptr;
+    onunload_ = nullptr;
+  }
+  return *this;
+}
+
+Agent::Agent(Agent&& other)
+    : dlopen_handle_(nullptr),
+      onload_(nullptr),
+      onattach_(nullptr),
+      onunload_(nullptr) {
+  *this = std::move(other);
+}
+
+Agent& Agent::operator=(Agent&& other) {
+  if (this != &other) {
+    if (dlopen_handle_ != nullptr) {
+      dlclose(dlopen_handle_);
+    }
+    name_ = std::move(other.name_);
+    args_ = std::move(other.args_);
+    dlopen_handle_ = other.dlopen_handle_;
+    onload_ = other.onload_;
+    onattach_ = other.onattach_;
+    onunload_ = other.onunload_;
+    other.dlopen_handle_ = nullptr;
+    other.onload_ = nullptr;
+    other.onattach_ = nullptr;
+    other.onunload_ = nullptr;
+  }
+  return *this;
+}
+
 Agent::~Agent() {
   if (dlopen_handle_ != nullptr) {
     dlclose(dlopen_handle_);
diff --git a/runtime/ti/agent.h b/runtime/ti/agent.h
index 521e21e..6561756 100644
--- a/runtime/ti/agent.h
+++ b/runtime/ti/agent.h
@@ -28,9 +28,10 @@
 namespace ti {
 
 using AgentOnLoadFunction = jint (*)(JavaVM*, const char*, void*);
-using AgentOnAttachFunction = jint (*)(JavaVM*, const char*, void*);
 using AgentOnUnloadFunction = void (*)(JavaVM*);
 
+// TODO: consider splitting ti::Agent into command line, agent and shared library handler classes
+
 class Agent {
  public:
   enum LoadError {
@@ -56,65 +57,44 @@
     return !GetArgs().empty();
   }
 
-  // TODO We need to acquire some locks probably.
-  LoadError Load(/*out*/jint* call_res, /*out*/std::string* error_msg);
+  LoadError Load(/*out*/jint* call_res, /*out*/std::string* error_msg) {
+    VLOG(agents) << "Loading agent: " << name_ << " " << args_;
+    return DoLoadHelper(false, call_res, error_msg);
+  }
 
   // TODO We need to acquire some locks probably.
   void Unload();
 
   // Tries to attach the agent using its OnAttach method. Returns true on success.
-  // TODO We need to acquire some locks probably.
-  LoadError Attach(std::string* error_msg) {
-    // TODO
-    *error_msg = "Attach has not yet been implemented!";
-    return kLoadingError;
+  LoadError Attach(/*out*/jint* call_res, /*out*/std::string* error_msg) {
+    VLOG(agents) << "Attaching agent: " << name_ << " " << args_;
+    return DoLoadHelper(true, call_res, error_msg);
   }
 
-  static Agent Create(std::string arg);
+  explicit Agent(std::string arg);
 
-  static Agent Create(std::string name, std::string args) {
-    return Agent(name, args);
-  }
+  Agent(const Agent& other);
+  Agent& operator=(const Agent& other);
+
+  Agent(Agent&& other);
+  Agent& operator=(Agent&& other);
 
   ~Agent();
 
-  // We need move constructor and copy for vectors
-  Agent(const Agent& other);
-
-  Agent(Agent&& other)
-      : name_(other.name_),
-        args_(other.args_),
-        dlopen_handle_(nullptr),
-        onload_(nullptr),
-        onattach_(nullptr),
-        onunload_(nullptr) {
-    other.dlopen_handle_ = nullptr;
-    other.onload_ = nullptr;
-    other.onattach_ = nullptr;
-    other.onunload_ = nullptr;
-  }
-
-  // We don't need an operator=
-  void operator=(const Agent&) = delete;
-
  private:
-  Agent(std::string name, std::string args)
-      : name_(name),
-        args_(args),
-        dlopen_handle_(nullptr),
-        onload_(nullptr),
-        onattach_(nullptr),
-        onunload_(nullptr) { }
-
   LoadError DoDlOpen(/*out*/std::string* error_msg);
 
-  const std::string name_;
-  const std::string args_;
+  LoadError DoLoadHelper(bool attaching,
+                         /*out*/jint* call_res,
+                         /*out*/std::string* error_msg);
+
+  std::string name_;
+  std::string args_;
   void* dlopen_handle_;
 
   // The entrypoints.
   AgentOnLoadFunction onload_;
-  AgentOnAttachFunction onattach_;
+  AgentOnLoadFunction onattach_;
   AgentOnUnloadFunction onunload_;
 
   friend std::ostream& operator<<(std::ostream &os, Agent const& m);
diff --git a/runtime/trace.cc b/runtime/trace.cc
index f846746..f564de4 100644
--- a/runtime/trace.cc
+++ b/runtime/trace.cc
@@ -641,7 +641,7 @@
     uint32_t tmid = ReadBytes(ptr + 2, sizeof(tmid));
     ArtMethod* method = DecodeTraceMethod(tmid);
     TraceAction action = DecodeTraceAction(tmid);
-    LOG(INFO) << PrettyMethod(method) << " " << static_cast<int>(action);
+    LOG(INFO) << ArtMethod::PrettyMethod(method) << " " << static_cast<int>(action);
     ptr += GetRecordSize(clock_source);
   }
 }
@@ -739,7 +739,8 @@
                        ArtMethod* method,
                        uint32_t new_dex_pc) {
   // We're not recorded to listen to this kind of event, so complain.
-  LOG(ERROR) << "Unexpected dex PC event in tracing " << PrettyMethod(method) << " " << new_dex_pc;
+  LOG(ERROR) << "Unexpected dex PC event in tracing " << ArtMethod::PrettyMethod(method)
+             << " " << new_dex_pc;
 }
 
 void Trace::FieldRead(Thread* thread ATTRIBUTE_UNUSED,
@@ -749,7 +750,8 @@
                       ArtField* field ATTRIBUTE_UNUSED)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   // We're not recorded to listen to this kind of event, so complain.
-  LOG(ERROR) << "Unexpected field read event in tracing " << PrettyMethod(method) << " " << dex_pc;
+  LOG(ERROR) << "Unexpected field read event in tracing " << ArtMethod::PrettyMethod(method)
+             << " " << dex_pc;
 }
 
 void Trace::FieldWritten(Thread* thread ATTRIBUTE_UNUSED,
@@ -760,7 +762,8 @@
                          const JValue& field_value ATTRIBUTE_UNUSED)
     REQUIRES_SHARED(Locks::mutator_lock_) {
   // We're not recorded to listen to this kind of event, so complain.
-  LOG(ERROR) << "Unexpected field write event in tracing " << PrettyMethod(method) << " " << dex_pc;
+  LOG(ERROR) << "Unexpected field write event in tracing " << ArtMethod::PrettyMethod(method)
+             << " " << dex_pc;
 }
 
 void Trace::MethodEntered(Thread* thread, mirror::Object* this_object ATTRIBUTE_UNUSED,
@@ -800,7 +803,7 @@
 void Trace::Branch(Thread* /*thread*/, ArtMethod* method,
                    uint32_t /*dex_pc*/, int32_t /*dex_pc_offset*/)
       REQUIRES_SHARED(Locks::mutator_lock_) {
-  LOG(ERROR) << "Unexpected branch event in tracing" << PrettyMethod(method);
+  LOG(ERROR) << "Unexpected branch event in tracing" << ArtMethod::PrettyMethod(method);
 }
 
 void Trace::InvokeVirtualOrInterface(Thread*,
@@ -808,7 +811,7 @@
                                      ArtMethod* method,
                                      uint32_t dex_pc,
                                      ArtMethod*) {
-  LOG(ERROR) << "Unexpected invoke event in tracing" << PrettyMethod(method)
+  LOG(ERROR) << "Unexpected invoke event in tracing" << ArtMethod::PrettyMethod(method)
              << " " << dex_pc;
 }
 
diff --git a/runtime/transaction.cc b/runtime/transaction.cc
index 9f8d981..c5da5d2 100644
--- a/runtime/transaction.cc
+++ b/runtime/transaction.cc
@@ -167,29 +167,29 @@
   array_log.LogValue(index, value);
 }
 
-void Transaction::RecordResolveString(mirror::DexCache* dex_cache, uint32_t string_idx) {
+void Transaction::RecordResolveString(ObjPtr<mirror::DexCache> dex_cache, uint32_t string_idx) {
   DCHECK(dex_cache != nullptr);
   DCHECK_LT(string_idx, dex_cache->GetDexFile()->NumStringIds());
   MutexLock mu(Thread::Current(), log_lock_);
   resolve_string_logs_.push_back(ResolveStringLog(dex_cache, string_idx));
 }
 
-void Transaction::RecordStrongStringInsertion(mirror::String* s) {
+void Transaction::RecordStrongStringInsertion(ObjPtr<mirror::String> s) {
   InternStringLog log(s, InternStringLog::kStrongString, InternStringLog::kInsert);
   LogInternedString(log);
 }
 
-void Transaction::RecordWeakStringInsertion(mirror::String* s) {
+void Transaction::RecordWeakStringInsertion(ObjPtr<mirror::String> s) {
   InternStringLog log(s, InternStringLog::kWeakString, InternStringLog::kInsert);
   LogInternedString(log);
 }
 
-void Transaction::RecordStrongStringRemoval(mirror::String* s) {
+void Transaction::RecordStrongStringRemoval(ObjPtr<mirror::String> s) {
   InternStringLog log(s, InternStringLog::kStrongString, InternStringLog::kRemove);
   LogInternedString(log);
 }
 
-void Transaction::RecordWeakStringRemoval(mirror::String* s) {
+void Transaction::RecordWeakStringRemoval(ObjPtr<mirror::String> s) {
   InternStringLog log(s, InternStringLog::kWeakString, InternStringLog::kRemove);
   LogInternedString(log);
 }
@@ -470,10 +470,10 @@
     case InternStringLog::kInsert: {
       switch (string_kind_) {
         case InternStringLog::kStrongString:
-          intern_table->RemoveStrongFromTransaction(str_);
+          intern_table->RemoveStrongFromTransaction(str_.Read());
           break;
         case InternStringLog::kWeakString:
-          intern_table->RemoveWeakFromTransaction(str_);
+          intern_table->RemoveWeakFromTransaction(str_.Read());
           break;
         default:
           LOG(FATAL) << "Unknown interned string kind";
@@ -484,10 +484,10 @@
     case InternStringLog::kRemove: {
       switch (string_kind_) {
         case InternStringLog::kStrongString:
-          intern_table->InsertStrongFromTransaction(str_);
+          intern_table->InsertStrongFromTransaction(str_.Read());
           break;
         case InternStringLog::kWeakString:
-          intern_table->InsertWeakFromTransaction(str_);
+          intern_table->InsertWeakFromTransaction(str_.Read());
           break;
         default:
           LOG(FATAL) << "Unknown interned string kind";
@@ -502,14 +502,15 @@
 }
 
 void Transaction::InternStringLog::VisitRoots(RootVisitor* visitor) {
-  visitor->VisitRoot(reinterpret_cast<mirror::Object**>(&str_), RootInfo(kRootInternedString));
+  str_.VisitRoot(visitor, RootInfo(kRootInternedString));
 }
 
 void Transaction::ResolveStringLog::Undo() {
   dex_cache_.Read()->ClearString(string_idx_);
 }
 
-Transaction::ResolveStringLog::ResolveStringLog(mirror::DexCache* dex_cache, uint32_t string_idx)
+Transaction::ResolveStringLog::ResolveStringLog(ObjPtr<mirror::DexCache> dex_cache,
+                                                uint32_t string_idx)
     : dex_cache_(dex_cache),
       string_idx_(string_idx) {
   DCHECK(dex_cache != nullptr);
@@ -520,6 +521,15 @@
   dex_cache_.VisitRoot(visitor, RootInfo(kRootVMInternal));
 }
 
+Transaction::InternStringLog::InternStringLog(ObjPtr<mirror::String> s,
+                                              StringKind kind,
+                                              StringOp op)
+    : str_(s),
+      string_kind_(kind),
+      string_op_(op) {
+  DCHECK(s != nullptr);
+}
+
 void Transaction::ArrayLog::LogValue(size_t index, uint64_t value) {
   auto it = array_values_.find(index);
   if (it == array_values_.end()) {
diff --git a/runtime/transaction.h b/runtime/transaction.h
index 584dfb8..2ec2f50 100644
--- a/runtime/transaction.h
+++ b/runtime/transaction.h
@@ -83,21 +83,21 @@
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Record intern string table changes.
-  void RecordStrongStringInsertion(mirror::String* s)
+  void RecordStrongStringInsertion(ObjPtr<mirror::String> s)
       REQUIRES(Locks::intern_table_lock_)
       REQUIRES(!log_lock_);
-  void RecordWeakStringInsertion(mirror::String* s)
+  void RecordWeakStringInsertion(ObjPtr<mirror::String> s)
       REQUIRES(Locks::intern_table_lock_)
       REQUIRES(!log_lock_);
-  void RecordStrongStringRemoval(mirror::String* s)
+  void RecordStrongStringRemoval(ObjPtr<mirror::String> s)
       REQUIRES(Locks::intern_table_lock_)
       REQUIRES(!log_lock_);
-  void RecordWeakStringRemoval(mirror::String* s)
+  void RecordWeakStringRemoval(ObjPtr<mirror::String> s)
       REQUIRES(Locks::intern_table_lock_)
       REQUIRES(!log_lock_);
 
   // Record resolve string.
-  void RecordResolveString(mirror::DexCache* dex_cache, uint32_t string_idx)
+  void RecordResolveString(ObjPtr<mirror::DexCache> dex_cache, uint32_t string_idx)
       REQUIRES_SHARED(Locks::mutator_lock_)
       REQUIRES(!log_lock_);
 
@@ -182,10 +182,7 @@
       kInsert,
       kRemove
     };
-    InternStringLog(mirror::String* s, StringKind kind, StringOp op)
-      : str_(s), string_kind_(kind), string_op_(op) {
-      DCHECK(s != nullptr);
-    }
+    InternStringLog(ObjPtr<mirror::String> s, StringKind kind, StringOp op);
 
     void Undo(InternTable* intern_table)
         REQUIRES_SHARED(Locks::mutator_lock_)
@@ -193,14 +190,14 @@
     void VisitRoots(RootVisitor* visitor) REQUIRES_SHARED(Locks::mutator_lock_);
 
    private:
-    mirror::String* str_;
+    GcRoot<mirror::String> str_;
     const StringKind string_kind_;
     const StringOp string_op_;
   };
 
   class ResolveStringLog : public ValueObject {
    public:
-    ResolveStringLog(mirror::DexCache* dex_cache, uint32_t string_idx);
+    ResolveStringLog(ObjPtr<mirror::DexCache> dex_cache, uint32_t string_idx);
 
     void Undo() REQUIRES_SHARED(Locks::mutator_lock_);
 
diff --git a/runtime/utf_test.cc b/runtime/utf_test.cc
index 3284925..d1e9751 100644
--- a/runtime/utf_test.cc
+++ b/runtime/utf_test.cc
@@ -113,8 +113,8 @@
   EXPECT_EQ(2u, CountModifiedUtf8Chars(reinterpret_cast<const char *>(kSurrogateEncoding)));
 }
 
-static void AssertConversion(const std::vector<uint16_t> input,
-                             const std::vector<uint8_t> expected) {
+static void AssertConversion(const std::vector<uint16_t>& input,
+                             const std::vector<uint8_t>& expected) {
   ASSERT_EQ(expected.size(), CountUtf8Bytes(&input[0], input.size()));
 
   std::vector<uint8_t> output(expected.size());
diff --git a/runtime/utils.cc b/runtime/utils.cc
index a40e313..6ed54f7 100644
--- a/runtime/utils.cc
+++ b/runtime/utils.cc
@@ -25,19 +25,11 @@
 #include <unistd.h>
 #include <memory>
 
-#include "art_field-inl.h"
-#include "art_method-inl.h"
 #include "base/stl_util.h"
 #include "base/unix_file/fd_file.h"
 #include "dex_file-inl.h"
 #include "dex_instruction.h"
-#include "mirror/class-inl.h"
-#include "mirror/class_loader.h"
-#include "mirror/object-inl.h"
-#include "mirror/object_array-inl.h"
-#include "mirror/string.h"
 #include "oat_quick_method_header.h"
-#include "obj_ptr-inl.h"
 #include "os.h"
 #include "scoped_thread_state_change-inl.h"
 #include "utf-inl.h"
@@ -271,21 +263,6 @@
   }
 }
 
-std::string PrettyStringDescriptor(ObjPtr<mirror::String> java_descriptor) {
-  if (java_descriptor == nullptr) {
-    return "null";
-  }
-  return PrettyDescriptor(java_descriptor->ToModifiedUtf8().c_str());
-}
-
-std::string PrettyDescriptor(ObjPtr<mirror::Class> klass) {
-  if (klass == nullptr) {
-    return "null";
-  }
-  std::string temp;
-  return PrettyDescriptor(klass->GetDescriptor(&temp));
-}
-
 std::string PrettyDescriptor(const char* descriptor) {
   // Count the number of '['s to get the dimensionality.
   const char* c = descriptor;
@@ -335,46 +312,6 @@
   return result;
 }
 
-std::string PrettyField(ArtField* f, bool with_type) {
-  if (f == nullptr) {
-    return "null";
-  }
-  std::string result;
-  if (with_type) {
-    result += PrettyDescriptor(f->GetTypeDescriptor());
-    result += ' ';
-  }
-  std::string temp;
-  result += PrettyDescriptor(f->GetDeclaringClass()->GetDescriptor(&temp));
-  result += '.';
-  result += f->GetName();
-  return result;
-}
-
-std::string PrettyField(uint32_t field_idx, const DexFile& dex_file, bool with_type) {
-  if (field_idx >= dex_file.NumFieldIds()) {
-    return StringPrintf("<<invalid-field-idx-%d>>", field_idx);
-  }
-  const DexFile::FieldId& field_id = dex_file.GetFieldId(field_idx);
-  std::string result;
-  if (with_type) {
-    result += dex_file.GetFieldTypeDescriptor(field_id);
-    result += ' ';
-  }
-  result += PrettyDescriptor(dex_file.GetFieldDeclaringClassDescriptor(field_id));
-  result += '.';
-  result += dex_file.GetFieldName(field_id);
-  return result;
-}
-
-std::string PrettyType(uint32_t type_idx, const DexFile& dex_file) {
-  if (type_idx >= dex_file.NumTypeIds()) {
-    return StringPrintf("<<invalid-type-idx-%d>>", type_idx);
-  }
-  const DexFile::TypeId& type_id = dex_file.GetTypeId(type_idx);
-  return PrettyDescriptor(dex_file.GetTypeDescriptor(type_id));
-}
-
 std::string PrettyArguments(const char* signature) {
   std::string result;
   result += '(';
@@ -412,91 +349,6 @@
   return PrettyDescriptor(return_type);
 }
 
-std::string PrettyMethod(ArtMethod* m, bool with_signature) {
-  if (m == nullptr) {
-    return "null";
-  }
-  if (!m->IsRuntimeMethod()) {
-    m = m->GetInterfaceMethodIfProxy(Runtime::Current()->GetClassLinker()->GetImagePointerSize());
-  }
-  std::string result(PrettyDescriptor(m->GetDeclaringClassDescriptor()));
-  result += '.';
-  result += m->GetName();
-  if (UNLIKELY(m->IsFastNative())) {
-    result += "!";
-  }
-  if (with_signature) {
-    const Signature signature = m->GetSignature();
-    std::string sig_as_string(signature.ToString());
-    if (signature == Signature::NoSignature()) {
-      return result + sig_as_string;
-    }
-    result = PrettyReturnType(sig_as_string.c_str()) + " " + result +
-        PrettyArguments(sig_as_string.c_str());
-  }
-  return result;
-}
-
-std::string PrettyMethod(uint32_t method_idx, const DexFile& dex_file, bool with_signature) {
-  if (method_idx >= dex_file.NumMethodIds()) {
-    return StringPrintf("<<invalid-method-idx-%d>>", method_idx);
-  }
-  const DexFile::MethodId& method_id = dex_file.GetMethodId(method_idx);
-  std::string result(PrettyDescriptor(dex_file.GetMethodDeclaringClassDescriptor(method_id)));
-  result += '.';
-  result += dex_file.GetMethodName(method_id);
-  if (with_signature) {
-    const Signature signature = dex_file.GetMethodSignature(method_id);
-    std::string sig_as_string(signature.ToString());
-    if (signature == Signature::NoSignature()) {
-      return result + sig_as_string;
-    }
-    result = PrettyReturnType(sig_as_string.c_str()) + " " + result +
-        PrettyArguments(sig_as_string.c_str());
-  }
-  return result;
-}
-
-std::string PrettyTypeOf(ObjPtr<mirror::Object> obj) {
-  if (obj == nullptr) {
-    return "null";
-  }
-  if (obj->GetClass() == nullptr) {
-    return "(raw)";
-  }
-  std::string temp;
-  std::string result(PrettyDescriptor(obj->GetClass()->GetDescriptor(&temp)));
-  if (obj->IsClass()) {
-    result += "<" + PrettyDescriptor(obj->AsClass()->GetDescriptor(&temp)) + ">";
-  }
-  return result;
-}
-
-std::string PrettyClass(ObjPtr<mirror::Class> c) {
-  if (c == nullptr) {
-    return "null";
-  }
-  std::string result;
-  result += "java.lang.Class<";
-  result += PrettyDescriptor(c);
-  result += ">";
-  return result;
-}
-
-std::string PrettyClassAndClassLoader(ObjPtr<mirror::Class> c) {
-  if (c == nullptr) {
-    return "null";
-  }
-  std::string result;
-  result += "java.lang.Class<";
-  result += PrettyDescriptor(c);
-  result += ",";
-  result += PrettyTypeOf(c->GetClassLoader());
-  // TODO: add an identifying hash value for the loader
-  result += ">";
-  return result;
-}
-
 std::string PrettyJavaAccessFlags(uint32_t access_flags) {
   std::string result;
   if ((access_flags & kAccPublic) != 0) {
@@ -672,38 +524,6 @@
   return descriptor;
 }
 
-std::string JniShortName(ArtMethod* m) {
-  std::string class_name(m->GetDeclaringClassDescriptor());
-  // Remove the leading 'L' and trailing ';'...
-  CHECK_EQ(class_name[0], 'L') << class_name;
-  CHECK_EQ(class_name[class_name.size() - 1], ';') << class_name;
-  class_name.erase(0, 1);
-  class_name.erase(class_name.size() - 1, 1);
-
-  std::string method_name(m->GetName());
-
-  std::string short_name;
-  short_name += "Java_";
-  short_name += MangleForJni(class_name);
-  short_name += "_";
-  short_name += MangleForJni(method_name);
-  return short_name;
-}
-
-std::string JniLongName(ArtMethod* m) {
-  std::string long_name;
-  long_name += JniShortName(m);
-  long_name += "__";
-
-  std::string signature(m->GetSignature().ToString());
-  signature.erase(0, 1);
-  signature.erase(signature.begin() + signature.find(')'), signature.end());
-
-  long_name += MangleForJni(signature);
-
-  return long_name;
-}
-
 // Helper for IsValidPartOfMemberNameUtf8(), a bit vector indicating valid low ascii.
 uint32_t DEX_MEMBER_VALID_LOW_ASCII[4] = {
   0x00000000,  // 00..1f low control characters; nothing valid
@@ -1304,377 +1124,6 @@
   return PrettyDescriptor(Primitive::Descriptor(type));
 }
 
-static void DumpMethodCFGImpl(const DexFile* dex_file,
-                              uint32_t dex_method_idx,
-                              const DexFile::CodeItem* code_item,
-                              std::ostream& os) {
-  os << "digraph {\n";
-  os << "  # /* " << PrettyMethod(dex_method_idx, *dex_file, true) << " */\n";
-
-  std::set<uint32_t> dex_pc_is_branch_target;
-  {
-    // Go and populate.
-    const Instruction* inst = Instruction::At(code_item->insns_);
-    for (uint32_t dex_pc = 0;
-         dex_pc < code_item->insns_size_in_code_units_;
-         dex_pc += inst->SizeInCodeUnits(), inst = inst->Next()) {
-      if (inst->IsBranch()) {
-        dex_pc_is_branch_target.insert(dex_pc + inst->GetTargetOffset());
-      } else if (inst->IsSwitch()) {
-        const uint16_t* insns = code_item->insns_ + dex_pc;
-        int32_t switch_offset = insns[1] | (static_cast<int32_t>(insns[2]) << 16);
-        const uint16_t* switch_insns = insns + switch_offset;
-        uint32_t switch_count = switch_insns[1];
-        int32_t targets_offset;
-        if ((*insns & 0xff) == Instruction::PACKED_SWITCH) {
-          /* 0=sig, 1=count, 2/3=firstKey */
-          targets_offset = 4;
-        } else {
-          /* 0=sig, 1=count, 2..count*2 = keys */
-          targets_offset = 2 + 2 * switch_count;
-        }
-        for (uint32_t targ = 0; targ < switch_count; targ++) {
-          int32_t offset =
-              static_cast<int32_t>(switch_insns[targets_offset + targ * 2]) |
-              static_cast<int32_t>(switch_insns[targets_offset + targ * 2 + 1] << 16);
-          dex_pc_is_branch_target.insert(dex_pc + offset);
-        }
-      }
-    }
-  }
-
-  // Create nodes for "basic blocks."
-  std::map<uint32_t, uint32_t> dex_pc_to_node_id;  // This only has entries for block starts.
-  std::map<uint32_t, uint32_t> dex_pc_to_incl_id;  // This has entries for all dex pcs.
-
-  {
-    const Instruction* inst = Instruction::At(code_item->insns_);
-    bool first_in_block = true;
-    bool force_new_block = false;
-    for (uint32_t dex_pc = 0;
-         dex_pc < code_item->insns_size_in_code_units_;
-         dex_pc += inst->SizeInCodeUnits(), inst = inst->Next()) {
-      if (dex_pc == 0 ||
-          (dex_pc_is_branch_target.find(dex_pc) != dex_pc_is_branch_target.end()) ||
-          force_new_block) {
-        uint32_t id = dex_pc_to_node_id.size();
-        if (id > 0) {
-          // End last node.
-          os << "}\"];\n";
-        }
-        // Start next node.
-        os << "  node" << id << " [shape=record,label=\"{";
-        dex_pc_to_node_id.insert(std::make_pair(dex_pc, id));
-        first_in_block = true;
-        force_new_block = false;
-      }
-
-      // Register instruction.
-      dex_pc_to_incl_id.insert(std::make_pair(dex_pc, dex_pc_to_node_id.size() - 1));
-
-      // Print instruction.
-      if (!first_in_block) {
-        os << " | ";
-      } else {
-        first_in_block = false;
-      }
-
-      // Dump the instruction. Need to escape '"', '<', '>', '{' and '}'.
-      os << "<" << "p" << dex_pc << ">";
-      os << " 0x" << std::hex << dex_pc << std::dec << ": ";
-      std::string inst_str = inst->DumpString(dex_file);
-      size_t cur_start = 0;  // It's OK to start at zero, instruction dumps don't start with chars
-                             // we need to escape.
-      while (cur_start != std::string::npos) {
-        size_t next_escape = inst_str.find_first_of("\"{}<>", cur_start + 1);
-        if (next_escape == std::string::npos) {
-          os << inst_str.substr(cur_start, inst_str.size() - cur_start);
-          break;
-        } else {
-          os << inst_str.substr(cur_start, next_escape - cur_start);
-          // Escape all necessary characters.
-          while (next_escape < inst_str.size()) {
-            char c = inst_str.at(next_escape);
-            if (c == '"' || c == '{' || c == '}' || c == '<' || c == '>') {
-              os << '\\' << c;
-            } else {
-              break;
-            }
-            next_escape++;
-          }
-          if (next_escape >= inst_str.size()) {
-            next_escape = std::string::npos;
-          }
-          cur_start = next_escape;
-        }
-      }
-
-      // Force a new block for some fall-throughs and some instructions that terminate the "local"
-      // control flow.
-      force_new_block = inst->IsSwitch() || inst->IsBasicBlockEnd();
-    }
-    // Close last node.
-    if (dex_pc_to_node_id.size() > 0) {
-      os << "}\"];\n";
-    }
-  }
-
-  // Create edges between them.
-  {
-    std::ostringstream regular_edges;
-    std::ostringstream taken_edges;
-    std::ostringstream exception_edges;
-
-    // Common set of exception edges.
-    std::set<uint32_t> exception_targets;
-
-    // These blocks (given by the first dex pc) need exception per dex-pc handling in a second
-    // pass. In the first pass we try and see whether we can use a common set of edges.
-    std::set<uint32_t> blocks_with_detailed_exceptions;
-
-    {
-      uint32_t last_node_id = std::numeric_limits<uint32_t>::max();
-      uint32_t old_dex_pc = 0;
-      uint32_t block_start_dex_pc = std::numeric_limits<uint32_t>::max();
-      const Instruction* inst = Instruction::At(code_item->insns_);
-      for (uint32_t dex_pc = 0;
-          dex_pc < code_item->insns_size_in_code_units_;
-          old_dex_pc = dex_pc, dex_pc += inst->SizeInCodeUnits(), inst = inst->Next()) {
-        {
-          auto it = dex_pc_to_node_id.find(dex_pc);
-          if (it != dex_pc_to_node_id.end()) {
-            if (!exception_targets.empty()) {
-              // It seems the last block had common exception handlers. Add the exception edges now.
-              uint32_t node_id = dex_pc_to_node_id.find(block_start_dex_pc)->second;
-              for (uint32_t handler_pc : exception_targets) {
-                auto node_id_it = dex_pc_to_incl_id.find(handler_pc);
-                if (node_id_it != dex_pc_to_incl_id.end()) {
-                  exception_edges << "  node" << node_id
-                      << " -> node" << node_id_it->second << ":p" << handler_pc
-                      << ";\n";
-                }
-              }
-              exception_targets.clear();
-            }
-
-            block_start_dex_pc = dex_pc;
-
-            // Seems to be a fall-through, connect to last_node_id. May be spurious edges for things
-            // like switch data.
-            uint32_t old_last = last_node_id;
-            last_node_id = it->second;
-            if (old_last != std::numeric_limits<uint32_t>::max()) {
-              regular_edges << "  node" << old_last << ":p" << old_dex_pc
-                  << " -> node" << last_node_id << ":p" << dex_pc
-                  << ";\n";
-            }
-          }
-
-          // Look at the exceptions of the first entry.
-          CatchHandlerIterator catch_it(*code_item, dex_pc);
-          for (; catch_it.HasNext(); catch_it.Next()) {
-            exception_targets.insert(catch_it.GetHandlerAddress());
-          }
-        }
-
-        // Handle instruction.
-
-        // Branch: something with at most two targets.
-        if (inst->IsBranch()) {
-          const int32_t offset = inst->GetTargetOffset();
-          const bool conditional = !inst->IsUnconditional();
-
-          auto target_it = dex_pc_to_node_id.find(dex_pc + offset);
-          if (target_it != dex_pc_to_node_id.end()) {
-            taken_edges << "  node" << last_node_id << ":p" << dex_pc
-                << " -> node" << target_it->second << ":p" << (dex_pc + offset)
-                << ";\n";
-          }
-          if (!conditional) {
-            // No fall-through.
-            last_node_id = std::numeric_limits<uint32_t>::max();
-          }
-        } else if (inst->IsSwitch()) {
-          // TODO: Iterate through all switch targets.
-          const uint16_t* insns = code_item->insns_ + dex_pc;
-          /* make sure the start of the switch is in range */
-          int32_t switch_offset = insns[1] | (static_cast<int32_t>(insns[2]) << 16);
-          /* offset to switch table is a relative branch-style offset */
-          const uint16_t* switch_insns = insns + switch_offset;
-          uint32_t switch_count = switch_insns[1];
-          int32_t targets_offset;
-          if ((*insns & 0xff) == Instruction::PACKED_SWITCH) {
-            /* 0=sig, 1=count, 2/3=firstKey */
-            targets_offset = 4;
-          } else {
-            /* 0=sig, 1=count, 2..count*2 = keys */
-            targets_offset = 2 + 2 * switch_count;
-          }
-          /* make sure the end of the switch is in range */
-          /* verify each switch target */
-          for (uint32_t targ = 0; targ < switch_count; targ++) {
-            int32_t offset =
-                static_cast<int32_t>(switch_insns[targets_offset + targ * 2]) |
-                static_cast<int32_t>(switch_insns[targets_offset + targ * 2 + 1] << 16);
-            int32_t abs_offset = dex_pc + offset;
-            auto target_it = dex_pc_to_node_id.find(abs_offset);
-            if (target_it != dex_pc_to_node_id.end()) {
-              // TODO: value label.
-              taken_edges << "  node" << last_node_id << ":p" << dex_pc
-                  << " -> node" << target_it->second << ":p" << (abs_offset)
-                  << ";\n";
-            }
-          }
-        }
-
-        // Exception edges. If this is not the first instruction in the block
-        if (block_start_dex_pc != dex_pc) {
-          std::set<uint32_t> current_handler_pcs;
-          CatchHandlerIterator catch_it(*code_item, dex_pc);
-          for (; catch_it.HasNext(); catch_it.Next()) {
-            current_handler_pcs.insert(catch_it.GetHandlerAddress());
-          }
-          if (current_handler_pcs != exception_targets) {
-            exception_targets.clear();  // Clear so we don't do something at the end.
-            blocks_with_detailed_exceptions.insert(block_start_dex_pc);
-          }
-        }
-
-        if (inst->IsReturn() ||
-            (inst->Opcode() == Instruction::THROW) ||
-            (inst->IsBranch() && inst->IsUnconditional())) {
-          // No fall-through.
-          last_node_id = std::numeric_limits<uint32_t>::max();
-        }
-      }
-      // Finish up the last block, if it had common exceptions.
-      if (!exception_targets.empty()) {
-        // It seems the last block had common exception handlers. Add the exception edges now.
-        uint32_t node_id = dex_pc_to_node_id.find(block_start_dex_pc)->second;
-        for (uint32_t handler_pc : exception_targets) {
-          auto node_id_it = dex_pc_to_incl_id.find(handler_pc);
-          if (node_id_it != dex_pc_to_incl_id.end()) {
-            exception_edges << "  node" << node_id
-                << " -> node" << node_id_it->second << ":p" << handler_pc
-                << ";\n";
-          }
-        }
-        exception_targets.clear();
-      }
-    }
-
-    // Second pass for detailed exception blocks.
-    // TODO
-    // Exception edges. If this is not the first instruction in the block
-    for (uint32_t dex_pc : blocks_with_detailed_exceptions) {
-      const Instruction* inst = Instruction::At(&code_item->insns_[dex_pc]);
-      uint32_t this_node_id = dex_pc_to_incl_id.find(dex_pc)->second;
-      while (true) {
-        CatchHandlerIterator catch_it(*code_item, dex_pc);
-        if (catch_it.HasNext()) {
-          std::set<uint32_t> handled_targets;
-          for (; catch_it.HasNext(); catch_it.Next()) {
-            uint32_t handler_pc = catch_it.GetHandlerAddress();
-            auto it = handled_targets.find(handler_pc);
-            if (it == handled_targets.end()) {
-              auto node_id_it = dex_pc_to_incl_id.find(handler_pc);
-              if (node_id_it != dex_pc_to_incl_id.end()) {
-                exception_edges << "  node" << this_node_id << ":p" << dex_pc
-                    << " -> node" << node_id_it->second << ":p" << handler_pc
-                    << ";\n";
-              }
-
-              // Mark as done.
-              handled_targets.insert(handler_pc);
-            }
-          }
-        }
-        if (inst->IsBasicBlockEnd()) {
-          break;
-        }
-
-        // Loop update. Have a break-out if the next instruction is a branch target and thus in
-        // another block.
-        dex_pc += inst->SizeInCodeUnits();
-        if (dex_pc >= code_item->insns_size_in_code_units_) {
-          break;
-        }
-        if (dex_pc_to_node_id.find(dex_pc) != dex_pc_to_node_id.end()) {
-          break;
-        }
-        inst = inst->Next();
-      }
-    }
-
-    // Write out the sub-graphs to make edges styled.
-    os << "\n";
-    os << "  subgraph regular_edges {\n";
-    os << "    edge [color=\"#000000\",weight=.3,len=3];\n\n";
-    os << "    " << regular_edges.str() << "\n";
-    os << "  }\n\n";
-
-    os << "  subgraph taken_edges {\n";
-    os << "    edge [color=\"#00FF00\",weight=.3,len=3];\n\n";
-    os << "    " << taken_edges.str() << "\n";
-    os << "  }\n\n";
-
-    os << "  subgraph exception_edges {\n";
-    os << "    edge [color=\"#FF0000\",weight=.3,len=3];\n\n";
-    os << "    " << exception_edges.str() << "\n";
-    os << "  }\n\n";
-  }
-
-  os << "}\n";
-}
-
-void DumpMethodCFG(ArtMethod* method, std::ostream& os) {
-  const DexFile* dex_file = method->GetDexFile();
-  const DexFile::CodeItem* code_item = dex_file->GetCodeItem(method->GetCodeItemOffset());
-
-  DumpMethodCFGImpl(dex_file, method->GetDexMethodIndex(), code_item, os);
-}
-
-void DumpMethodCFG(const DexFile* dex_file, uint32_t dex_method_idx, std::ostream& os) {
-  // This is painful, we need to find the code item. That means finding the class, and then
-  // iterating the table.
-  if (dex_method_idx >= dex_file->NumMethodIds()) {
-    os << "Could not find method-idx.";
-    return;
-  }
-  const DexFile::MethodId& method_id = dex_file->GetMethodId(dex_method_idx);
-
-  const DexFile::ClassDef* class_def = dex_file->FindClassDef(method_id.class_idx_);
-  if (class_def == nullptr) {
-    os << "Could not find class-def.";
-    return;
-  }
-
-  const uint8_t* class_data = dex_file->GetClassData(*class_def);
-  if (class_data == nullptr) {
-    os << "No class data.";
-    return;
-  }
-
-  ClassDataItemIterator it(*dex_file, class_data);
-  // Skip fields
-  while (it.HasNextStaticField() || it.HasNextInstanceField()) {
-    it.Next();
-  }
-
-  // Find method, and dump it.
-  while (it.HasNextDirectMethod() || it.HasNextVirtualMethod()) {
-    uint32_t method_idx = it.GetMemberIndex();
-    if (method_idx == dex_method_idx) {
-      DumpMethodCFGImpl(dex_file, dex_method_idx, it.GetMethodCodeItem(), os);
-      return;
-    }
-    it.Next();
-  }
-
-  // Otherwise complain.
-  os << "Something went wrong, didn't find the method in the class data.";
-}
-
 static void ParseStringAfterChar(const std::string& s,
                                  char c,
                                  std::string* parsed_value,
diff --git a/runtime/utils.h b/runtime/utils.h
index ea9e8f7..1e98057 100644
--- a/runtime/utils.h
+++ b/runtime/utils.h
@@ -30,26 +30,12 @@
 #include "arch/instruction_set.h"
 #include "base/casts.h"
 #include "base/logging.h"
-#include "base/mutex.h"
 #include "base/stringpiece.h"
 #include "globals.h"
-#include "obj_ptr.h"
 #include "primitive.h"
 
-class BacktraceMap;
-
 namespace art {
 
-class ArtField;
-class ArtMethod;
-class DexFile;
-
-namespace mirror {
-class Class;
-class Object;
-class String;
-}  // namespace mirror
-
 template <typename T>
 bool ParseUint(const char *in, T* out) {
   char* end;
@@ -136,44 +122,12 @@
 // Returns a human-readable equivalent of 'descriptor'. So "I" would be "int",
 // "[[I" would be "int[][]", "[Ljava/lang/String;" would be
 // "java.lang.String[]", and so forth.
-std::string PrettyStringDescriptor(ObjPtr<mirror::String> descriptor)
-    REQUIRES_SHARED(Locks::mutator_lock_);
 std::string PrettyDescriptor(const char* descriptor);
-std::string PrettyDescriptor(ObjPtr<mirror::Class> klass)
-    REQUIRES_SHARED(Locks::mutator_lock_);
 std::string PrettyDescriptor(Primitive::Type type);
 
-// Returns a human-readable signature for 'f'. Something like "a.b.C.f" or
-// "int a.b.C.f" (depending on the value of 'with_type').
-std::string PrettyField(ArtField* f, bool with_type = true)
-    REQUIRES_SHARED(Locks::mutator_lock_);
-std::string PrettyField(uint32_t field_idx, const DexFile& dex_file, bool with_type = true);
-
-// Returns a human-readable signature for 'm'. Something like "a.b.C.m" or
-// "a.b.C.m(II)V" (depending on the value of 'with_signature').
-std::string PrettyMethod(ArtMethod* m, bool with_signature = true)
-    REQUIRES_SHARED(Locks::mutator_lock_);
-std::string PrettyMethod(uint32_t method_idx, const DexFile& dex_file, bool with_signature = true);
-
-// Returns a human-readable form of the name of the *class* of the given object.
-// So given an instance of java.lang.String, the output would
-// be "java.lang.String". Given an array of int, the output would be "int[]".
-// Given String.class, the output would be "java.lang.Class<java.lang.String>".
-std::string PrettyTypeOf(ObjPtr<mirror::Object> obj)
-    REQUIRES_SHARED(Locks::mutator_lock_);
-
-// Returns a human-readable form of the type at an index in the specified dex file.
-// Example outputs: char[], java.lang.String.
-std::string PrettyType(uint32_t type_idx, const DexFile& dex_file);
-
-// Returns a human-readable form of the name of the given class.
-// Given String.class, the output would be "java.lang.Class<java.lang.String>".
-std::string PrettyClass(ObjPtr<mirror::Class> c)
-    REQUIRES_SHARED(Locks::mutator_lock_);
-
-// Returns a human-readable form of the name of the given class with its class loader.
-std::string PrettyClassAndClassLoader(ObjPtr<mirror::Class> c)
-    REQUIRES_SHARED(Locks::mutator_lock_);
+// Utilities for printing the types for method signatures.
+std::string PrettyArguments(const char* signature);
+std::string PrettyReturnType(const char* signature);
 
 // Returns a human-readable version of the Java part of the access flags, e.g., "private static "
 // (note the trailing whitespace).
@@ -206,13 +160,6 @@
 // additionally allowing names that begin with '<' and end with '>'.
 bool IsValidMemberName(const char* s);
 
-// Returns the JNI native function name for the non-overloaded method 'm'.
-std::string JniShortName(ArtMethod* m)
-    REQUIRES_SHARED(Locks::mutator_lock_);
-// Returns the JNI native function name for the overloaded method 'm'.
-std::string JniLongName(ArtMethod* m)
-    REQUIRES_SHARED(Locks::mutator_lock_);
-
 bool ReadFileToString(const std::string& file_name, std::string* result);
 bool PrintFileToLog(const std::string& file_name, LogSeverity level);
 
@@ -321,9 +268,6 @@
   return pointer_size == 4 || pointer_size == 8;
 }
 
-void DumpMethodCFG(ArtMethod* method, std::ostream& os) REQUIRES_SHARED(Locks::mutator_lock_);
-void DumpMethodCFG(const DexFile* dex_file, uint32_t dex_method_idx, std::ostream& os);
-
 static inline const void* EntryPointToCodePointer(const void* entry_point) {
   uintptr_t code = reinterpret_cast<uintptr_t>(entry_point);
   // TODO: Make this Thumb2 specific. It is benign on other architectures as code is always at
@@ -335,24 +279,34 @@
 using UsageFn = void (*)(const char*, ...);
 
 template <typename T>
-static void ParseUintOption(const StringPiece& option,
+static void ParseIntOption(const StringPiece& option,
                             const std::string& option_name,
                             T* out,
-                            UsageFn Usage,
+                            UsageFn usage,
                             bool is_long_option = true) {
   std::string option_prefix = option_name + (is_long_option ? "=" : "");
   DCHECK(option.starts_with(option_prefix)) << option << " " << option_prefix;
   const char* value_string = option.substr(option_prefix.size()).data();
   int64_t parsed_integer_value = 0;
   if (!ParseInt(value_string, &parsed_integer_value)) {
-    Usage("Failed to parse %s '%s' as an integer", option_name.c_str(), value_string);
-  }
-  if (parsed_integer_value < 0) {
-    Usage("%s passed a negative value %d", option_name.c_str(), parsed_integer_value);
+    usage("Failed to parse %s '%s' as an integer", option_name.c_str(), value_string);
   }
   *out = dchecked_integral_cast<T>(parsed_integer_value);
 }
 
+template <typename T>
+static void ParseUintOption(const StringPiece& option,
+                            const std::string& option_name,
+                            T* out,
+                            UsageFn usage,
+                            bool is_long_option = true) {
+  ParseIntOption(option, option_name, out, usage, is_long_option);
+  if (*out < 0) {
+    usage("%s passed a negative value %d", option_name.c_str(), *out);
+    *out = 0;
+  }
+}
+
 void ParseDouble(const std::string& option,
                  char after_char,
                  double min,
diff --git a/runtime/utils/dex_cache_arrays_layout-inl.h b/runtime/utils/dex_cache_arrays_layout-inl.h
index 5ccd446..c7875b5 100644
--- a/runtime/utils/dex_cache_arrays_layout-inl.h
+++ b/runtime/utils/dex_cache_arrays_layout-inl.h
@@ -39,7 +39,7 @@
       fields_offset_(
           RoundUp(strings_offset_ + StringsSize(header.string_ids_size_), FieldsAlignment())),
       method_types_offset_(
-          RoundUp(fields_offset_ + FieldsSize(header.field_ids_size_), Alignment())),
+          RoundUp(fields_offset_ + FieldsSize(header.field_ids_size_), MethodTypesAlignment())),
       size_(
           RoundUp(method_types_offset_ + MethodTypesSize(header.proto_ids_size_), Alignment())) {
 }
@@ -51,7 +51,11 @@
 inline constexpr size_t DexCacheArraysLayout::Alignment() {
   // GcRoot<> alignment is 4, i.e. lower than or equal to the pointer alignment.
   static_assert(alignof(GcRoot<mirror::Class>) == 4, "Expecting alignof(GcRoot<>) == 4");
-  static_assert(alignof(mirror::StringDexCacheType) == 8, "Expecting alignof(StringDexCacheType) == 8");
+  static_assert(alignof(mirror::StringDexCacheType) == 8,
+                "Expecting alignof(StringDexCacheType) == 8");
+  static_assert(alignof(mirror::MethodTypeDexCacheType) == 8,
+                "Expecting alignof(MethodTypeDexCacheType) == 8");
+  // This is the same as alignof(MethodTypeDexCacheType).
   return alignof(mirror::StringDexCacheType);
 }
 
@@ -66,10 +70,7 @@
 }
 
 inline size_t DexCacheArraysLayout::TypesSize(size_t num_elements) const {
-  // App image patching relies on having enough room for a forwarding pointer in the types array.
-  // See FixupArtMethodArrayVisitor and ClassLinker::AddImageSpace.
-  return std::max(ArraySize(GcRootAsPointerSize<mirror::Class>(), num_elements),
-                  static_cast<size_t>(pointer_size_));
+  return ArraySize(GcRootAsPointerSize<mirror::Class>(), num_elements);
 }
 
 inline size_t DexCacheArraysLayout::TypesAlignment() const {
@@ -81,8 +82,7 @@
 }
 
 inline size_t DexCacheArraysLayout::MethodsSize(size_t num_elements) const {
-  // App image patching relies on having enough room for a forwarding pointer in the methods array.
-  return std::max(ArraySize(pointer_size_, num_elements), static_cast<size_t>(pointer_size_));
+  return ArraySize(pointer_size_, num_elements);
 }
 
 inline size_t DexCacheArraysLayout::MethodsAlignment() const {
@@ -120,12 +120,6 @@
   return static_cast<size_t>(pointer_size_);
 }
 
-inline size_t DexCacheArraysLayout::MethodTypeOffset(uint32_t proto_idx) const {
-  return strings_offset_
-      + ElementOffset(PointerSize::k64,
-                      proto_idx % mirror::DexCache::kDexCacheMethodTypeCacheSize);
-}
-
 inline size_t DexCacheArraysLayout::MethodTypesSize(size_t num_elements) const {
   size_t cache_size = mirror::DexCache::kDexCacheMethodTypeCacheSize;
   if (num_elements < cache_size) {
diff --git a/runtime/utils/dex_cache_arrays_layout.h b/runtime/utils/dex_cache_arrays_layout.h
index e222b46..ae3bfab 100644
--- a/runtime/utils/dex_cache_arrays_layout.h
+++ b/runtime/utils/dex_cache_arrays_layout.h
@@ -99,8 +99,6 @@
     return method_types_offset_;
   }
 
-  size_t MethodTypeOffset(uint32_t method_type_idx) const;
-
   size_t MethodTypesSize(size_t num_elements) const;
 
   size_t MethodTypesAlignment() const;
diff --git a/runtime/utils_test.cc b/runtime/utils_test.cc
index ef42222..be4d394 100644
--- a/runtime/utils_test.cc
+++ b/runtime/utils_test.cc
@@ -109,51 +109,52 @@
 
 TEST_F(UtilsTest, PrettyTypeOf) {
   ScopedObjectAccess soa(Thread::Current());
-  EXPECT_EQ("null", PrettyTypeOf(nullptr));
+  EXPECT_EQ("null", mirror::Object::PrettyTypeOf(nullptr));
 
   StackHandleScope<2> hs(soa.Self());
   Handle<mirror::String> s(hs.NewHandle(mirror::String::AllocFromModifiedUtf8(soa.Self(), "")));
-  EXPECT_EQ("java.lang.String", PrettyTypeOf(s.Get()));
+  EXPECT_EQ("java.lang.String", mirror::Object::PrettyTypeOf(s.Get()));
 
   Handle<mirror::ShortArray> a(hs.NewHandle(mirror::ShortArray::Alloc(soa.Self(), 2)));
-  EXPECT_EQ("short[]", PrettyTypeOf(a.Get()));
+  EXPECT_EQ("short[]", mirror::Object::PrettyTypeOf(a.Get()));
 
   mirror::Class* c = class_linker_->FindSystemClass(soa.Self(), "[Ljava/lang/String;");
   ASSERT_TRUE(c != nullptr);
   mirror::Object* o = mirror::ObjectArray<mirror::String>::Alloc(soa.Self(), c, 0);
-  EXPECT_EQ("java.lang.String[]", PrettyTypeOf(o));
-  EXPECT_EQ("java.lang.Class<java.lang.String[]>", PrettyTypeOf(o->GetClass()));
+  EXPECT_EQ("java.lang.String[]", mirror::Object::PrettyTypeOf(o));
+  EXPECT_EQ("java.lang.Class<java.lang.String[]>", mirror::Object::PrettyTypeOf(o->GetClass()));
 }
 
 TEST_F(UtilsTest, PrettyClass) {
   ScopedObjectAccess soa(Thread::Current());
-  EXPECT_EQ("null", PrettyClass(nullptr));
+  EXPECT_EQ("null", mirror::Class::PrettyClass(nullptr));
   mirror::Class* c = class_linker_->FindSystemClass(soa.Self(), "[Ljava/lang/String;");
   ASSERT_TRUE(c != nullptr);
   mirror::Object* o = mirror::ObjectArray<mirror::String>::Alloc(soa.Self(), c, 0);
-  EXPECT_EQ("java.lang.Class<java.lang.String[]>", PrettyClass(o->GetClass()));
+  EXPECT_EQ("java.lang.Class<java.lang.String[]>", mirror::Class::PrettyClass(o->GetClass()));
 }
 
 TEST_F(UtilsTest, PrettyClassAndClassLoader) {
   ScopedObjectAccess soa(Thread::Current());
-  EXPECT_EQ("null", PrettyClassAndClassLoader(nullptr));
+  EXPECT_EQ("null", mirror::Class::PrettyClassAndClassLoader(nullptr));
   mirror::Class* c = class_linker_->FindSystemClass(soa.Self(), "[Ljava/lang/String;");
   ASSERT_TRUE(c != nullptr);
   mirror::Object* o = mirror::ObjectArray<mirror::String>::Alloc(soa.Self(), c, 0);
-  EXPECT_EQ("java.lang.Class<java.lang.String[],null>", PrettyClassAndClassLoader(o->GetClass()));
+  EXPECT_EQ("java.lang.Class<java.lang.String[],null>",
+            mirror::Class::PrettyClassAndClassLoader(o->GetClass()));
 }
 
 TEST_F(UtilsTest, PrettyField) {
   ScopedObjectAccess soa(Thread::Current());
-  EXPECT_EQ("null", PrettyField(nullptr));
+  EXPECT_EQ("null", ArtField::PrettyField(nullptr));
 
   mirror::Class* java_lang_String = class_linker_->FindSystemClass(soa.Self(),
                                                                    "Ljava/lang/String;");
 
   ArtField* f;
   f = java_lang_String->FindDeclaredInstanceField("count", "I");
-  EXPECT_EQ("int java.lang.String.count", PrettyField(f));
-  EXPECT_EQ("java.lang.String.count", PrettyField(f, false));
+  EXPECT_EQ("int java.lang.String.count", f->PrettyField());
+  EXPECT_EQ("java.lang.String.count", f->PrettyField(false));
 }
 
 TEST_F(UtilsTest, PrettySize) {
@@ -192,18 +193,18 @@
 
   m = c->FindVirtualMethod("charAt", "(I)C", kRuntimePointerSize);
   ASSERT_TRUE(m != nullptr);
-  EXPECT_EQ("Java_java_lang_String_charAt", JniShortName(m));
-  EXPECT_EQ("Java_java_lang_String_charAt__I", JniLongName(m));
+  EXPECT_EQ("Java_java_lang_String_charAt", m->JniShortName());
+  EXPECT_EQ("Java_java_lang_String_charAt__I", m->JniLongName());
 
   m = c->FindVirtualMethod("indexOf", "(Ljava/lang/String;I)I", kRuntimePointerSize);
   ASSERT_TRUE(m != nullptr);
-  EXPECT_EQ("Java_java_lang_String_indexOf", JniShortName(m));
-  EXPECT_EQ("Java_java_lang_String_indexOf__Ljava_lang_String_2I", JniLongName(m));
+  EXPECT_EQ("Java_java_lang_String_indexOf", m->JniShortName());
+  EXPECT_EQ("Java_java_lang_String_indexOf__Ljava_lang_String_2I", m->JniLongName());
 
   m = c->FindDirectMethod("copyValueOf", "([CII)Ljava/lang/String;", kRuntimePointerSize);
   ASSERT_TRUE(m != nullptr);
-  EXPECT_EQ("Java_java_lang_String_copyValueOf", JniShortName(m));
-  EXPECT_EQ("Java_java_lang_String_copyValueOf___3CII", JniLongName(m));
+  EXPECT_EQ("Java_java_lang_String_copyValueOf", m->JniShortName());
+  EXPECT_EQ("Java_java_lang_String_copyValueOf___3CII", m->JniLongName());
 }
 
 TEST_F(UtilsTest, Split) {
diff --git a/runtime/vdex_file.cc b/runtime/vdex_file.cc
index 9fbf875..b3dab58 100644
--- a/runtime/vdex_file.cc
+++ b/runtime/vdex_file.cc
@@ -34,9 +34,12 @@
   return (memcmp(version_, kVdexVersion, sizeof(kVdexVersion)) == 0);
 }
 
-VdexFile::Header::Header(uint32_t dex_size, uint32_t verifier_deps_size)
+VdexFile::Header::Header(uint32_t dex_size,
+                         uint32_t verifier_deps_size,
+                         uint32_t quickening_info_size)
     : dex_size_(dex_size),
-      verifier_deps_size_(verifier_deps_size) {
+      verifier_deps_size_(verifier_deps_size),
+      quickening_info_size_(quickening_info_size) {
   memcpy(magic_, kVdexMagic, sizeof(kVdexMagic));
   memcpy(version_, kVdexVersion, sizeof(kVdexVersion));
   DCHECK(IsMagicValid());
diff --git a/runtime/vdex_file.h b/runtime/vdex_file.h
index 6bea153..edd6ffe 100644
--- a/runtime/vdex_file.h
+++ b/runtime/vdex_file.h
@@ -20,6 +20,7 @@
 #include <stdint.h>
 #include <string>
 
+#include "base/array_ref.h"
 #include "base/macros.h"
 #include "mem_map.h"
 #include "os.h"
@@ -42,13 +43,17 @@
  public:
   struct Header {
    public:
-    Header(uint32_t dex_size, uint32_t verifier_deps_size);
+    Header(uint32_t dex_size, uint32_t verifier_deps_size, uint32_t quickening_info_size);
 
+    const char* GetMagic() const { return reinterpret_cast<const char*>(magic_); }
+    const char* GetVersion() const { return reinterpret_cast<const char*>(version_); }
     bool IsMagicValid() const;
     bool IsVersionValid() const;
+    bool IsValid() const { return IsMagicValid() && IsVersionValid(); }
 
     uint32_t GetDexSize() const { return dex_size_; }
     uint32_t GetVerifierDepsSize() const { return verifier_deps_size_; }
+    uint32_t GetQuickeningInfoSize() const { return quickening_info_size_; }
 
    private:
     static constexpr uint8_t kVdexMagic[] = { 'v', 'd', 'e', 'x' };
@@ -58,6 +63,7 @@
     uint8_t version_[4];
     uint32_t dex_size_;
     uint32_t verifier_deps_size_;
+    uint32_t quickening_info_size_;
   };
 
   static VdexFile* Open(const std::string& vdex_filename,
@@ -69,6 +75,15 @@
   const uint8_t* End() const { return mmap_->End(); }
   size_t Size() const { return mmap_->Size(); }
 
+  const Header& GetHeader() const {
+    return *reinterpret_cast<const Header*>(Begin());
+  }
+
+  ArrayRef<const uint8_t> GetVerifierDepsData() const {
+    return ArrayRef<const uint8_t>(
+        Begin() + sizeof(Header) + GetHeader().GetDexSize(), GetHeader().GetVerifierDepsSize());
+  }
+
  private:
   explicit VdexFile(MemMap* mmap) : mmap_(mmap) {}
 
diff --git a/runtime/verifier/method_verifier.cc b/runtime/verifier/method_verifier.cc
index 181673c..d9e3ea7 100644
--- a/runtime/verifier/method_verifier.cc
+++ b/runtime/verifier/method_verifier.cc
@@ -141,13 +141,13 @@
     failure_message = " that has no super class";
   } else if (super != nullptr && super->IsFinal()) {
     early_failure = true;
-    failure_message = " that attempts to sub-class final class " + PrettyDescriptor(super);
+    failure_message = " that attempts to sub-class final class " + super->PrettyDescriptor();
   } else if (class_def == nullptr) {
     early_failure = true;
     failure_message = " that isn't present in dex file " + dex_file.GetLocation();
   }
   if (early_failure) {
-    *error = "Verifier rejected class " + PrettyDescriptor(klass) + failure_message;
+    *error = "Verifier rejected class " + klass->PrettyDescriptor() + failure_message;
     if (callbacks != nullptr) {
       ClassReference ref(&dex_file, klass->GetDexClassDefIndex());
       callbacks->ClassRejected(ref);
@@ -395,7 +395,7 @@
     if (verifier.failures_.size() != 0) {
       if (VLOG_IS_ON(verifier)) {
         verifier.DumpFailures(VLOG_STREAM(verifier) << "Soft verification failures in "
-                                                    << PrettyMethod(method_idx, *dex_file) << "\n");
+                                                    << dex_file->PrettyMethod(method_idx) << "\n");
       }
       result.kind = kSoftFailure;
       if (method != nullptr &&
@@ -441,7 +441,7 @@
             UNREACHABLE();
         }
         verifier.DumpFailures(LOG_STREAM(severity) << "Verification error in "
-                                                   << PrettyMethod(method_idx, *dex_file)
+                                                   << dex_file->PrettyMethod(method_idx)
                                                    << "\n");
       }
       if (hard_failure_msg != nullptr) {
@@ -465,7 +465,7 @@
   if (kTimeVerifyMethod) {
     uint64_t duration_ns = NanoTime() - start_ns;
     if (duration_ns > MsToNs(100)) {
-      LOG(WARNING) << "Verification of " << PrettyMethod(method_idx, *dex_file)
+      LOG(WARNING) << "Verification of " << dex_file->PrettyMethod(method_idx)
                    << " took " << PrettyDuration(duration_ns)
                    << (IsLargeMethod(code_item) ? " (large method)" : "");
     }
@@ -722,7 +722,7 @@
     }
     is_constructor_ = true;
   } else if (constructor_by_name) {
-    LOG(WARNING) << "Method " << PrettyMethod(dex_method_idx_, *dex_file_)
+    LOG(WARNING) << "Method " << dex_file_->PrettyMethod(dex_method_idx_)
                  << " not marked as constructor.";
     is_constructor_ = true;
   }
@@ -935,7 +935,7 @@
     }
   }
   failures_.push_back(error);
-  std::string location(StringPrintf("%s: [0x%X] ", PrettyMethod(dex_method_idx_, *dex_file_).c_str(),
+  std::string location(StringPrintf("%s: [0x%X] ", dex_file_->PrettyMethod(dex_method_idx_).c_str(),
                                     work_insn_idx_));
   std::ostringstream* failure_message = new std::ostringstream(location, std::ostringstream::ate);
   failure_messages_.push_back(failure_message);
@@ -943,7 +943,7 @@
 }
 
 std::ostream& MethodVerifier::LogVerifyInfo() {
-  return info_messages_ << "VFY: " << PrettyMethod(dex_method_idx_, *dex_file_)
+  return info_messages_ << "VFY: " << dex_file_->PrettyMethod(dex_method_idx_)
                         << '[' << reinterpret_cast<void*>(work_insn_idx_) << "] : ";
 }
 
@@ -956,7 +956,7 @@
   delete last_fail_message;
 }
 
-void MethodVerifier::AppendToLastFailMessage(std::string append) {
+void MethodVerifier::AppendToLastFailMessage(const std::string& append) {
   size_t failure_num = failure_messages_.size();
   DCHECK_NE(failure_num, 0U);
   std::ostringstream* last_fail_message = failure_messages_[failure_num - 1];
@@ -1589,7 +1589,7 @@
   if (!SetTypesFromSignature()) {
     DCHECK_NE(failures_.size(), 0U);
     std::string prepend("Bad signature in ");
-    prepend += PrettyMethod(dex_method_idx_, *dex_file_);
+    prepend += dex_file_->PrettyMethod(dex_method_idx_);
     PrependToLastFailMessage(prepend);
     return false;
   }
@@ -1866,7 +1866,7 @@
         if (work_line_->CompareLine(register_line) != 0) {
           Dump(std::cout);
           std::cout << info_messages_.str();
-          LOG(FATAL) << "work_line diverged in " << PrettyMethod(dex_method_idx_, *dex_file_)
+          LOG(FATAL) << "work_line diverged in " << dex_file_->PrettyMethod(dex_method_idx_)
                      << "@" << reinterpret_cast<void*>(work_insn_idx_) << "\n"
                      << " work_line=" << work_line_->Dump(this) << "\n"
                      << "  expected=" << register_line->Dump(this);
@@ -1874,7 +1874,7 @@
       }
     }
     if (!CodeFlowVerifyInstruction(&start_guess)) {
-      std::string prepend(PrettyMethod(dex_method_idx_, *dex_file_));
+      std::string prepend(dex_file_->PrettyMethod(dex_method_idx_));
       prepend += " failed to verify: ";
       PrependToLastFailMessage(prepend);
       return false;
@@ -1925,7 +1925,7 @@
                       << "-" << reinterpret_cast<void*>(insn_idx - 1);
     }
     // To dump the state of the verify after a method, do something like:
-    // if (PrettyMethod(dex_method_idx_, *dex_file_) ==
+    // if (dex_file_->PrettyMethod(dex_method_idx_) ==
     //     "boolean java.lang.String.equals(java.lang.Object)") {
     //   LOG(INFO) << info_messages_.str();
     // }
@@ -2994,7 +2994,7 @@
         mirror::Class* called_interface = abs_method->GetDeclaringClass();
         if (!called_interface->IsInterface() && !called_interface->IsObjectClass()) {
           Fail(VERIFY_ERROR_CLASS_CHANGE) << "expected interface class in invoke-interface '"
-              << PrettyMethod(abs_method) << "'";
+              << abs_method->PrettyMethod() << "'";
           break;
         }
       }
@@ -3278,7 +3278,7 @@
         for (uint32_t i = 0, num_fields = klass->NumInstanceFields(); i < num_fields; ++i) {
           if (klass->GetInstanceField(i)->IsFinal()) {
             Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "return-void-no-barrier not expected for "
-                << PrettyField(klass->GetInstanceField(i));
+                << klass->GetInstanceField(i)->PrettyField();
             break;
           }
         }
@@ -3787,7 +3787,7 @@
 
   if (res_method == nullptr) {
     Fail(VERIFY_ERROR_NO_METHOD) << "couldn't find method "
-                                 << PrettyDescriptor(klass) << "."
+                                 << klass->PrettyDescriptor() << "."
                                  << dex_file_->GetMethodName(method_id) << " "
                                  << dex_file_->GetMethodSignature(method_id);
     return nullptr;
@@ -3797,13 +3797,13 @@
   // enforce them here.
   if (res_method->IsConstructor() && method_type != METHOD_DIRECT) {
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "rejecting non-direct call to constructor "
-                                      << PrettyMethod(res_method);
+                                      << res_method->PrettyMethod();
     return nullptr;
   }
   // Disallow any calls to class initializers.
   if (res_method->IsClassInitializer()) {
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "rejecting call to class initializer "
-                                      << PrettyMethod(res_method);
+                                      << res_method->PrettyMethod();
     return nullptr;
   }
 
@@ -3821,15 +3821,15 @@
          method_type != METHOD_DIRECT) &&
         method_type != METHOD_SUPER) {
       Fail(VERIFY_ERROR_CLASS_CHANGE)
-          << "non-interface method " << PrettyMethod(dex_method_idx, *dex_file_)
-          << " is in an interface class " << PrettyClass(klass);
+          << "non-interface method " << dex_file_->PrettyMethod(dex_method_idx)
+          << " is in an interface class " << klass->PrettyClass();
       return nullptr;
     }
   } else {
     if (method_type == METHOD_INTERFACE) {
       Fail(VERIFY_ERROR_CLASS_CHANGE)
-          << "interface method " << PrettyMethod(dex_method_idx, *dex_file_)
-          << " is in a non-interface class " << PrettyClass(klass);
+          << "interface method " << dex_file_->PrettyMethod(dex_method_idx)
+          << " is in a non-interface class " << klass->PrettyClass();
       return nullptr;
     }
   }
@@ -3841,14 +3841,15 @@
 
   // Check if access is allowed.
   if (!referrer.CanAccessMember(res_method->GetDeclaringClass(), res_method->GetAccessFlags())) {
-    Fail(VERIFY_ERROR_ACCESS_METHOD) << "illegal method access (call " << PrettyMethod(res_method)
+    Fail(VERIFY_ERROR_ACCESS_METHOD) << "illegal method access (call "
+                                     << res_method->PrettyMethod()
                                      << " from " << referrer << ")";
     return res_method;
   }
   // Check that invoke-virtual and invoke-super are not used on private methods of the same class.
   if (res_method->IsPrivate() && (method_type == METHOD_VIRTUAL || method_type == METHOD_SUPER)) {
     Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "invoke-super/virtual can't be used on private method "
-                                      << PrettyMethod(res_method);
+                                      << res_method->PrettyMethod();
     return nullptr;
   }
   // See if the method type implied by the invoke instruction matches the access flags for the
@@ -3860,7 +3861,7 @@
         method_type == METHOD_INTERFACE) && res_method->IsDirect())
       ) {
     Fail(VERIFY_ERROR_CLASS_CHANGE) << "invoke type (" << method_type << ") does not match method "
-                                       " type of " << PrettyMethod(res_method);
+                                       " type of " << res_method->PrettyMethod();
     return nullptr;
   }
   return res_method;
@@ -4090,24 +4091,25 @@
         return nullptr;
       } else if (!reference_type.IsStrictlyAssignableFrom(GetDeclaringClass(), this)) {
         Fail(VERIFY_ERROR_CLASS_CHANGE)
-            << "invoke-super in " << PrettyClass(GetDeclaringClass().GetClass()) << " in method "
-            << PrettyMethod(dex_method_idx_, *dex_file_) << " to method "
-            << PrettyMethod(method_idx, *dex_file_) << " references "
-            << "non-super-interface type " << PrettyClass(reference_type.GetClass());
+            << "invoke-super in " << mirror::Class::PrettyClass(GetDeclaringClass().GetClass())
+            << " in method "
+            << dex_file_->PrettyMethod(dex_method_idx_) << " to method "
+            << dex_file_->PrettyMethod(method_idx) << " references "
+            << "non-super-interface type " << mirror::Class::PrettyClass(reference_type.GetClass());
         return nullptr;
       }
     } else {
       const RegType& super = GetDeclaringClass().GetSuperClass(&reg_types_);
       if (super.IsUnresolvedTypes()) {
         Fail(VERIFY_ERROR_NO_METHOD) << "unknown super class in invoke-super from "
-                                    << PrettyMethod(dex_method_idx_, *dex_file_)
-                                    << " to super " << PrettyMethod(res_method);
+                                    << dex_file_->PrettyMethod(dex_method_idx_)
+                                    << " to super " << res_method->PrettyMethod();
         return nullptr;
       }
       if (!reference_type.IsStrictlyAssignableFrom(GetDeclaringClass(), this) ||
           (res_method->GetMethodIndex() >= super.GetClass()->GetVTableLength())) {
         Fail(VERIFY_ERROR_NO_METHOD) << "invalid invoke-super from "
-                                    << PrettyMethod(dex_method_idx_, *dex_file_)
+                                    << dex_file_->PrettyMethod(dex_method_idx_)
                                     << " to super " << super
                                     << "." << res_method->GetName()
                                     << res_method->GetSignature();
@@ -4172,7 +4174,7 @@
 
 ArtMethod* MethodVerifier::VerifyInvokeVirtualQuickArgs(const Instruction* inst, bool is_range) {
   DCHECK(Runtime::Current()->IsStarted() || verify_to_dump_)
-      << PrettyMethod(dex_method_idx_, *dex_file_, true) << "@" << work_insn_idx_;
+      << dex_file_->PrettyMethod(dex_method_idx_, true) << "@" << work_insn_idx_;
 
   ArtMethod* res_method = GetQuickInvokedMethod(inst, work_line_.get(), is_range, false);
   if (res_method == nullptr) {
@@ -4242,7 +4244,8 @@
   size_t actual_args = 1;
   for (size_t param_index = 0; param_index < params_size; param_index++) {
     if (actual_args >= expected_args) {
-      Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Rejecting invalid call to '" << PrettyMethod(res_method)
+      Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Rejecting invalid call to '"
+                                        << res_method->PrettyMethod()
                                         << "'. Expected " << expected_args
                                          << " arguments, processing argument " << actual_args
                                         << " (where longs/doubles count twice).";
@@ -4251,7 +4254,8 @@
     const char* descriptor =
         res_method->GetTypeDescriptorFromTypeIdx(params->GetTypeItem(param_index).type_idx_);
     if (descriptor == nullptr) {
-      Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Rejecting invocation of " << PrettyMethod(res_method)
+      Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Rejecting invocation of "
+                                        << res_method->PrettyMethod()
                                         << " missing signature component";
       return nullptr;
     }
@@ -4263,8 +4267,9 @@
     actual_args = reg_type.IsLongOrDoubleTypes() ? actual_args + 2 : actual_args + 1;
   }
   if (actual_args != expected_args) {
-    Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Rejecting invocation of " << PrettyMethod(res_method)
-              << " expected " << expected_args << " arguments, found " << actual_args;
+    Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "Rejecting invocation of "
+                                      << res_method->PrettyMethod() << " expected "
+                                      << expected_args << " arguments, found " << actual_args;
     return nullptr;
   } else {
     return res_method;
@@ -4522,11 +4527,11 @@
     return nullptr;
   } else if (!GetDeclaringClass().CanAccessMember(field->GetDeclaringClass(),
                                                   field->GetAccessFlags())) {
-    Fail(VERIFY_ERROR_ACCESS_FIELD) << "cannot access static field " << PrettyField(field)
+    Fail(VERIFY_ERROR_ACCESS_FIELD) << "cannot access static field " << field->PrettyField()
                                     << " from " << GetDeclaringClass();
     return nullptr;
   } else if (!field->IsStatic()) {
-    Fail(VERIFY_ERROR_CLASS_CHANGE) << "expected field " << PrettyField(field) << " to be static";
+    Fail(VERIFY_ERROR_CLASS_CHANGE) << "expected field " << field->PrettyField() << " to be static";
     return nullptr;
   }
   return field;
@@ -4581,9 +4586,9 @@
       if (!obj_type.IsUninitializedThisReference() ||
           !IsConstructor() ||
           !field_klass.Equals(GetDeclaringClass())) {
-        Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "cannot access instance field " << PrettyField(field)
+        Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "cannot access instance field " << field->PrettyField()
                                           << " of a not fully initialized object within the context"
-                                          << " of " << PrettyMethod(dex_method_idx_, *dex_file_);
+                                          << " of " << dex_file_->PrettyMethod(dex_method_idx_);
         return nullptr;
       }
     } else if (!field_klass.IsAssignableFrom(obj_type, this)) {
@@ -4600,7 +4605,7 @@
         // and still missing classes. This is a hard failure.
         type = VerifyError::VERIFY_ERROR_BAD_CLASS_HARD;
       }
-      Fail(type) << "cannot access instance field " << PrettyField(field)
+      Fail(type) << "cannot access instance field " << field->PrettyField()
                  << " from object of type " << obj_type;
       return nullptr;
     }
@@ -4609,11 +4614,11 @@
   // Few last soft failure checks.
   if (!GetDeclaringClass().CanAccessMember(field->GetDeclaringClass(),
                                            field->GetAccessFlags())) {
-    Fail(VERIFY_ERROR_ACCESS_FIELD) << "cannot access instance field " << PrettyField(field)
+    Fail(VERIFY_ERROR_ACCESS_FIELD) << "cannot access instance field " << field->PrettyField()
                                     << " from " << GetDeclaringClass();
     return nullptr;
   } else if (field->IsStatic()) {
-    Fail(VERIFY_ERROR_CLASS_CHANGE) << "expected field " << PrettyField(field)
+    Fail(VERIFY_ERROR_CLASS_CHANGE) << "expected field " << field->PrettyField()
                                     << " to not be static";
     return nullptr;
   }
@@ -4649,12 +4654,12 @@
       if (field == nullptr) {
         Fail(VERIFY_ERROR_BAD_CLASS_SOFT) << "Might be accessing a superclass instance field prior "
                                           << "to the superclass being initialized in "
-                                          << PrettyMethod(dex_method_idx_, *dex_file_);
+                                          << dex_file_->PrettyMethod(dex_method_idx_);
       } else if (field->GetDeclaringClass() != GetDeclaringClass().GetClass()) {
         Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "cannot access superclass instance field "
-                                          << PrettyField(field) << " of a not fully initialized "
+                                          << field->PrettyField() << " of a not fully initialized "
                                           << "object within the context of "
-                                          << PrettyMethod(dex_method_idx_, *dex_file_);
+                                          << dex_file_->PrettyMethod(dex_method_idx_);
         return;
       }
     }
@@ -4663,7 +4668,7 @@
   if (field != nullptr) {
     if (kAccType == FieldAccessType::kAccPut) {
       if (field->IsFinal() && field->GetDeclaringClass() != GetDeclaringClass().GetClass()) {
-        Fail(VERIFY_ERROR_ACCESS_FIELD) << "cannot modify final field " << PrettyField(field)
+        Fail(VERIFY_ERROR_ACCESS_FIELD) << "cannot modify final field " << field->PrettyField()
                                         << " from other class " << GetDeclaringClass();
         // Keep hunting for possible hard fails.
       }
@@ -4700,7 +4705,7 @@
         // should have been consistent within the same file at compile time.
         VerifyError error = field_type->IsReferenceTypes() ? VERIFY_ERROR_BAD_CLASS_SOFT
                                                            : VERIFY_ERROR_BAD_CLASS_HARD;
-        Fail(error) << "expected field " << PrettyField(field)
+        Fail(error) << "expected field " << ArtField::PrettyField(field)
                     << " to be compatible with type '" << insn_type
                     << "' but found type '" << *field_type
                     << "' in put-object";
@@ -4720,7 +4725,7 @@
         // This is a global failure rather than a class change failure as the instructions and
         // the descriptors for the type should have been consistent within the same file at
         // compile time
-        Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "expected field " << PrettyField(field)
+        Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "expected field " << ArtField::PrettyField(field)
                                           << " to be of type '" << insn_type
                                           << "' but found type '" << *field_type << "' in get";
         return;
@@ -4732,7 +4737,7 @@
         // should have been consistent within the same file at compile time.
         VerifyError error = field_type->IsReferenceTypes() ? VERIFY_ERROR_BAD_CLASS_SOFT
                                                            : VERIFY_ERROR_BAD_CLASS_HARD;
-        Fail(error) << "expected field " << PrettyField(field)
+        Fail(error) << "expected field " << ArtField::PrettyField(field)
                     << " to be compatible with type '" << insn_type
                     << "' but found type '" << *field_type
                     << "' in get-object";
@@ -4765,7 +4770,7 @@
   DCHECK_EQ(f->GetOffset().Uint32Value(), field_offset);
   if (f == nullptr) {
     VLOG(verifier) << "Failed to find instance field at offset '" << field_offset
-                   << "' from '" << PrettyDescriptor(object_type.GetClass()) << "'";
+                   << "' from '" << mirror::Class::PrettyDescriptor(object_type.GetClass()) << "'";
   }
   return f;
 }
@@ -4784,7 +4789,7 @@
   // For an IPUT_QUICK, we now test for final flag of the field.
   if (kAccType == FieldAccessType::kAccPut) {
     if (field->IsFinal() && field->GetDeclaringClass() != GetDeclaringClass().GetClass()) {
-      Fail(VERIFY_ERROR_ACCESS_FIELD) << "cannot modify final field " << PrettyField(field)
+      Fail(VERIFY_ERROR_ACCESS_FIELD) << "cannot modify final field " << field->PrettyField()
                                       << " from other class " << GetDeclaringClass();
       return;
     }
@@ -4843,7 +4848,7 @@
         // This is a global failure rather than a class change failure as the instructions and
         // the descriptors for the type should have been consistent within the same file at
         // compile time
-        Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "expected field " << PrettyField(field)
+        Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "expected field " << ArtField::PrettyField(field)
                                           << " to be of type '" << insn_type
                                           << "' but found type '" << *field_type
                                           << "' in put";
@@ -4853,12 +4858,12 @@
         Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "unexpected value in v" << vregA
             << " of type " << value_type
             << " but expected " << *field_type
-            << " for store to " << PrettyField(field) << " in put";
+            << " for store to " << ArtField::PrettyField(field) << " in put";
         return;
       }
     } else {
       if (!insn_type.IsAssignableFrom(*field_type, this)) {
-        Fail(VERIFY_ERROR_BAD_CLASS_SOFT) << "expected field " << PrettyField(field)
+        Fail(VERIFY_ERROR_BAD_CLASS_SOFT) << "expected field " << ArtField::PrettyField(field)
                                           << " to be compatible with type '" << insn_type
                                           << "' but found type '" << *field_type
                                           << "' in put-object";
@@ -4877,14 +4882,14 @@
         // This is a global failure rather than a class change failure as the instructions and
         // the descriptors for the type should have been consistent within the same file at
         // compile time
-        Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "expected field " << PrettyField(field)
+        Fail(VERIFY_ERROR_BAD_CLASS_HARD) << "expected field " << ArtField::PrettyField(field)
                                           << " to be of type '" << insn_type
                                           << "' but found type '" << *field_type << "' in Get";
         return;
       }
     } else {
       if (!insn_type.IsAssignableFrom(*field_type, this)) {
-        Fail(VERIFY_ERROR_BAD_CLASS_SOFT) << "expected field " << PrettyField(field)
+        Fail(VERIFY_ERROR_BAD_CLASS_SOFT) << "expected field " << ArtField::PrettyField(field)
                                           << " to be compatible with type '" << insn_type
                                           << "' but found type '" << *field_type
                                           << "' in get-object";
diff --git a/runtime/verifier/method_verifier.h b/runtime/verifier/method_verifier.h
index eb8b7a6..c6ce583 100644
--- a/runtime/verifier/method_verifier.h
+++ b/runtime/verifier/method_verifier.h
@@ -318,7 +318,7 @@
   void PrependToLastFailMessage(std::string);
 
   // Adds the given string to the end of the last failure message.
-  void AppendToLastFailMessage(std::string);
+  void AppendToLastFailMessage(const std::string& append);
 
   // Verification result for method(s). Includes a (maximum) failure kind, and (the union of)
   // all failure types.
diff --git a/runtime/verifier/method_verifier_test.cc b/runtime/verifier/method_verifier_test.cc
index 837ee2d..52be2df 100644
--- a/runtime/verifier/method_verifier_test.cc
+++ b/runtime/verifier/method_verifier_test.cc
@@ -23,6 +23,7 @@
 #include "common_runtime_test.h"
 #include "dex_file.h"
 #include "scoped_thread_state_change-inl.h"
+#include "utils.h"
 #include "verifier_log_mode.h"
 
 namespace art {
@@ -40,7 +41,14 @@
     std::string error_msg;
     MethodVerifier::FailureKind failure = MethodVerifier::VerifyClass(
         self, klass, nullptr, true, HardFailLogMode::kLogWarning, &error_msg);
-    ASSERT_TRUE(failure == MethodVerifier::kNoFailure) << error_msg;
+
+    if (StartsWith(descriptor, "Ljava/lang/invoke")) {
+      ASSERT_TRUE(failure == MethodVerifier::kSoftFailure ||
+                  failure == MethodVerifier::kNoFailure) << error_msg;
+
+    } else {
+      ASSERT_TRUE(failure == MethodVerifier::kNoFailure) << error_msg;
+    }
   }
 
   void VerifyDexFile(const DexFile& dex)
diff --git a/runtime/verifier/reg_type.cc b/runtime/verifier/reg_type.cc
index a84668b..ab23773 100644
--- a/runtime/verifier/reg_type.cc
+++ b/runtime/verifier/reg_type.cc
@@ -355,26 +355,26 @@
 
 std::string ReferenceType::Dump() const {
   std::stringstream result;
-  result << "Reference" << ": " << PrettyDescriptor(GetClass());
+  result << "Reference" << ": " << mirror::Class::PrettyDescriptor(GetClass());
   return result.str();
 }
 
 std::string PreciseReferenceType::Dump() const {
   std::stringstream result;
-  result << "Precise Reference" << ": "<< PrettyDescriptor(GetClass());
+  result << "Precise Reference" << ": "<< mirror::Class::PrettyDescriptor(GetClass());
   return result.str();
 }
 
 std::string UninitializedReferenceType::Dump() const {
   std::stringstream result;
-  result << "Uninitialized Reference" << ": " << PrettyDescriptor(GetClass());
+  result << "Uninitialized Reference" << ": " << mirror::Class::PrettyDescriptor(GetClass());
   result << " Allocation PC: " << GetAllocationPc();
   return result.str();
 }
 
 std::string UninitializedThisReferenceType::Dump() const {
   std::stringstream result;
-  result << "Uninitialized This Reference" << ": " << PrettyDescriptor(GetClass());
+  result << "Uninitialized This Reference" << ": " << mirror::Class::PrettyDescriptor(GetClass());
   result << "Allocation PC: " << GetAllocationPc();
   return result.str();
 }
@@ -730,8 +730,8 @@
 
 // See comment in reg_type.h
 mirror::Class* RegType::ClassJoin(mirror::Class* s, mirror::Class* t) {
-  DCHECK(!s->IsPrimitive()) << PrettyClass(s);
-  DCHECK(!t->IsPrimitive()) << PrettyClass(t);
+  DCHECK(!s->IsPrimitive()) << s->PrettyClass();
+  DCHECK(!t->IsPrimitive()) << t->PrettyClass();
   if (s == t) {
     return s;
   } else if (s->IsAssignableFrom(t)) {
@@ -748,7 +748,7 @@
       DCHECK(result->IsObjectClass());
       return result;
     }
-    mirror::Class* common_elem = ClassJoin(s_ct, t_ct);
+    ObjPtr<mirror::Class> common_elem = ClassJoin(s_ct, t_ct);
     ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
     mirror::Class* array_class = class_linker->FindArrayClass(Thread::Current(), &common_elem);
     DCHECK(array_class != nullptr);
diff --git a/runtime/verifier/reg_type_cache.cc b/runtime/verifier/reg_type_cache.cc
index d0493e5..93286ea 100644
--- a/runtime/verifier/reg_type_cache.cc
+++ b/runtime/verifier/reg_type_cache.cc
@@ -154,8 +154,7 @@
   if (can_load_classes_) {
     klass = class_linker->FindClass(self, descriptor, class_loader);
   } else {
-    klass = class_linker->LookupClass(self, descriptor, ComputeModifiedUtf8Hash(descriptor),
-                                      loader);
+    klass = class_linker->LookupClass(self, descriptor, loader);
     if (klass != nullptr && !klass->IsResolved()) {
       // We found the class but without it being loaded its not safe for use.
       klass = nullptr;
diff --git a/runtime/verifier/register_line-inl.h b/runtime/verifier/register_line-inl.h
index 3823143..3da1680 100644
--- a/runtime/verifier/register_line-inl.h
+++ b/runtime/verifier/register_line-inl.h
@@ -168,8 +168,7 @@
     verifier->Fail(VERIFY_ERROR_LOCKING);
     if (kDumpLockFailures) {
       VLOG(verifier) << "expected empty monitor stack in "
-                     << PrettyMethod(verifier->GetMethodReference().dex_method_index,
-                                     *verifier->GetMethodReference().dex_file);
+                     << verifier->GetMethodReference().PrettyMethod();
     }
   }
 }
diff --git a/runtime/verifier/register_line.cc b/runtime/verifier/register_line.cc
index 823336c..da3d946 100644
--- a/runtime/verifier/register_line.cc
+++ b/runtime/verifier/register_line.cc
@@ -33,8 +33,7 @@
       CHECK(!type.IsUninitializedThisReference() &&
             !type.IsUnresolvedAndUninitializedThisReference())
           << i << ": " << type.IsUninitializedThisReference() << " in "
-          << PrettyMethod(verifier->GetMethodReference().dex_method_index,
-                          *verifier->GetMethodReference().dex_file);
+          << verifier->GetMethodReference().PrettyMethod();
     }
   }
   if (!this_initialized_) {
@@ -338,8 +337,7 @@
     verifier->Fail(VERIFY_ERROR_LOCKING);
     if (kDumpLockFailures) {
       VLOG(verifier) << "monitor-enter stack overflow while verifying "
-                     << PrettyMethod(verifier->GetMethodReference().dex_method_index,
-                                     *verifier->GetMethodReference().dex_file);
+                     << verifier->GetMethodReference().PrettyMethod();
     }
   } else {
     if (SetRegToLockDepth(reg_idx, monitors_.size())) {
@@ -354,8 +352,7 @@
       verifier->Fail(VERIFY_ERROR_LOCKING);
       if (kDumpLockFailures) {
         VLOG(verifier) << "unexpected monitor-enter on register v" <<  reg_idx << " in "
-                       << PrettyMethod(verifier->GetMethodReference().dex_method_index,
-                                       *verifier->GetMethodReference().dex_file);
+                       << verifier->GetMethodReference().PrettyMethod();
       }
     }
   }
@@ -369,8 +366,7 @@
     verifier->Fail(VERIFY_ERROR_LOCKING);
     if (kDumpLockFailures) {
       VLOG(verifier) << "monitor-exit stack underflow while verifying "
-                     << PrettyMethod(verifier->GetMethodReference().dex_method_index,
-                                     *verifier->GetMethodReference().dex_file);
+                     << verifier->GetMethodReference().PrettyMethod();
     }
   } else {
     monitors_.pop_back();
@@ -390,8 +386,7 @@
       verifier->Fail(VERIFY_ERROR_LOCKING);
       if (kDumpLockFailures) {
         VLOG(verifier) << "monitor-exit not unlocking the top of the monitor stack while verifying "
-                       << PrettyMethod(verifier->GetMethodReference().dex_method_index,
-                                       *verifier->GetMethodReference().dex_file);
+                       << verifier->GetMethodReference().PrettyMethod();
       }
     } else {
       // Record the register was unlocked. This clears all aliases, thus it will also clear the
@@ -416,7 +411,7 @@
   }
 
   // Scan the map for the same value.
-  for (const std::pair<uint32_t, uint32_t>& pair : search_map) {
+  for (const std::pair<const uint32_t, uint32_t>& pair : search_map) {
     if (pair.first != src && pair.second == src_lock_levels) {
       return true;
     }
@@ -445,8 +440,7 @@
       if (kDumpLockFailures) {
         VLOG(verifier) << "mismatched stack depths (depth=" << MonitorStackDepth()
                        << ", incoming depth=" << incoming_line->MonitorStackDepth() << ") in "
-                       << PrettyMethod(verifier->GetMethodReference().dex_method_index,
-                                       *verifier->GetMethodReference().dex_file);
+                       << verifier->GetMethodReference().PrettyMethod();
       }
     } else if (reg_to_lock_depths_ != incoming_line->reg_to_lock_depths_) {
       for (uint32_t idx = 0; idx < num_regs_; idx++) {
@@ -480,8 +474,7 @@
             if (kDumpLockFailures) {
               VLOG(verifier) << "mismatched stack depths for register v" << idx
                              << ": " << depths  << " != " << incoming_depths << " in "
-                             << PrettyMethod(verifier->GetMethodReference().dex_method_index,
-                                             *verifier->GetMethodReference().dex_file);
+                             << verifier->GetMethodReference().PrettyMethod();
             }
             break;
           }
@@ -523,8 +516,7 @@
                 VLOG(verifier) << "mismatched lock levels for register v" << idx << ": "
                                << std::hex << locked_levels << std::dec  << " != "
                                << std::hex << incoming_locked_levels << std::dec << " in "
-                               << PrettyMethod(verifier->GetMethodReference().dex_method_index,
-                                               *verifier->GetMethodReference().dex_file);
+                               << verifier->GetMethodReference().PrettyMethod();
               }
               break;
             }
diff --git a/runtime/verifier/verifier_deps.cc b/runtime/verifier/verifier_deps.cc
index 3e1958f..c395612 100644
--- a/runtime/verifier/verifier_deps.cc
+++ b/runtime/verifier/verifier_deps.cc
@@ -39,6 +39,11 @@
   return (it == dex_deps_.end()) ? nullptr : it->second.get();
 }
 
+const VerifierDeps::DexFileDeps* VerifierDeps::GetDexFileDeps(const DexFile& dex_file) const {
+  auto it = dex_deps_.find(&dex_file);
+  return (it == dex_deps_.end()) ? nullptr : it->second.get();
+}
+
 template <typename T>
 uint16_t VerifierDeps::GetAccessFlags(T* element) {
   static_assert(kAccJavaFlagsMask == 0xFFFF, "Unexpected value of a constant");
@@ -95,12 +100,12 @@
   return new_id;
 }
 
-std::string VerifierDeps::GetStringFromId(const DexFile& dex_file, uint32_t string_id) {
+std::string VerifierDeps::GetStringFromId(const DexFile& dex_file, uint32_t string_id) const {
   uint32_t num_ids_in_dex = dex_file.NumStringIds();
   if (string_id < num_ids_in_dex) {
     return std::string(dex_file.StringDataByIdx(string_id));
   } else {
-    DexFileDeps* deps = GetDexFileDeps(dex_file);
+    const DexFileDeps* deps = GetDexFileDeps(dex_file);
     DCHECK(deps != nullptr);
     string_id -= num_ids_in_dex;
     CHECK_LT(string_id, deps->strings_.size());
@@ -108,7 +113,7 @@
   }
 }
 
-bool VerifierDeps::IsInClassPath(ObjPtr<mirror::Class> klass) {
+bool VerifierDeps::IsInClassPath(ObjPtr<mirror::Class> klass) const {
   DCHECK(klass != nullptr);
 
   ObjPtr<mirror::DexCache> dex_cache = klass->GetDexCache();
@@ -118,7 +123,7 @@
     // We could avoid recording dependencies on arrays with component types in
     // the compiled DEX files but we choose to record them anyway so as to
     // record the access flags VM sets for array classes.
-    DCHECK(klass->IsArrayClass()) << PrettyDescriptor(klass);
+    DCHECK(klass->IsArrayClass()) << klass->PrettyDescriptor();
     return true;
   }
 
@@ -280,6 +285,22 @@
   return callbacks->GetVerifierDeps();
 }
 
+void VerifierDeps::MaybeRecordVerificationStatus(const DexFile& dex_file,
+                                                 uint16_t type_idx,
+                                                 MethodVerifier::FailureKind failure_kind) {
+  if (failure_kind == MethodVerifier::kNoFailure) {
+    // We only record classes that did not fully verify at compile time.
+    return;
+  }
+
+  VerifierDeps* singleton = GetVerifierDepsSingleton();
+  if (singleton != nullptr) {
+    DexFileDeps* dex_deps = singleton->GetDexFileDeps(dex_file);
+    MutexLock mu(Thread::Current(), *Locks::verifier_deps_lock_);
+    dex_deps->unverified_classes_.push_back(type_idx);
+  }
+}
+
 void VerifierDeps::MaybeRecordClassResolution(const DexFile& dex_file,
                                               uint16_t type_idx,
                                               mirror::Class* klass) {
@@ -360,6 +381,14 @@
   }
 }
 
+static inline void EncodeUint16Vector(std::vector<uint8_t>* out,
+                                      const std::vector<uint16_t>& vector) {
+  EncodeUnsignedLeb128(out, vector.size());
+  for (uint16_t entry : vector) {
+    EncodeUnsignedLeb128(out, entry);
+  }
+}
+
 template<typename T>
 static inline void DecodeSet(const uint8_t** in, const uint8_t* end, std::set<T>* set) {
   DCHECK(set->empty());
@@ -371,6 +400,17 @@
   }
 }
 
+static inline void DecodeUint16Vector(const uint8_t** in,
+                                      const uint8_t* end,
+                                      std::vector<uint16_t>* vector) {
+  DCHECK(vector->empty());
+  size_t num_entries = DecodeUint32WithOverflowCheck(in, end);
+  vector->reserve(num_entries);
+  for (size_t i = 0; i < num_entries; ++i) {
+    vector->push_back(dchecked_integral_cast<uint16_t>(DecodeUint32WithOverflowCheck(in, end)));
+  }
+}
+
 static inline void EncodeStringVector(std::vector<uint8_t>* out,
                                       const std::vector<std::string>& strings) {
   EncodeUnsignedLeb128(out, strings.size());
@@ -396,33 +436,45 @@
   }
 }
 
-void VerifierDeps::Encode(std::vector<uint8_t>* buffer) const {
+void VerifierDeps::Encode(const std::vector<const DexFile*>& dex_files,
+                          std::vector<uint8_t>* buffer) const {
   MutexLock mu(Thread::Current(), *Locks::verifier_deps_lock_);
-  for (auto& entry : dex_deps_) {
-    EncodeStringVector(buffer, entry.second->strings_);
-    EncodeSet(buffer, entry.second->assignable_types_);
-    EncodeSet(buffer, entry.second->unassignable_types_);
-    EncodeSet(buffer, entry.second->classes_);
-    EncodeSet(buffer, entry.second->fields_);
-    EncodeSet(buffer, entry.second->direct_methods_);
-    EncodeSet(buffer, entry.second->virtual_methods_);
-    EncodeSet(buffer, entry.second->interface_methods_);
+  for (const DexFile* dex_file : dex_files) {
+    const DexFileDeps& deps = *GetDexFileDeps(*dex_file);
+    EncodeStringVector(buffer, deps.strings_);
+    EncodeSet(buffer, deps.assignable_types_);
+    EncodeSet(buffer, deps.unassignable_types_);
+    EncodeSet(buffer, deps.classes_);
+    EncodeSet(buffer, deps.fields_);
+    EncodeSet(buffer, deps.direct_methods_);
+    EncodeSet(buffer, deps.virtual_methods_);
+    EncodeSet(buffer, deps.interface_methods_);
+    EncodeUint16Vector(buffer, deps.unverified_classes_);
   }
 }
 
-VerifierDeps::VerifierDeps(const std::vector<const DexFile*>& dex_files, ArrayRef<uint8_t> data)
+VerifierDeps::VerifierDeps(const std::vector<const DexFile*>& dex_files,
+                           ArrayRef<const uint8_t> data)
     : VerifierDeps(dex_files) {
+  if (data.empty()) {
+    // Return eagerly, as the first thing we expect from VerifierDeps data is
+    // the number of created strings, even if there is no dependency.
+    // Currently, only the boot image does not have any VerifierDeps data.
+    return;
+  }
   const uint8_t* data_start = data.data();
   const uint8_t* data_end = data_start + data.size();
-  for (auto& entry : dex_deps_) {
-    DecodeStringVector(&data_start, data_end, &entry.second->strings_);
-    DecodeSet(&data_start, data_end, &entry.second->assignable_types_);
-    DecodeSet(&data_start, data_end, &entry.second->unassignable_types_);
-    DecodeSet(&data_start, data_end, &entry.second->classes_);
-    DecodeSet(&data_start, data_end, &entry.second->fields_);
-    DecodeSet(&data_start, data_end, &entry.second->direct_methods_);
-    DecodeSet(&data_start, data_end, &entry.second->virtual_methods_);
-    DecodeSet(&data_start, data_end, &entry.second->interface_methods_);
+  for (const DexFile* dex_file : dex_files) {
+    DexFileDeps* deps = GetDexFileDeps(*dex_file);
+    DecodeStringVector(&data_start, data_end, &deps->strings_);
+    DecodeSet(&data_start, data_end, &deps->assignable_types_);
+    DecodeSet(&data_start, data_end, &deps->unassignable_types_);
+    DecodeSet(&data_start, data_end, &deps->classes_);
+    DecodeSet(&data_start, data_end, &deps->fields_);
+    DecodeSet(&data_start, data_end, &deps->direct_methods_);
+    DecodeSet(&data_start, data_end, &deps->virtual_methods_);
+    DecodeSet(&data_start, data_end, &deps->interface_methods_);
+    DecodeUint16Vector(&data_start, data_end, &deps->unverified_classes_);
   }
   CHECK_LE(data_start, data_end);
 }
@@ -463,7 +515,358 @@
          (fields_ == rhs.fields_) &&
          (direct_methods_ == rhs.direct_methods_) &&
          (virtual_methods_ == rhs.virtual_methods_) &&
-         (interface_methods_ == rhs.interface_methods_);
+         (interface_methods_ == rhs.interface_methods_) &&
+         (unverified_classes_ == rhs.unverified_classes_);
+}
+
+void VerifierDeps::Dump(VariableIndentationOutputStream* vios) const {
+  for (const auto& dep : dex_deps_) {
+    const DexFile& dex_file = *dep.first;
+    vios->Stream()
+        << "Dependencies of "
+        << dex_file.GetLocation()
+        << ":\n";
+
+    ScopedIndentation indent(vios);
+
+    for (const std::string& str : dep.second->strings_) {
+      vios->Stream() << "Extra string: " << str << "\n";
+    }
+
+    for (const TypeAssignability& entry : dep.second->assignable_types_) {
+      vios->Stream()
+        << GetStringFromId(dex_file, entry.GetSource())
+        << " must be assignable to "
+        << GetStringFromId(dex_file, entry.GetDestination())
+        << "\n";
+    }
+
+    for (const TypeAssignability& entry : dep.second->unassignable_types_) {
+      vios->Stream()
+        << GetStringFromId(dex_file, entry.GetSource())
+        << " must not be assignable to "
+        << GetStringFromId(dex_file, entry.GetDestination())
+        << "\n";
+    }
+
+    for (const ClassResolution& entry : dep.second->classes_) {
+      vios->Stream()
+          << dex_file.StringByTypeIdx(entry.GetDexTypeIndex())
+          << (entry.IsResolved() ? " must be resolved " : "must not be resolved ")
+          << " with access flags " << std::hex << entry.GetAccessFlags() << std::dec
+          << "\n";
+    }
+
+    for (const FieldResolution& entry : dep.second->fields_) {
+      const DexFile::FieldId& field_id = dex_file.GetFieldId(entry.GetDexFieldIndex());
+      vios->Stream()
+          << dex_file.GetFieldDeclaringClassDescriptor(field_id) << "->"
+          << dex_file.GetFieldName(field_id) << ":"
+          << dex_file.GetFieldTypeDescriptor(field_id)
+          << " is expected to be ";
+      if (!entry.IsResolved()) {
+        vios->Stream() << "unresolved\n";
+      } else {
+        vios->Stream()
+          << "in class "
+          << GetStringFromId(dex_file, entry.GetDeclaringClassIndex())
+          << ", and have the access flags " << std::hex << entry.GetAccessFlags() << std::dec
+          << "\n";
+      }
+    }
+
+    for (const auto& entry :
+            { std::make_pair(kDirectMethodResolution, dep.second->direct_methods_),
+              std::make_pair(kVirtualMethodResolution, dep.second->virtual_methods_),
+              std::make_pair(kInterfaceMethodResolution, dep.second->interface_methods_) }) {
+      for (const MethodResolution& method : entry.second) {
+        const DexFile::MethodId& method_id = dex_file.GetMethodId(method.GetDexMethodIndex());
+        vios->Stream()
+            << dex_file.GetMethodDeclaringClassDescriptor(method_id) << "->"
+            << dex_file.GetMethodName(method_id)
+            << dex_file.GetMethodSignature(method_id).ToString()
+            << " is expected to be ";
+        if (!method.IsResolved()) {
+          vios->Stream() << "unresolved\n";
+        } else {
+          vios->Stream()
+            << "in class "
+            << GetStringFromId(dex_file, method.GetDeclaringClassIndex())
+            << ", have the access flags " << std::hex << method.GetAccessFlags() << std::dec
+            << ", and be of kind " << entry.first
+            << "\n";
+        }
+      }
+    }
+
+    for (uint16_t type_index : dep.second->unverified_classes_) {
+      vios->Stream()
+          << dex_file.StringByTypeIdx(type_index)
+          << " is expected to be verified at runtime\n";
+    }
+  }
+}
+
+bool VerifierDeps::ValidateDependencies(Handle<mirror::ClassLoader> class_loader,
+                                        Thread* self) const {
+  for (const auto& entry : dex_deps_) {
+    if (!VerifyDexFile(class_loader, *entry.first, *entry.second, self)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+// TODO: share that helper with other parts of the compiler that have
+// the same lookup pattern.
+static mirror::Class* FindClassAndClearException(ClassLinker* class_linker,
+                                                 Thread* self,
+                                                 const char* name,
+                                                 Handle<mirror::ClassLoader> class_loader)
+    REQUIRES_SHARED(Locks::mutator_lock_) {
+  mirror::Class* result = class_linker->FindClass(self, name, class_loader);
+  if (result == nullptr) {
+    DCHECK(self->IsExceptionPending());
+    self->ClearException();
+  }
+  return result;
+}
+
+bool VerifierDeps::VerifyAssignability(Handle<mirror::ClassLoader> class_loader,
+                                       const DexFile& dex_file,
+                                       const std::set<TypeAssignability>& assignables,
+                                       bool expected_assignability,
+                                       Thread* self) const {
+  StackHandleScope<2> hs(self);
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  MutableHandle<mirror::Class> source(hs.NewHandle<mirror::Class>(nullptr));
+  MutableHandle<mirror::Class> destination(hs.NewHandle<mirror::Class>(nullptr));
+
+  for (const auto& entry : assignables) {
+    const std::string& destination_desc = GetStringFromId(dex_file, entry.GetDestination());
+    destination.Assign(
+        FindClassAndClearException(class_linker, self, destination_desc.c_str(), class_loader));
+    const std::string& source_desc = GetStringFromId(dex_file, entry.GetSource());
+    source.Assign(
+        FindClassAndClearException(class_linker, self, source_desc.c_str(), class_loader));
+
+    if (destination.Get() == nullptr) {
+      LOG(INFO) << "VerifiersDeps: Could not resolve class " << destination_desc;
+      return false;
+    }
+
+    if (source.Get() == nullptr) {
+      LOG(INFO) << "VerifierDeps: Could not resolve class " << source_desc;
+      return false;
+    }
+
+    DCHECK(destination->IsResolved() && source->IsResolved());
+    if (destination->IsAssignableFrom(source.Get()) != expected_assignability) {
+      LOG(INFO) << "VerifierDeps: Class "
+                << destination_desc
+                << (expected_assignability ? " not " : " ")
+                << "assignable from "
+                << source_desc;
+      return false;
+    }
+  }
+  return true;
+}
+
+bool VerifierDeps::VerifyClasses(Handle<mirror::ClassLoader> class_loader,
+                                 const DexFile& dex_file,
+                                 const std::set<ClassResolution>& classes,
+                                 Thread* self) const {
+  StackHandleScope<1> hs(self);
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  MutableHandle<mirror::Class> cls(hs.NewHandle<mirror::Class>(nullptr));
+  for (const auto& entry : classes) {
+    const char* descriptor = dex_file.StringByTypeIdx(entry.GetDexTypeIndex());
+    cls.Assign(FindClassAndClearException(class_linker, self, descriptor, class_loader));
+
+    if (entry.IsResolved()) {
+      if (cls.Get() == nullptr) {
+        LOG(INFO) << "VerifierDeps: Could not resolve class " << descriptor;
+        return false;
+      } else if (entry.GetAccessFlags() != GetAccessFlags(cls.Get())) {
+        LOG(INFO) << "VerifierDeps: Unexpected access flags on class "
+                  << descriptor
+                  << std::hex
+                  << " (expected="
+                  << entry.GetAccessFlags()
+                  << ", actual="
+                  << GetAccessFlags(cls.Get()) << ")"
+                  << std::dec;
+        return false;
+      }
+    } else if (cls.Get() != nullptr) {
+      LOG(INFO) << "VerifierDeps: Unexpected successful resolution of class " << descriptor;
+      return false;
+    }
+  }
+  return true;
+}
+
+static std::string GetFieldDescription(const DexFile& dex_file, uint32_t index) {
+  const DexFile::FieldId& field_id = dex_file.GetFieldId(index);
+  return std::string(dex_file.GetFieldDeclaringClassDescriptor(field_id))
+      + "->"
+      + dex_file.GetFieldName(field_id)
+      + ":"
+      + dex_file.GetFieldTypeDescriptor(field_id);
+}
+
+bool VerifierDeps::VerifyFields(Handle<mirror::ClassLoader> class_loader,
+                                const DexFile& dex_file,
+                                const std::set<FieldResolution>& fields,
+                                Thread* self) const {
+  // Check recorded fields are resolved the same way, have the same recorded class,
+  // and have the same recorded flags.
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  StackHandleScope<1> hs(self);
+  Handle<mirror::DexCache> dex_cache(
+      hs.NewHandle(class_linker->FindDexCache(self, dex_file, /* allow_failure */ false)));
+  for (const auto& entry : fields) {
+    ArtField* field = class_linker->ResolveFieldJLS(
+        dex_file, entry.GetDexFieldIndex(), dex_cache, class_loader);
+
+    if (field == nullptr) {
+      DCHECK(self->IsExceptionPending());
+      self->ClearException();
+    }
+
+    if (entry.IsResolved()) {
+      std::string expected_decl_klass = GetStringFromId(dex_file, entry.GetDeclaringClassIndex());
+      std::string temp;
+      if (field == nullptr) {
+        LOG(INFO) << "VerifierDeps: Could not resolve field "
+                  << GetFieldDescription(dex_file, entry.GetDexFieldIndex());
+        return false;
+      } else if (expected_decl_klass != field->GetDeclaringClass()->GetDescriptor(&temp)) {
+        LOG(INFO) << "VerifierDeps: Unexpected declaring class for field resolution "
+                  << GetFieldDescription(dex_file, entry.GetDexFieldIndex())
+                  << " (expected=" << expected_decl_klass
+                  << ", actual=" << field->GetDeclaringClass()->GetDescriptor(&temp) << ")";
+        return false;
+      } else if (entry.GetAccessFlags() != GetAccessFlags(field)) {
+        LOG(INFO) << "VerifierDeps: Unexpected access flags for resolved field "
+                  << GetFieldDescription(dex_file, entry.GetDexFieldIndex())
+                  << std::hex << " (expected=" << entry.GetAccessFlags()
+                  << ", actual=" << GetAccessFlags(field) << ")" << std::dec;
+        return false;
+      }
+    } else if (field != nullptr) {
+      LOG(INFO) << "VerifierDeps: Unexpected successful resolution of field "
+                << GetFieldDescription(dex_file, entry.GetDexFieldIndex());
+      return false;
+    }
+  }
+  return true;
+}
+
+static std::string GetMethodDescription(const DexFile& dex_file, uint32_t index) {
+  const DexFile::MethodId& method_id = dex_file.GetMethodId(index);
+  return std::string(dex_file.GetMethodDeclaringClassDescriptor(method_id))
+      + "->"
+      + dex_file.GetMethodName(method_id)
+      + dex_file.GetMethodSignature(method_id).ToString();
+}
+
+bool VerifierDeps::VerifyMethods(Handle<mirror::ClassLoader> class_loader,
+                                 const DexFile& dex_file,
+                                 const std::set<MethodResolution>& methods,
+                                 MethodResolutionKind kind,
+                                 Thread* self) const {
+  ClassLinker* class_linker = Runtime::Current()->GetClassLinker();
+  PointerSize pointer_size = class_linker->GetImagePointerSize();
+
+  for (const auto& entry : methods) {
+    const DexFile::MethodId& method_id = dex_file.GetMethodId(entry.GetDexMethodIndex());
+
+    const char* name = dex_file.GetMethodName(method_id);
+    const Signature signature = dex_file.GetMethodSignature(method_id);
+    const char* descriptor = dex_file.GetMethodDeclaringClassDescriptor(method_id);
+
+    mirror::Class* cls = FindClassAndClearException(class_linker, self, descriptor, class_loader);
+    if (cls == nullptr) {
+      LOG(INFO) << "VerifierDeps: Could not resolve class " << descriptor;
+      return false;
+    }
+    DCHECK(cls->IsResolved());
+    ArtMethod* method = nullptr;
+    if (kind == kDirectMethodResolution) {
+      method = cls->FindDirectMethod(name, signature, pointer_size);
+    } else if (kind == kVirtualMethodResolution) {
+      method = cls->FindVirtualMethod(name, signature, pointer_size);
+    } else {
+      DCHECK_EQ(kind, kInterfaceMethodResolution);
+      method = cls->FindInterfaceMethod(name, signature, pointer_size);
+    }
+
+    if (entry.IsResolved()) {
+      std::string temp;
+      std::string expected_decl_klass = GetStringFromId(dex_file, entry.GetDeclaringClassIndex());
+      if (method == nullptr) {
+        LOG(INFO) << "VerifierDeps: Could not resolve "
+                  << kind
+                  << " method "
+                  << GetMethodDescription(dex_file, entry.GetDexMethodIndex());
+        return false;
+      } else if (expected_decl_klass != method->GetDeclaringClass()->GetDescriptor(&temp)) {
+        LOG(INFO) << "VerifierDeps: Unexpected declaring class for "
+                  << kind
+                  << " method resolution "
+                  << GetMethodDescription(dex_file, entry.GetDexMethodIndex())
+                  << " (expected="
+                  << expected_decl_klass
+                  << ", actual="
+                  << method->GetDeclaringClass()->GetDescriptor(&temp)
+                  << ")";
+        return false;
+      } else if (entry.GetAccessFlags() != GetAccessFlags(method)) {
+        LOG(INFO) << "VerifierDeps: Unexpected access flags for resolved "
+                  << kind
+                  << " method resolution "
+                  << GetMethodDescription(dex_file, entry.GetDexMethodIndex())
+                  << std::hex
+                  << " (expected="
+                  << entry.GetAccessFlags()
+                  << ", actual="
+                  << GetAccessFlags(method) << ")"
+                  << std::dec;
+        return false;
+      }
+    } else if (method != nullptr) {
+      LOG(INFO) << "VerifierDeps: Unexpected successful resolution of "
+                << kind
+                << " method "
+                << GetMethodDescription(dex_file, entry.GetDexMethodIndex());
+      return false;
+    }
+  }
+  return true;
+}
+
+bool VerifierDeps::VerifyDexFile(Handle<mirror::ClassLoader> class_loader,
+                                 const DexFile& dex_file,
+                                 const DexFileDeps& deps,
+                                 Thread* self) const {
+  bool result = VerifyAssignability(
+      class_loader, dex_file, deps.assignable_types_, /* expected_assignability */ true, self);
+  result = result && VerifyAssignability(
+      class_loader, dex_file, deps.unassignable_types_, /* expected_assignability */ false, self);
+
+  result = result && VerifyClasses(class_loader, dex_file, deps.classes_, self);
+  result = result && VerifyFields(class_loader, dex_file, deps.fields_, self);
+
+  result = result && VerifyMethods(
+      class_loader, dex_file, deps.direct_methods_, kDirectMethodResolution, self);
+  result = result && VerifyMethods(
+      class_loader, dex_file, deps.virtual_methods_, kVirtualMethodResolution, self);
+  result = result && VerifyMethods(
+      class_loader, dex_file, deps.interface_methods_, kInterfaceMethodResolution, self);
+
+  return result;
 }
 
 }  // namespace verifier
diff --git a/runtime/verifier/verifier_deps.h b/runtime/verifier/verifier_deps.h
index 3223f6f..7b419d4 100644
--- a/runtime/verifier/verifier_deps.h
+++ b/runtime/verifier/verifier_deps.h
@@ -25,7 +25,9 @@
 #include "art_method.h"
 #include "base/array_ref.h"
 #include "base/mutex.h"
+#include "indenter.h"
 #include "method_resolution_kind.h"
+#include "method_verifier.h"  // For MethodVerifier::FailureKind.
 #include "obj_ptr.h"
 #include "os.h"
 
@@ -49,6 +51,16 @@
   explicit VerifierDeps(const std::vector<const DexFile*>& dex_files)
       REQUIRES(!Locks::verifier_deps_lock_);
 
+  VerifierDeps(const std::vector<const DexFile*>& dex_files,
+               ArrayRef<const uint8_t> data)
+      REQUIRES(!Locks::verifier_deps_lock_);
+
+  // Record the verification status of the class at `type_idx`.
+  static void MaybeRecordVerificationStatus(const DexFile& dex_file,
+                                            uint16_t type_idx,
+                                            MethodVerifier::FailureKind failure_kind)
+      REQUIRES(!Locks::verifier_deps_lock_);
+
   // Record the outcome `klass` of resolving type `type_idx` from `dex_file`.
   // If `klass` is null, the class is assumed unresolved.
   static void MaybeRecordClassResolution(const DexFile& dex_file,
@@ -87,16 +99,31 @@
       REQUIRES(!Locks::verifier_deps_lock_);
 
   // Serialize the recorded dependencies and store the data into `buffer`.
-  void Encode(std::vector<uint8_t>* buffer) const
+  // `dex_files` provides the order of the dex files in which the dependencies
+  // should be emitted.
+  void Encode(const std::vector<const DexFile*>& dex_files, std::vector<uint8_t>* buffer) const
       REQUIRES(!Locks::verifier_deps_lock_);
 
+  // NO_THREAD_SAFETY_ANALYSIS as Dump iterates over dex_deps_, which is guarded by
+  // verifier_deps_lock_, but we expect Dump to be called once the deps collection is done.
+  void Dump(VariableIndentationOutputStream* vios) const
+      NO_THREAD_SAFETY_ANALYSIS;
+
+  // Verify the encoded dependencies of this `VerifierDeps` are still valid.
+  // NO_THREAD_SAFETY_ANALYSIS, as this must be called on a read-only `VerifierDeps`.
+  bool ValidateDependencies(Handle<mirror::ClassLoader> class_loader, Thread* self) const
+      NO_THREAD_SAFETY_ANALYSIS;
+
+  // NO_THREAD_SAFETY_ANALSYS, as this is queried when the VerifierDeps are
+  // fully created.
+  const std::vector<uint16_t>& GetUnverifiedClasses(const DexFile& dex_file) const
+      NO_THREAD_SAFETY_ANALYSIS {
+    return GetDexFileDeps(dex_file)->unverified_classes_;
+  }
+
  private:
   static constexpr uint16_t kUnresolvedMarker = static_cast<uint16_t>(-1);
 
-  // Only used in tests to reconstruct the data structure from serialized data.
-  VerifierDeps(const std::vector<const DexFile*>& dex_files, ArrayRef<uint8_t> data)
-      REQUIRES(!Locks::verifier_deps_lock_);
-
   using ClassResolutionBase = std::tuple<uint32_t, uint16_t>;
   struct ClassResolution : public ClassResolutionBase {
     ClassResolution() = default;
@@ -136,7 +163,7 @@
   };
 
   using TypeAssignabilityBase = std::tuple<uint32_t, uint32_t>;
-  struct TypeAssignability : public std::tuple<uint32_t, uint32_t> {
+  struct TypeAssignability : public TypeAssignabilityBase {
     TypeAssignability() = default;
     TypeAssignability(const TypeAssignability&) = default;
     TypeAssignability(uint32_t destination_idx, uint32_t source_idx)
@@ -165,6 +192,9 @@
     std::set<MethodResolution> virtual_methods_;
     std::set<MethodResolution> interface_methods_;
 
+    // List of classes that were not fully verified in that dex file.
+    std::vector<uint16_t> unverified_classes_;
+
     bool Equals(const DexFileDeps& rhs) const;
   };
 
@@ -175,9 +205,12 @@
   DexFileDeps* GetDexFileDeps(const DexFile& dex_file)
       NO_THREAD_SAFETY_ANALYSIS;
 
+  const DexFileDeps* GetDexFileDeps(const DexFile& dex_file) const
+      NO_THREAD_SAFETY_ANALYSIS;
+
   // Returns true if `klass` is null or not defined in any of dex files which
   // were reported as being compiled.
-  bool IsInClassPath(ObjPtr<mirror::Class> klass)
+  bool IsInClassPath(ObjPtr<mirror::Class> klass) const
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Returns the index of `str`. If it is defined in `dex_file_`, this is the dex
@@ -188,13 +221,13 @@
       REQUIRES(Locks::verifier_deps_lock_);
 
   // Returns the string represented by `id`.
-  std::string GetStringFromId(const DexFile& dex_file, uint32_t string_id)
+  std::string GetStringFromId(const DexFile& dex_file, uint32_t string_id) const
       REQUIRES(Locks::verifier_deps_lock_);
 
   // Returns the bytecode access flags of `element` (bottom 16 bits), or
   // `kUnresolvedMarker` if `element` is null.
   template <typename T>
-  uint16_t GetAccessFlags(T* element)
+  static uint16_t GetAccessFlags(T* element)
       REQUIRES_SHARED(Locks::mutator_lock_);
 
   // Returns a string ID of the descriptor of the declaring class of `element`,
@@ -234,6 +267,54 @@
   bool Equals(const VerifierDeps& rhs) const
       REQUIRES(!Locks::verifier_deps_lock_);
 
+  // Verify `dex_file` according to the `deps`, that is going over each
+  // `DexFileDeps` field, and checking that the recorded information still
+  // holds.
+  bool VerifyDexFile(Handle<mirror::ClassLoader> class_loader,
+                     const DexFile& dex_file,
+                     const DexFileDeps& deps,
+                     Thread* self) const
+      REQUIRES_SHARED(Locks::mutator_lock_)
+      REQUIRES(Locks::verifier_deps_lock_);
+
+  bool VerifyAssignability(Handle<mirror::ClassLoader> class_loader,
+                           const DexFile& dex_file,
+                           const std::set<TypeAssignability>& assignables,
+                           bool expected_assignability,
+                           Thread* self) const
+      REQUIRES_SHARED(Locks::mutator_lock_)
+      REQUIRES(Locks::verifier_deps_lock_);
+
+  // Verify that the set of resolved classes at the point of creation
+  // of this `VerifierDeps` is still the same.
+  bool VerifyClasses(Handle<mirror::ClassLoader> class_loader,
+                     const DexFile& dex_file,
+                     const std::set<ClassResolution>& classes,
+                     Thread* self) const
+      REQUIRES_SHARED(Locks::mutator_lock_)
+      REQUIRES(Locks::verifier_deps_lock_);
+
+  // Verify that the set of resolved fields at the point of creation
+  // of this `VerifierDeps` is still the same, and each field resolves to the
+  // same field holder and access flags.
+  bool VerifyFields(Handle<mirror::ClassLoader> class_loader,
+                    const DexFile& dex_file,
+                    const std::set<FieldResolution>& classes,
+                    Thread* self) const
+      REQUIRES_SHARED(Locks::mutator_lock_)
+      REQUIRES(Locks::verifier_deps_lock_);
+
+  // Verify that the set of resolved methods at the point of creation
+  // of this `VerifierDeps` is still the same, and each method resolves to the
+  // same method holder, access flags, and invocation kind.
+  bool VerifyMethods(Handle<mirror::ClassLoader> class_loader,
+                     const DexFile& dex_file,
+                     const std::set<MethodResolution>& methods,
+                     MethodResolutionKind kind,
+                     Thread* self) const
+      REQUIRES_SHARED(Locks::mutator_lock_)
+      REQUIRES(Locks::verifier_deps_lock_);
+
   // Map from DexFiles into dependencies collected from verification of their methods.
   std::map<const DexFile*, std::unique_ptr<DexFileDeps>> dex_deps_
       GUARDED_BY(Locks::verifier_deps_lock_);
@@ -241,6 +322,9 @@
   friend class VerifierDepsTest;
   ART_FRIEND_TEST(VerifierDepsTest, StringToId);
   ART_FRIEND_TEST(VerifierDepsTest, EncodeDecode);
+  ART_FRIEND_TEST(VerifierDepsTest, EncodeDecodeMulti);
+  ART_FRIEND_TEST(VerifierDepsTest, VerifyDeps);
+  ART_FRIEND_TEST(VerifierDepsTest, CompilerDriver);
 };
 
 }  // namespace verifier
diff --git a/runtime/verify_object-inl.h b/runtime/verify_object-inl.h
index f7a8249..43151dd 100644
--- a/runtime/verify_object-inl.h
+++ b/runtime/verify_object-inl.h
@@ -21,10 +21,11 @@
 
 #include "gc/heap.h"
 #include "mirror/object-inl.h"
+#include "obj_ptr-inl.h"
 
 namespace art {
 
-inline void VerifyObject(mirror::Object* obj) {
+inline void VerifyObject(ObjPtr<mirror::Object> obj) {
   if (kVerifyObjectSupport > kVerifyObjectModeDisabled && obj != nullptr) {
     if (kVerifyObjectSupport > kVerifyObjectModeFast) {
       // Slow object verification, try the heap right away.
@@ -32,7 +33,7 @@
     } else {
       // Fast object verification, only call the heap if our quick sanity tests fail. The heap will
       // print the diagnostic message.
-      bool failed = !IsAligned<kObjectAlignment>(obj);
+      bool failed = !IsAligned<kObjectAlignment>(obj.Ptr());
       if (!failed) {
         mirror::Class* c = obj->GetClass<kVerifyNone>();
         failed = failed || !IsAligned<kObjectAlignment>(c);
@@ -45,7 +46,7 @@
   }
 }
 
-inline bool VerifyClassClass(mirror::Class* c) {
+inline bool VerifyClassClass(ObjPtr<mirror::Class> c) {
   if (UNLIKELY(c == nullptr)) {
     return false;
   }
diff --git a/runtime/verify_object.h b/runtime/verify_object.h
index 8e1653d..384e56f 100644
--- a/runtime/verify_object.h
+++ b/runtime/verify_object.h
@@ -20,6 +20,7 @@
 #include <stdint.h>
 
 #include "base/macros.h"
+#include "obj_ptr.h"
 
 namespace art {
 
@@ -52,10 +53,10 @@
 static constexpr VerifyObjectMode kVerifyObjectSupport =
     kDefaultVerifyFlags != 0 ? kVerifyObjectModeFast : kVerifyObjectModeDisabled;
 
-ALWAYS_INLINE void VerifyObject(mirror::Object* obj) NO_THREAD_SAFETY_ANALYSIS;
+ALWAYS_INLINE void VerifyObject(ObjPtr<mirror::Object> obj) NO_THREAD_SAFETY_ANALYSIS;
 
 // Check that c.getClass() == c.getClass().getClass().
-ALWAYS_INLINE bool VerifyClassClass(mirror::Class* c) NO_THREAD_SAFETY_ANALYSIS;
+ALWAYS_INLINE bool VerifyClassClass(ObjPtr<mirror::Class> c) NO_THREAD_SAFETY_ANALYSIS;
 
 }  // namespace art
 
diff --git a/runtime/well_known_classes.cc b/runtime/well_known_classes.cc
index 4dcf58f..3549586 100644
--- a/runtime/well_known_classes.cc
+++ b/runtime/well_known_classes.cc
@@ -22,8 +22,10 @@
 
 #include "base/logging.h"
 #include "entrypoints/quick/quick_entrypoints_enum.h"
+#include "jni_internal.h"
 #include "mirror/class.h"
 #include "mirror/throwable.h"
+#include "obj_ptr-inl.h"
 #include "ScopedLocalRef.h"
 #include "scoped_thread_state_change-inl.h"
 #include "thread-inl.h"
@@ -33,9 +35,12 @@
 jclass WellKnownClasses::com_android_dex_Dex;
 jclass WellKnownClasses::dalvik_annotation_optimization_CriticalNative;
 jclass WellKnownClasses::dalvik_annotation_optimization_FastNative;
+jclass WellKnownClasses::dalvik_system_BaseDexClassLoader;
+jclass WellKnownClasses::dalvik_system_DexClassLoader;
 jclass WellKnownClasses::dalvik_system_DexFile;
 jclass WellKnownClasses::dalvik_system_DexPathList;
 jclass WellKnownClasses::dalvik_system_DexPathList__Element;
+jclass WellKnownClasses::dalvik_system_EmulatedStackFrame;
 jclass WellKnownClasses::dalvik_system_PathClassLoader;
 jclass WellKnownClasses::dalvik_system_VMRuntime;
 jclass WellKnownClasses::java_lang_annotation_Annotation__array;
@@ -106,7 +111,7 @@
 
 jfieldID WellKnownClasses::dalvik_system_DexFile_cookie;
 jfieldID WellKnownClasses::dalvik_system_DexFile_fileName;
-jfieldID WellKnownClasses::dalvik_system_PathClassLoader_pathList;
+jfieldID WellKnownClasses::dalvik_system_BaseDexClassLoader_pathList;
 jfieldID WellKnownClasses::dalvik_system_DexPathList_dexElements;
 jfieldID WellKnownClasses::dalvik_system_DexPathList__Element_dexFile;
 jfieldID WellKnownClasses::java_lang_Thread_daemon;
@@ -215,9 +220,9 @@
   ScopedObjectAccess soa(Thread::Current());
   #define LOAD_STRING_INIT(init_runtime_name, init_signature, new_runtime_name,             \
                            new_java_name, new_signature, ...)                               \
-      init_runtime_name = soa.DecodeMethod(                                                 \
+      init_runtime_name = jni::DecodeArtMethod(                                             \
           CacheMethod(env, java_lang_String, false, "<init>", init_signature));             \
-      new_runtime_name = soa.DecodeMethod(                                                  \
+      new_runtime_name = jni::DecodeArtMethod(                                              \
           CacheMethod(env, java_lang_StringFactory, true, new_java_name, new_signature));
       STRING_INIT_LIST(LOAD_STRING_INIT)
   #undef LOAD_STRING_INIT
@@ -235,8 +240,8 @@
 ArtMethod* WellKnownClasses::StringInitToStringFactory(ArtMethod* string_init) {
   #define TO_STRING_FACTORY(init_runtime_name, init_signature, new_runtime_name,            \
                             new_java_name, new_signature, entry_point_name)                 \
-      if (string_init == init_runtime_name) {                                               \
-        return new_runtime_name;                                                            \
+      if (string_init == (init_runtime_name)) {                                             \
+        return (new_runtime_name);                                                          \
       }
       STRING_INIT_LIST(TO_STRING_FACTORY)
   #undef TO_STRING_FACTORY
@@ -247,7 +252,7 @@
 uint32_t WellKnownClasses::StringInitToEntryPoint(ArtMethod* string_init) {
   #define TO_ENTRY_POINT(init_runtime_name, init_signature, new_runtime_name,               \
                          new_java_name, new_signature, entry_point_name)                    \
-      if (string_init == init_runtime_name) {                                               \
+      if (string_init == (init_runtime_name)) {                                             \
         return kQuick ## entry_point_name;                                                  \
       }
       STRING_INIT_LIST(TO_ENTRY_POINT)
@@ -262,9 +267,12 @@
   dalvik_annotation_optimization_CriticalNative =
       CacheClass(env, "dalvik/annotation/optimization/CriticalNative");
   dalvik_annotation_optimization_FastNative = CacheClass(env, "dalvik/annotation/optimization/FastNative");
+  dalvik_system_BaseDexClassLoader = CacheClass(env, "dalvik/system/BaseDexClassLoader");
+  dalvik_system_DexClassLoader = CacheClass(env, "dalvik/system/DexClassLoader");
   dalvik_system_DexFile = CacheClass(env, "dalvik/system/DexFile");
   dalvik_system_DexPathList = CacheClass(env, "dalvik/system/DexPathList");
   dalvik_system_DexPathList__Element = CacheClass(env, "dalvik/system/DexPathList$Element");
+  dalvik_system_EmulatedStackFrame = CacheClass(env, "dalvik/system/EmulatedStackFrame");
   dalvik_system_PathClassLoader = CacheClass(env, "dalvik/system/PathClassLoader");
   dalvik_system_VMRuntime = CacheClass(env, "dalvik/system/VMRuntime");
 
@@ -329,9 +337,9 @@
   org_apache_harmony_dalvik_ddmc_DdmServer_broadcast = CacheMethod(env, org_apache_harmony_dalvik_ddmc_DdmServer, true, "broadcast", "(I)V");
   org_apache_harmony_dalvik_ddmc_DdmServer_dispatch = CacheMethod(env, org_apache_harmony_dalvik_ddmc_DdmServer, true, "dispatch", "(I[BII)Lorg/apache/harmony/dalvik/ddmc/Chunk;");
 
+  dalvik_system_BaseDexClassLoader_pathList = CacheField(env, dalvik_system_BaseDexClassLoader, false, "pathList", "Ldalvik/system/DexPathList;");
   dalvik_system_DexFile_cookie = CacheField(env, dalvik_system_DexFile, false, "mCookie", "Ljava/lang/Object;");
   dalvik_system_DexFile_fileName = CacheField(env, dalvik_system_DexFile, false, "mFileName", "Ljava/lang/String;");
-  dalvik_system_PathClassLoader_pathList = CacheField(env, dalvik_system_PathClassLoader, false, "pathList", "Ldalvik/system/DexPathList;");
   dalvik_system_DexPathList_dexElements = CacheField(env, dalvik_system_DexPathList, false, "dexElements", "[Ldalvik/system/DexPathList$Element;");
   dalvik_system_DexPathList__Element_dexFile = CacheField(env, dalvik_system_DexPathList__Element, false, "dexFile", "Ldalvik/system/DexFile;");
   java_lang_Thread_daemon = CacheField(env, java_lang_Thread, false, "daemon", "Z");
@@ -385,8 +393,10 @@
                       "Ljava/lang/String;");
 }
 
-mirror::Class* WellKnownClasses::ToClass(jclass global_jclass) {
-  return reinterpret_cast<mirror::Class*>(Thread::Current()->DecodeJObject(global_jclass));
+ObjPtr<mirror::Class> WellKnownClasses::ToClass(jclass global_jclass) {
+  auto ret = ObjPtr<mirror::Class>::DownCast(Thread::Current()->DecodeJObject(global_jclass));
+  DCHECK(!ret.IsNull());
+  return ret;
 }
 
 }  // namespace art
diff --git a/runtime/well_known_classes.h b/runtime/well_known_classes.h
index ddfc5b8..227996a 100644
--- a/runtime/well_known_classes.h
+++ b/runtime/well_known_classes.h
@@ -19,6 +19,7 @@
 
 #include "base/mutex.h"
 #include "jni.h"
+#include "obj_ptr.h"
 
 namespace art {
 
@@ -41,15 +42,17 @@
   static ArtMethod* StringInitToStringFactory(ArtMethod* method);
   static uint32_t StringInitToEntryPoint(ArtMethod* method);
 
-  static mirror::Class* ToClass(jclass global_jclass)
-      REQUIRES_SHARED(Locks::mutator_lock_);
+  static ObjPtr<mirror::Class> ToClass(jclass global_jclass) REQUIRES_SHARED(Locks::mutator_lock_);
 
   static jclass com_android_dex_Dex;
   static jclass dalvik_annotation_optimization_CriticalNative;
   static jclass dalvik_annotation_optimization_FastNative;
+  static jclass dalvik_system_BaseDexClassLoader;
+  static jclass dalvik_system_DexClassLoader;
   static jclass dalvik_system_DexFile;
   static jclass dalvik_system_DexPathList;
   static jclass dalvik_system_DexPathList__Element;
+  static jclass dalvik_system_EmulatedStackFrame;
   static jclass dalvik_system_PathClassLoader;
   static jclass dalvik_system_VMRuntime;
   static jclass java_lang_annotation_Annotation__array;
@@ -118,11 +121,11 @@
   static jmethodID org_apache_harmony_dalvik_ddmc_DdmServer_broadcast;
   static jmethodID org_apache_harmony_dalvik_ddmc_DdmServer_dispatch;
 
+  static jfieldID dalvik_system_BaseDexClassLoader_pathList;
   static jfieldID dalvik_system_DexFile_cookie;
   static jfieldID dalvik_system_DexFile_fileName;
   static jfieldID dalvik_system_DexPathList_dexElements;
   static jfieldID dalvik_system_DexPathList__Element_dexFile;
-  static jfieldID dalvik_system_PathClassLoader_pathList;
   static jfieldID java_lang_reflect_Executable_artMethod;
   static jfieldID java_lang_reflect_Proxy_h;
   static jfieldID java_lang_Thread_daemon;
diff --git a/test/021-string2/src/Main.java b/test/021-string2/src/Main.java
index d1ea0b1..51351e1 100644
--- a/test/021-string2/src/Main.java
+++ b/test/021-string2/src/Main.java
@@ -89,5 +89,450 @@
         Method fromUTF8ByteArray = Strings.getDeclaredMethod("fromUTF8ByteArray", byte[].class);
         String result = (String) fromUTF8ByteArray.invoke(null, new byte[] {'O', 'K'});
         System.out.println(result);
+
+        testCompareToAndEquals();
+        testIndexOf();
     }
+
+    public static void testCompareToAndEquals() {
+        String[] strings = {
+                // Special: empty string.
+                "",
+                // Category 0, ASCII strings:
+                //     "0123456789abcdef".substring(0, index + 1)
+                "0",
+                "01",
+                "012",
+                "0123",
+                "01234",
+                "012345",
+                "0123456",
+                "01234567",
+                "012345678",
+                "0123456789",
+                "0123456789a",
+                "0123456789ab",
+                "0123456789abc",
+                "0123456789abcd",
+                "0123456789abcde",
+                "0123456789abcdef",
+                // Category 1, ASCII strings:
+                //     "0123456789abcdef".substring(0, index) + "x"
+                "x",
+                "0x",
+                "01x",
+                "012x",
+                "0123x",
+                "01234x",
+                "012345x",
+                "0123456x",
+                "01234567x",
+                "012345678x",
+                "0123456789x",
+                "0123456789ax",
+                "0123456789abx",
+                "0123456789abcx",
+                "0123456789abcdx",
+                "0123456789abcdex",
+                // Category 2, ASCII strings,
+                //     "0123456789abcdef".substring(0, index) + "x" +
+                //     "0123456789abcdef".substring(index + 1)
+                "x123456789abcdef",
+                "0x23456789abcdef",
+                "01x3456789abcdef",
+                "012x456789abcdef",
+                "0123x56789abcdef",
+                "01234x6789abcdef",
+                "012345x789abcdef",
+                "0123456x89abcdef",
+                "01234567x9abcdef",
+                "012345678xabcdef",
+                "0123456789xbcdef",
+                "0123456789axcdef",
+                "0123456789abxdef",
+                "0123456789abcxef",
+                "0123456789abcdxf",
+                "0123456789abcdex",
+                // Category 3, ASCII strings:
+                //     "z" + "0123456789abcdef".substring(1, index + 1)
+                "z",
+                "z1",
+                "z12",
+                "z123",
+                "z1234",
+                "z12345",
+                "z123456",
+                "z1234567",
+                "z12345678",
+                "z123456789",
+                "z123456789a",
+                "z123456789ab",
+                "z123456789abc",
+                "z123456789abcd",
+                "z123456789abcde",
+                "z123456789abcdef",
+                // Category 4, non-ASCII strings:
+                //     "0123456789abcdef".substring(0, index) + "\u0440"
+                "\u0440",
+                "0\u0440",
+                "01\u0440",
+                "012\u0440",
+                "0123\u0440",
+                "01234\u0440",
+                "012345\u0440",
+                "0123456\u0440",
+                "01234567\u0440",
+                "012345678\u0440",
+                "0123456789\u0440",
+                "0123456789a\u0440",
+                "0123456789ab\u0440",
+                "0123456789abc\u0440",
+                "0123456789abcd\u0440",
+                "0123456789abcde\u0440",
+                // Category 5, non-ASCII strings:
+                //     "0123456789abcdef".substring(0, index) + "\u0440" +
+                //     "0123456789abcdef".substring(index + 1)
+                "\u0440123456789abcdef",
+                "0\u044023456789abcdef",
+                "01\u04403456789abcdef",
+                "012\u0440456789abcdef",
+                "0123\u044056789abcdef",
+                "01234\u04406789abcdef",
+                "012345\u0440789abcdef",
+                "0123456\u044089abcdef",
+                "01234567\u04409abcdef",
+                "012345678\u0440abcdef",
+                "0123456789\u0440bcdef",
+                "0123456789a\u0440cdef",
+                "0123456789ab\u0440def",
+                "0123456789abc\u0440ef",
+                "0123456789abcd\u0440f",
+                "0123456789abcde\u0440",
+                // Category 6, ASCII strings:
+                //     "\u0443" + "0123456789abcdef".substring(1, index + 1)
+                "\u0443",
+                "\u04431",
+                "\u044312",
+                "\u0443123",
+                "\u04431234",
+                "\u044312345",
+                "\u0443123456",
+                "\u04431234567",
+                "\u044312345678",
+                "\u0443123456789",
+                "\u0443123456789a",
+                "\u0443123456789ab",
+                "\u0443123456789abc",
+                "\u0443123456789abcd",
+                "\u0443123456789abcde",
+                "\u0443123456789abcdef",
+                // Category 7, non-ASCII strings:
+                //     "0123456789abcdef".substring(0, index) + "\u0482"
+                "\u0482",
+                "0\u0482",
+                "01\u0482",
+                "012\u0482",
+                "0123\u0482",
+                "01234\u0482",
+                "012345\u0482",
+                "0123456\u0482",
+                "01234567\u0482",
+                "012345678\u0482",
+                "0123456789\u0482",
+                "0123456789a\u0482",
+                "0123456789ab\u0482",
+                "0123456789abc\u0482",
+                "0123456789abcd\u0482",
+                "0123456789abcde\u0482",
+                // Category 8, non-ASCII strings:
+                //     "0123456789abcdef".substring(0, index) + "\u0482" +
+                //     "0123456789abcdef".substring(index + 1)
+                "\u0482123456789abcdef",
+                "0\u048223456789abcdef",
+                "01\u04823456789abcdef",
+                "012\u0482456789abcdef",
+                "0123\u048256789abcdef",
+                "01234\u04826789abcdef",
+                "012345\u0482789abcdef",
+                "0123456\u048289abcdef",
+                "01234567\u04829abcdef",
+                "012345678\u0482abcdef",
+                "0123456789\u0482bcdef",
+                "0123456789a\u0482cdef",
+                "0123456789ab\u0482def",
+                "0123456789abc\u0482ef",
+                "0123456789abcd\u0482f",
+                "0123456789abcde\u0482",
+                // Category 9, ASCII strings:
+                //     "\u0489" + "0123456789abcdef".substring(1, index + 1)
+                "\u0489",
+                "\u04891",
+                "\u048912",
+                "\u0489123",
+                "\u04891234",
+                "\u048912345",
+                "\u0489123456",
+                "\u04891234567",
+                "\u048912345678",
+                "\u0489123456789",
+                "\u0489123456789a",
+                "\u0489123456789ab",
+                "\u0489123456789abc",
+                "\u0489123456789abcd",
+                "\u0489123456789abcde",
+                "\u0489123456789abcdef",
+        };
+        int length = strings.length;
+        Assert.assertEquals(1 + 16 * 10, length);
+        for (int i = 0; i != length; ++i) {
+            String lhs = strings[i];
+            for (int j = 0; j != length; ++j) {
+                String rhs = strings[j];
+                int result = $noinline$compareTo(lhs, rhs);
+                final int expected;
+                if (i == 0 || j == 0 || i == j) {
+                    // One of the strings is empty or the strings are the same.
+                    expected = lhs.length() - rhs.length();
+                } else {
+                    int i_category = (i - 1) / 16;
+                    int i_index = (i - 1) % 16;
+                    int j_category = (j - 1) / 16;
+                    int j_index = (j - 1) % 16;
+                    int min_ij_index = (i_index < j_index) ? i_index : j_index;
+                    if (i_category == j_category) {
+                        switch (i_category) {
+                            case 0: case 3: case 6: case 9:
+                                // Differs in length.
+                                expected = lhs.length() - rhs.length();
+                                break;
+                            case 1: case 2: case 4: case 5: case 7: case 8:
+                                // Differs in charAt(min_ij_index).
+                                expected = lhs.charAt(min_ij_index) - rhs.charAt(min_ij_index);
+                                break;
+                            default: throw new Error("Unexpected category.");
+                      }
+                    } else if (i_category == 3 || i_category == 6 || i_category == 9 ||
+                               j_category == 3 || j_category == 6 || j_category == 9) {
+                        // In these categories, charAt(0) differs from other categories' strings.
+                        expected = lhs.charAt(0) - rhs.charAt(0);
+                    } else if (// Category 0 string is a prefix to any longer string in
+                               // remaining categories.
+                               (i_category == 0 && i_index < j_index) ||
+                               (j_category == 0 && j_index < i_index) ||
+                               // Category 2 string is a prefix to category 3 string at the same
+                               // index. Similar for categories 4 and 5 and also 7 and 8.
+                               // This includes matching last strings of these pairs of categories.
+                               (i_index == j_index &&
+                                   ((i_category == 1 && j_category == 2) ||
+                                    (i_category == 2 && j_category == 1) ||
+                                    (i_category == 4 && j_category == 5) ||
+                                    (i_category == 5 && j_category == 4) ||
+                                    (i_category == 7 && j_category == 8) ||
+                                    (i_category == 8 && j_category == 7)))) {
+                        // Differs in length.
+                        expected = lhs.length() - rhs.length();
+                    } else {
+                        // The remaining cases differ in charAt(min_ij_index), the characters
+                        // before that are "0123456789abcdef".substring(0, min_ij_index).
+                        for (int k = 0; k < min_ij_index; ++k) {
+                          Assert.assertEquals("0123456789abcdef".charAt(k), lhs.charAt(k));
+                          Assert.assertEquals("0123456789abcdef".charAt(k), rhs.charAt(k));
+                        }
+                        expected = lhs.charAt(min_ij_index) - rhs.charAt(min_ij_index);
+                        Assert.assertFalse(expected == 0);
+                    }
+                }
+                if (expected != result) {
+                  throw new Error(
+                      "Mismatch at i=" + i + ", j=" + j + ", expected=" + expected +
+                      ", result=" + result);
+                }
+                boolean equalsExpected =
+                    (i == j) ||
+                    // Last string in categories 1 and 2.
+                    (i == 32 && j == 48) || (i == 48 && j == 32) ||
+                    // Last string in categories 4 and 5.
+                    (i == 80 && j == 96) || (i == 96 && j == 80) ||
+                    // Last string in categories 7 and 8.
+                    (i == 128 && j == 144) || (i == 144 && j == 128);
+                Assert.assertEquals(equalsExpected, $noinline$equals(lhs, rhs));
+            }
+        }
+
+        try {
+            $noinline$compareTo("", null);
+            Assert.fail();
+        } catch (NullPointerException expected) {
+        }
+        try {
+            $noinline$compareTo(null, "");
+            Assert.fail();
+        } catch (NullPointerException expected) {
+        }
+
+        Assert.assertFalse($noinline$equals("", null));
+        try {
+            $noinline$equals(null, "");
+            Assert.fail();
+        } catch (NullPointerException expected) {
+        }
+    }
+
+    public static void testIndexOf() {
+        String[] prefixes = {
+                "",
+                "0",
+                "01",
+                "012",
+                "0123",
+                "01234",
+                "012345",
+                "0123456",
+                "01234567",
+                "012345678",
+                "0123456789",
+                "0123456789a",
+                "0123456789ab",
+                "0123456789abc",
+                "0123456789abcd",
+                "0123456789abcdef",
+        };
+        String[] cores = {
+                "",
+                "x",
+                "xx",
+                "xxx",
+                "xxxx",
+                "xxxxx",
+                "xxxxxx",
+                "xxxxxxx",
+                "xxxxxxxx",
+                "xzx",
+                "xxzx",
+                "xxxzx",
+                "xxxxzx",
+                "xxxxxzx",
+                "xxxxxxzx",
+                "xxxxxxxzx",
+                "xxxxxxxxzx",
+                "\u0440",
+                "\u0440\u0440",
+                "\u0440\u0440\u0440",
+                "\u0440\u0440\u0440\u0440",
+                "\u0440\u0440\u0440\u0440\u0440",
+                "\u0440\u0440\u0440\u0440\u0440\u0440",
+                "\u0440\u0440\u0440\u0440\u0440\u0440\u0440",
+                "\u0440\u0440\u0440\u0440\u0440\u0440\u0440\u0440",
+                "\u0440z\u0440",
+                "\u0440\u0440z\u0440",
+                "\u0440\u0440\u0440z\u0440",
+                "\u0440\u0440\u0440\u0440z\u0440",
+                "\u0440\u0440\u0440\u0440\u0440z\u0440",
+                "\u0440\u0440\u0440\u0440\u0440\u0440z\u0440",
+                "\u0440\u0440\u0440\u0440\u0440\u0440\u0440z\u0440",
+                "\u0440\u0440\u0440\u0440\u0440\u0440\u0440\u0440z\u0440",
+                "\u0000",
+                "\u0000\u0000",
+                "\u0000\u0000\u0000",
+                "\u0000\u0000\u0000\u0000",
+                "\u0000\u0000\u0000\u0000\u0000",
+                "\u0000\u0000\u0000\u0000\u0000\u0000",
+                "\u0000\u0000\u0000\u0000\u0000\u0000\u0000",
+                "\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000",
+                "\u0000z\u0000",
+                "\u0000\u0000z\u0000",
+                "\u0000\u0000\u0000z\u0000",
+                "\u0000\u0000\u0000\u0000z\u0000",
+                "\u0000\u0000\u0000\u0000\u0000z\u0000",
+                "\u0000\u0000\u0000\u0000\u0000\u0000z\u0000",
+                "\u0000\u0000\u0000\u0000\u0000\u0000\u0000z\u0000",
+                "\u0000\u0000\u0000\u0000\u0000\u0000\u0000\u0000z\u0000",
+        };
+        String[] suffixes = {
+                "",
+                "y",
+                "yy",
+                "yyy",
+                "yyyy",
+                "yyyyy",
+                "yyyyyy",
+                "yyyyyyy",
+                "yyyyyyyy",
+                "\u0441",
+                "y\u0441",
+                "yy\u0441",
+                "yyy\u0441",
+                "yyyy\u0441",
+                "yyyyy\u0441",
+                "yyyyyy\u0441",
+                "yyyyyyy\u0441",
+                "yyyyyyyy\u0441",
+        };
+        for (String p : prefixes) {
+            for (String c : cores) {
+                for (String s : suffixes) {
+                    String full = p + c + s;
+                    int expX = (c.isEmpty() || c.charAt(0) != 'x') ? -1 : p.length();
+                    int exp0440 = (c.isEmpty() || c.charAt(0) != '\u0440') ? -1 : p.length();
+                    int exp0000 = (c.isEmpty() || c.charAt(0) != '\u0000') ? -1 : p.length();
+                    Assert.assertEquals(expX, $noinline$indexOf(full, 'x'));
+                    Assert.assertEquals(exp0440, $noinline$indexOf(full, '\u0440'));
+                    Assert.assertEquals(exp0000, $noinline$indexOf(full, '\u0000'));
+                    Assert.assertEquals(expX, $noinline$indexOf(full, 'x', -1));
+                    Assert.assertEquals(exp0440, $noinline$indexOf(full, '\u0440', -1));
+                    Assert.assertEquals(exp0000, $noinline$indexOf(full, '\u0000', -1));
+                    Assert.assertEquals(-1, $noinline$indexOf(full, 'x', full.length() + 1));
+                    Assert.assertEquals(-1, $noinline$indexOf(full, '\u0440', full.length() + 1));
+                    Assert.assertEquals(-1, $noinline$indexOf(full, '\u0000', full.length() + 1));
+                    for (int from = 0; from != full.length(); ++from) {
+                        final int eX;
+                        final int e0440;
+                        final int e0000;
+                        if (from <= p.length()) {
+                            eX = expX;
+                            e0440 = exp0440;
+                            e0000 = exp0000;
+                        } else if (from >= p.length() + c.length()) {
+                            eX = -1;
+                            e0440 = -1;
+                            e0000 = -1;
+                        } else if (full.charAt(from) == 'z') {
+                            eX = (full.charAt(from + 1) != 'x') ? -1 : from + 1;
+                            e0440 = (full.charAt(from + 1) != '\u0440') ? -1 : from + 1;
+                            e0000 = (full.charAt(from + 1) != '\u0000') ? -1 : from + 1;
+                        } else {
+                            eX = (full.charAt(from) != 'x') ? -1 : from;
+                            e0440 = (full.charAt(from) != '\u0440') ? -1 : from;
+                            e0000 = (full.charAt(from) != '\u0000') ? -1 : from;
+                        }
+                        Assert.assertEquals(eX, $noinline$indexOf(full, 'x', from));
+                        Assert.assertEquals(e0440, $noinline$indexOf(full, '\u0440', from));
+                        Assert.assertEquals(e0000, $noinline$indexOf(full, '\u0000', from));
+                    }
+                }
+            }
+        }
+    }
+
+    public static int $noinline$compareTo(String lhs, String rhs) {
+        if (doThrow) { throw new Error(); }
+        return lhs.compareTo(rhs);
+    }
+
+    public static boolean $noinline$equals(String lhs, String rhs) {
+        if (doThrow) { throw new Error(); }
+        return lhs.equals(rhs);
+    }
+
+    public static int $noinline$indexOf(String lhs, int ch) {
+        if (doThrow) { throw new Error(); }
+        return lhs.indexOf(ch);
+    }
+
+    public static int $noinline$indexOf(String lhs, int ch, int fromIndex) {
+        if (doThrow) { throw new Error(); }
+        return lhs.indexOf(ch, fromIndex);
+    }
+
+    public static boolean doThrow = false;
 }
diff --git a/test/030-bad-finalizer/expected.txt b/test/030-bad-finalizer/expected.txt
index ee9cfff..74e208c 100644
--- a/test/030-bad-finalizer/expected.txt
+++ b/test/030-bad-finalizer/expected.txt
@@ -1,4 +1,4 @@
-About to null reference and request GC.
+About to null reference.
 Finalizer started and spinning...
 Finalizer done spinning.
 Finalizer sleeping forever now.
diff --git a/test/030-bad-finalizer/src/Main.java b/test/030-bad-finalizer/src/Main.java
index 942ee25..0e69a96 100644
--- a/test/030-bad-finalizer/src/Main.java
+++ b/test/030-bad-finalizer/src/Main.java
@@ -14,26 +14,60 @@
  * limitations under the License.
  */
 
+import java.util.concurrent.CountDownLatch;
+import static java.util.concurrent.TimeUnit.MINUTES;
+
 /**
  * Test a class with a bad finalizer.
+ *
+ * This test is inherently flaky. It assumes that the system will schedule the finalizer daemon
+ * and finalizer watchdog daemon enough to reach the timeout and throwing the fatal exception.
  */
 public class Main {
-    public static void main(String[] args) {
-        BadFinalizer bf = new BadFinalizer();
+    public static void main(String[] args) throws Exception {
+        CountDownLatch finalizerWait = new CountDownLatch(1);
 
-        System.out.println("About to null reference and request GC.");
-        bf = null;
-        Runtime.getRuntime().gc();
+        // A separate method to ensure no dex register keeps the object alive.
+        createBadFinalizer(finalizerWait);
 
-        for (int i = 0; i < 8; i++) {
-            snooze(4000);
+        // Should have at least two iterations to trigger finalization, but just to make sure run
+        // some more.
+        for (int i = 0; i < 5; i++) {
             Runtime.getRuntime().gc();
         }
 
+        // Now wait for the finalizer to start running. Give it a minute.
+        finalizerWait.await(1, MINUTES);
+
+        // Now fall asleep with a timeout. The timeout is large enough that we expect the
+        // finalizer daemon to have killed the process before the deadline elapses.
+        // Note: the timeout is here (instead of an infinite sleep) to protect the test
+        //       environment (e.g., in case this is run without a timeout wrapper).
+        final long timeout = 60 * 1000;  // 1 minute.
+        long remainingWait = timeout;
+        final long waitStart = System.currentTimeMillis();
+        while (remainingWait > 0) {
+            synchronized (args) {  // Just use an already existing object for simplicity...
+                try {
+                    args.wait(remainingWait);
+                } catch (Exception e) {
+                }
+            }
+            remainingWait = timeout - (System.currentTimeMillis() - waitStart);
+        }
+
+        // We should not get here.
         System.out.println("UNREACHABLE");
         System.exit(0);
     }
 
+    private static void createBadFinalizer(CountDownLatch finalizerWait) {
+        BadFinalizer bf = new BadFinalizer(finalizerWait);
+
+        System.out.println("About to null reference.");
+        bf = null;  // Not that this would make a difference, could be eliminated earlier.
+    }
+
     public static void snooze(int ms) {
         try {
             Thread.sleep(ms);
@@ -45,9 +79,17 @@
      * Class with a bad finalizer.
      */
     public static class BadFinalizer {
+        private CountDownLatch finalizerWait;
+        private volatile int j = 0;  // Volatile in an effort to curb loop optimization.
+
+        public BadFinalizer(CountDownLatch finalizerWait) {
+            this.finalizerWait = finalizerWait;
+        }
+
         protected void finalize() {
+            finalizerWait.countDown();
+
             System.out.println("Finalizer started and spinning...");
-            int j = 0;
 
             /* spin for a bit */
             long start, end;
diff --git a/test/039-join-main/src/Main.java b/test/039-join-main/src/Main.java
index 2373221..60791e4 100644
--- a/test/039-join-main/src/Main.java
+++ b/test/039-join-main/src/Main.java
@@ -14,35 +14,48 @@
  * limitations under the License.
  */
 
+import java.util.concurrent.CountDownLatch;
+
 /**
  * Make sure that a sub-thread can join the main thread.
  */
 public class Main {
-    public static void main(String[] args) {
+    public static void main(String[] args) throws Exception {
         Thread t;
+        CountDownLatch waitLatch = new CountDownLatch(1);
+        CountDownLatch progressLatch = new CountDownLatch(1);
 
-        t = new Thread(new JoinMainSub(Thread.currentThread()), "Joiner");
+        t = new Thread(new JoinMainSub(Thread.currentThread(), waitLatch, progressLatch), "Joiner");
         System.out.print("Starting thread '" + t.getName() + "'\n");
         t.start();
 
-        try { Thread.sleep(1000); }
-        catch (InterruptedException ie) {}
-
+        waitLatch.await();
         System.out.print("JoinMain starter returning\n");
+        progressLatch.countDown();
+
+        // Keep the thread alive a little longer, giving the other thread a chance to join on a
+        // live thread (though that isn't critically important for the test).
+        Thread.currentThread().sleep(500);
     }
 }
 
 class JoinMainSub implements Runnable {
     private Thread mJoinMe;
+    private CountDownLatch waitLatch;
+    private CountDownLatch progressLatch;
 
-    public JoinMainSub(Thread joinMe) {
+    public JoinMainSub(Thread joinMe, CountDownLatch waitLatch, CountDownLatch progressLatch) {
         mJoinMe = joinMe;
+        this.waitLatch = waitLatch;
+        this.progressLatch = progressLatch;
     }
 
     public void run() {
         System.out.print("@ JoinMainSub running\n");
 
         try {
+            waitLatch.countDown();
+            progressLatch.await();
             mJoinMe.join();
             System.out.print("@ JoinMainSub successfully joined main\n");
         } catch (InterruptedException ie) {
diff --git a/test/115-native-bridge/nativebridge.cc b/test/115-native-bridge/nativebridge.cc
index aca356b..41329af 100644
--- a/test/115-native-bridge/nativebridge.cc
+++ b/test/115-native-bridge/nativebridge.cc
@@ -370,7 +370,7 @@
 
 // v2 parts.
 
-extern "C" bool nb_is_compatible(uint32_t bridge_version ATTRIBUTE_UNUSED) {
+extern "C" bool native_bridge_isCompatibleWith(uint32_t bridge_version ATTRIBUTE_UNUSED) {
   return true;
 }
 
@@ -453,7 +453,7 @@
   return true;
 }
 
-static ::android::NativeBridgeSignalHandlerFn native_bridge_get_signal_handler(int signal) {
+static ::android::NativeBridgeSignalHandlerFn native_bridge_getSignalHandler(int signal) {
   // Test segv for already claimed signal, and sigill for not claimed signal
   if ((signal == SIGSEGV) || (signal == SIGILL)) {
     return &nb_signalhandler;
@@ -461,16 +461,63 @@
   return nullptr;
 }
 
+extern "C" int native_bridge_unloadLibrary(void* handle ATTRIBUTE_UNUSED) {
+  printf("dlclose() in native bridge.\n");
+  return 0;
+}
+
+extern "C" const char* native_bridge_getError() {
+  printf("dlerror() in native bridge.\n");
+  return nullptr;
+}
+
+extern "C" bool native_bridge_isPathSupported(const char* library_path ATTRIBUTE_UNUSED) {
+  printf("Checking for path support in native bridge.\n");
+  return false;
+}
+
+extern "C" bool native_bridge_initNamespace(const char*  public_ns_sonames ATTRIBUTE_UNUSED,
+                                            const char*  anon_ns_library_path ATTRIBUTE_UNUSED) {
+  printf("Initializing namespaces in native bridge.\n");
+  return false;
+}
+
+extern "C" android::native_bridge_namespace_t*
+native_bridge_createNamespace(const char* name ATTRIBUTE_UNUSED,
+                              const char* ld_library_path ATTRIBUTE_UNUSED,
+                              const char* default_library_path ATTRIBUTE_UNUSED,
+                              uint64_t type ATTRIBUTE_UNUSED,
+                              const char* permitted_when_isolated_path ATTRIBUTE_UNUSED,
+                              android::native_bridge_namespace_t* parent_ns ATTRIBUTE_UNUSED) {
+  printf("Creating namespace in native bridge.\n");
+  return nullptr;
+}
+
+extern "C" void* native_bridge_loadLibraryExt(const char* libpath ATTRIBUTE_UNUSED,
+                                               int flag ATTRIBUTE_UNUSED,
+                                               android::native_bridge_namespace_t* ns ATTRIBUTE_UNUSED) {
+    printf("Loading library with Extension in native bridge.\n");
+    return nullptr;
+}
 
 // "NativeBridgeItf" is effectively an API (it is the name of the symbol that will be loaded
 // by the native bridge library).
 android::NativeBridgeCallbacks NativeBridgeItf {
-  .version = 2,
+  // v1
+  .version = 3,
   .initialize = &native_bridge_initialize,
   .loadLibrary = &native_bridge_loadLibrary,
   .getTrampoline = &native_bridge_getTrampoline,
   .isSupported = &native_bridge_isSupported,
   .getAppEnv = &native_bridge_getAppEnv,
-  .isCompatibleWith = &nb_is_compatible,
-  .getSignalHandler = &native_bridge_get_signal_handler
+  // v2
+  .isCompatibleWith = &native_bridge_isCompatibleWith,
+  .getSignalHandler = &native_bridge_getSignalHandler,
+  // v3
+  .unloadLibrary = &native_bridge_unloadLibrary,
+  .getError = &native_bridge_getError,
+  .isPathSupported = &native_bridge_isPathSupported,
+  .initNamespace = &native_bridge_initNamespace,
+  .createNamespace = &native_bridge_createNamespace,
+  .loadLibraryExt = &native_bridge_loadLibraryExt
 };
diff --git a/test/130-hprof/src/Main.java b/test/130-hprof/src/Main.java
index 57be3a7..5899dd1 100644
--- a/test/130-hprof/src/Main.java
+++ b/test/130-hprof/src/Main.java
@@ -87,6 +87,12 @@
     }
 
     public static void main(String[] args) throws Exception {
+        testBasicDump();
+        testAllocationTrackingAndClassUnloading();
+        testGcAndDump();
+    }
+
+    private static void testBasicDump() throws Exception {
         // Create some data.
         Object data[] = new Object[TEST_LENGTH];
         for (int i = 0; i < data.length; i++) {
@@ -103,8 +109,10 @@
             }
         }
         System.out.println("Generated data.");
-
         createDumpAndConv();
+    }
+
+    private static void testAllocationTrackingAndClassUnloading() throws Exception {
         Class<?> klass = Class.forName("org.apache.harmony.dalvik.ddmc.DdmVmInternal");
         if (klass == null) {
             throw new AssertionError("Couldn't find path class loader class");
@@ -123,6 +131,57 @@
         enableMethod.invoke(null, false);
     }
 
+    private static void testGcAndDump() throws Exception {
+        Allocator allocator = new Allocator();
+        Dumper dumper = new Dumper(allocator);
+        allocator.start();
+        dumper.start();
+        try {
+            allocator.join();
+            dumper.join();
+        } catch (InterruptedException e) {
+            System.err.println("join interrupted");
+        }
+    }
+
+    private static class Allocator extends Thread {
+        private static int ARRAY_SIZE = 1024;
+        public volatile boolean running = true;
+        public void run() {
+            Object[] array = new Object[ARRAY_SIZE];
+            int i = 0;
+            while (running) {
+                array[i] = new byte[1024];
+                if (i % ARRAY_SIZE == 0) {
+                    Main.sleep(100L);
+                }
+                i = (i + 1) % ARRAY_SIZE;
+            }
+        }
+    }
+
+    private static class Dumper extends Thread {
+        Dumper(Allocator allocator) {
+            this.allocator = allocator;
+        }
+        Allocator allocator;
+        public void run() {
+            for (int i = 0; i < 5; ++i) {
+                Main.sleep(1000L);
+                createDumpAndConv();
+            }
+            allocator.running = false;
+        }
+    }
+
+    public static void sleep(long ms) {
+        try {
+            Thread.sleep(ms);
+        } catch (InterruptedException e) {
+            System.err.println("sleep interrupted");
+        }
+    }
+
     private static File getHprofConf() {
         // Use the java.library.path. It points to the lib directory.
         File libDir = new File(System.getProperty("java.library.path").split(":")[0]);
diff --git a/test/141-class-unload/expected.txt b/test/141-class-unload/expected.txt
index 2b77b29..0a03ecb 100644
--- a/test/141-class-unload/expected.txt
+++ b/test/141-class-unload/expected.txt
@@ -21,3 +21,4 @@
 class null false test
 JNI_OnUnload called
 Number of loaded unload-ex maps 0
+Too small false
diff --git a/test/141-class-unload/src/Main.java b/test/141-class-unload/src/Main.java
index f9b6180..2a6e944 100644
--- a/test/141-class-unload/src/Main.java
+++ b/test/141-class-unload/src/Main.java
@@ -47,6 +47,8 @@
             stressTest(constructor);
             // Test that the oat files are unloaded.
             testOatFilesUnloaded(getPid());
+            // Test that objects keep class loader live for sticky GC.
+            testStickyUnload(constructor);
         } catch (Exception e) {
             e.printStackTrace();
         }
@@ -161,6 +163,30 @@
         return intHolder;
     }
 
+    private static Object allocObjectInOtherClassLoader(Constructor<?> constructor)
+            throws Exception {
+      ClassLoader loader = (ClassLoader) constructor.newInstance(
+              DEX_FILE, LIBRARY_SEARCH_PATH, ClassLoader.getSystemClassLoader());
+      return loader.loadClass("IntHolder").newInstance();
+    }
+
+    // Regression test for public issue 227182.
+    private static void testStickyUnload(Constructor<?> constructor) throws Exception {
+        String s = "";
+        for (int i = 0; i < 10; ++i) {
+            s = "";
+            // The object is the only thing preventing the class loader from being unloaded.
+            Object o = allocObjectInOtherClassLoader(constructor);
+            for (int j = 0; j < 1000; ++j) {
+                s += j + " ";
+            }
+            // Make sure the object still has a valid class (hasn't been incorrectly unloaded).
+            s += o.getClass().getName();
+            o = null;
+        }
+        System.out.println("Too small " + (s.length() < 1000));
+    }
+
     private static WeakReference<Class> setUpUnloadClassWeak(Constructor<?> constructor)
             throws Exception {
         return new WeakReference<Class>(setUpUnloadClass(constructor));
diff --git a/test/151-OpenFileLimit/expected.txt b/test/151-OpenFileLimit/expected.txt
new file mode 100644
index 0000000..6bc45ef
--- /dev/null
+++ b/test/151-OpenFileLimit/expected.txt
@@ -0,0 +1,3 @@
+Message includes "Too many open files"
+thread run.
+done.
diff --git a/test/151-OpenFileLimit/info.txt b/test/151-OpenFileLimit/info.txt
new file mode 100644
index 0000000..9af393d
--- /dev/null
+++ b/test/151-OpenFileLimit/info.txt
@@ -0,0 +1,2 @@
+This test verifies that running out of file descriptors in the process doesn't
+prevent us from launching a new thread.
diff --git a/test/151-OpenFileLimit/run b/test/151-OpenFileLimit/run
new file mode 100755
index 0000000..5c83fd0
--- /dev/null
+++ b/test/151-OpenFileLimit/run
@@ -0,0 +1,21 @@
+#!/bin/bash
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+flags="$@"
+
+# Reduce the file descriptor limit so the test will reach the limit sooner.
+ulimit -n 512
+${RUN} ${flags}
diff --git a/test/151-OpenFileLimit/src/Main.java b/test/151-OpenFileLimit/src/Main.java
new file mode 100644
index 0000000..9fe47c8
--- /dev/null
+++ b/test/151-OpenFileLimit/src/Main.java
@@ -0,0 +1,78 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import static java.nio.file.StandardOpenOption.*;
+import java.nio.file.*;
+import java.io.*;
+import java.util.*;
+
+public class Main {
+    private static final String TEMP_FILE_NAME_PREFIX = "oflimit";
+    private static final String TEMP_FILE_NAME_SUFFIX = ".txt";
+
+    public static void main(String[] args) throws IOException {
+
+        // Exhaust the number of open file descriptors.
+        List<File> files = new ArrayList<File>();
+        List<OutputStream> streams = new ArrayList<OutputStream>();
+        try {
+            for (int i = 0; ; i++) {
+                File file = createTempFile();
+                files.add(file);
+                streams.add(Files.newOutputStream(file.toPath(), CREATE, APPEND));
+            }
+        } catch (Throwable e) {
+            if (e.getMessage().contains("Too many open files")) {
+                System.out.println("Message includes \"Too many open files\"");
+            } else {
+                System.out.println(e.getMessage());
+            }
+        }
+
+        // Now try to create a new thread.
+        try {
+            Thread thread = new Thread() {
+                public void run() {
+                    System.out.println("thread run.");
+                }
+            };
+            thread.start();
+            thread.join();
+        } catch (Throwable e) {
+            System.out.println(e.getMessage());
+        }
+
+        for (int i = 0; i < files.size(); i++) {
+          streams.get(i).close();
+          files.get(i).delete();
+        }
+        System.out.println("done.");
+    }
+
+    private static File createTempFile() throws Exception {
+        try {
+            return  File.createTempFile(TEMP_FILE_NAME_PREFIX, TEMP_FILE_NAME_SUFFIX);
+        } catch (IOException e) {
+            System.setProperty("java.io.tmpdir", "/data/local/tmp");
+            try {
+                return File.createTempFile(TEMP_FILE_NAME_PREFIX, TEMP_FILE_NAME_SUFFIX);
+            } catch (IOException e2) {
+                System.setProperty("java.io.tmpdir", "/sdcard");
+                return File.createTempFile(TEMP_FILE_NAME_PREFIX, TEMP_FILE_NAME_SUFFIX);
+            }
+        }
+    }
+}
diff --git a/test/300-package-override/expected.txt b/test/300-package-override/expected.txt
index b0aad4d..a2c3f20 100644
--- a/test/300-package-override/expected.txt
+++ b/test/300-package-override/expected.txt
@@ -1 +1,4 @@
 passed
+This should be visible!
+This should override!
+This should override!
diff --git a/test/300-package-override/src/Main.java b/test/300-package-override/src/Main.java
index ad7eaaf..a9319e3 100644
--- a/test/300-package-override/src/Main.java
+++ b/test/300-package-override/src/Main.java
@@ -18,5 +18,11 @@
   public static void main(String args[]) throws Exception {
     p1.BaseClass c = new p2.DerivedClass();
     c.run();
+    p2.DerivedClass d = new p2.DerivedClass();
+    d.bar();
+    p2.DerivedClass d2 = new p2.DerivedClass2();
+    d2.bar();
+    p2.DerivedClass2 d3 = new p2.DerivedClass2();
+    d3.bar();
   }
 }
diff --git a/test/300-package-override/src/p1/BaseClass.java b/test/300-package-override/src/p1/BaseClass.java
index 1c048ac..eea35ec 100644
--- a/test/300-package-override/src/p1/BaseClass.java
+++ b/test/300-package-override/src/p1/BaseClass.java
@@ -19,4 +19,5 @@
 public class BaseClass {
   public void run() { foo(); }
   void foo() { System.out.println("passed"); } // It should not be possible to override this.
+  void bar() { System.out.println("FAILED: This should not be called!"); }
 }
diff --git a/test/300-package-override/src/p2/DerivedClass.java b/test/300-package-override/src/p2/DerivedClass.java
index 860f50c..76f6200 100644
--- a/test/300-package-override/src/p2/DerivedClass.java
+++ b/test/300-package-override/src/p2/DerivedClass.java
@@ -18,4 +18,5 @@
 
 public class DerivedClass extends p1.BaseClass {
   void foo() { System.out.println("DerivedClass overrode package-private method!"); } // This should not override BaseClass.foo.
+  public void bar() { System.out.println("This should be visible!"); }
 }
diff --git a/test/562-no-intermediate/src/Main.java b/test/300-package-override/src/p2/DerivedClass2.java
similarity index 68%
copy from test/562-no-intermediate/src/Main.java
copy to test/300-package-override/src/p2/DerivedClass2.java
index 3b74d6f..ab55799 100644
--- a/test/562-no-intermediate/src/Main.java
+++ b/test/300-package-override/src/p2/DerivedClass2.java
@@ -14,14 +14,9 @@
  * limitations under the License.
  */
 
-public class Main {
+package p2;
 
-  /// CHECK-START-ARM64: int Main.main(String[]) register_allocator (after)
-  /// CHECK-NOT: IntermediateAddress
-  public static void main(String[] args) {
-    array[index] += Math.cos(42);
-  }
-
-  static int index = 0;
-  static double[] array = new double[2];
+// Regression test for b/32193118
+public class DerivedClass2 extends p2.DerivedClass {
+  public void bar() { System.out.println("This should override!"); }
 }
diff --git a/test/445-checker-licm/expected.txt b/test/445-checker-licm/expected.txt
index e69de29..b0aad4d 100644
--- a/test/445-checker-licm/expected.txt
+++ b/test/445-checker-licm/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/445-checker-licm/src/Main.java b/test/445-checker-licm/src/Main.java
index 061fe6e..00ce3a9 100644
--- a/test/445-checker-licm/src/Main.java
+++ b/test/445-checker-licm/src/Main.java
@@ -164,8 +164,43 @@
     return result;
   }
 
+  //
+  // All operations up to the null check can be hoisted out of the
+  // loop. The null check itself sees the induction in its environment.
+  //
+  /// CHECK-START: int Main.doWhile(int) licm (before)
+  /// CHECK-DAG: <<Add:i\d+>> Add                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG:              LoadClass           loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: <<Get:l\d+>> StaticFieldGet      loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:              NullCheck [<<Get>>] env:[[<<Add>>,<<Get>>,{{i\d+}}]] loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:              ArrayLength         loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:              BoundsCheck         loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:              ArrayGet            loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: int Main.doWhile(int) licm (after)
+  /// CHECK-NOT: LoadClass      loop:{{B\d+}}
+  /// CHECK-NOT: StaticFieldGet loop:{{B\d+}}
+  //
+  /// CHECK-START: int Main.doWhile(int) licm (after)
+  /// CHECK-DAG:              LoadClass           loop:none
+  /// CHECK-DAG: <<Get:l\d+>> StaticFieldGet      loop:none
+  /// CHECK-DAG: <<Add:i\d+>> Add                 loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG:              NullCheck [<<Get>>] env:[[<<Add>>,<<Get>>,{{i\d+}}]] loop:<<Loop>> outer_loop:none
+  /// CHECK-DAG:              ArrayLength         loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:              BoundsCheck         loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:              ArrayGet            loop:<<Loop>>      outer_loop:none
+  public static int doWhile(int k) {
+    int i = k;
+    do {
+      i += 2;
+    } while (staticArray[i] == 0);
+    return i;
+  }
+
   public static int staticField = 42;
 
+  public static int[] staticArray = null;
+
   public static void assertEquals(int expected, int actual) {
     if (expected != actual) {
       throw new Error("Expected " + expected + ", got " + actual);
@@ -181,5 +216,24 @@
     assertEquals(21, divAndIntrinsic(new int[] { 4, -2, 8, -3 }));
     assertEquals(45, invariantBoundIntrinsic(-10));
     assertEquals(30, invariantBodyIntrinsic(2, 3));
+
+    staticArray = null;
+    try {
+      doWhile(0);
+      throw new Error("Expected NPE");
+    } catch (NullPointerException e) {
+    }
+    staticArray = new int[5];
+    staticArray[4] = 1;
+    assertEquals(4, doWhile(-2));
+    assertEquals(4, doWhile(0));
+    assertEquals(4, doWhile(2));
+    try {
+      doWhile(1);
+      throw new Error("Expected IOOBE");
+    } catch (IndexOutOfBoundsException e) {
+    }
+
+    System.out.println("passed");
   }
 }
diff --git a/test/449-checker-bce/src/Main.java b/test/449-checker-bce/src/Main.java
index 3a56c3b..5103540 100644
--- a/test/449-checker-bce/src/Main.java
+++ b/test/449-checker-bce/src/Main.java
@@ -1048,6 +1048,8 @@
   /// CHECK: Goto
 
   void foo1(int[] array, int start, int end, boolean expectInterpreter) {
+    if (end < 0)
+      throw new Error("");
     // Three HDeoptimize will be added. Two for the index
     // and one for null check on array (to hoist null
     // check and array.length out of loop).
@@ -1086,6 +1088,8 @@
   /// CHECK: Goto
 
   void foo2(int[] array, int start, int end, boolean expectInterpreter) {
+    if (end < 0)
+      throw new Error("");
     // Three HDeoptimize will be added. Two for the index
     // and one for null check on array (to hoist null
     // check and array.length out of loop).
@@ -1124,6 +1128,8 @@
   /// CHECK: Goto
 
   void foo3(int[] array, int end, boolean expectInterpreter) {
+    if (end < 0)
+      throw new Error("");
     // Three HDeoptimize will be added. Two for the index
     // and one for null check on array (to hoist null check
     // and array.length out of loop).
@@ -1163,6 +1169,8 @@
   /// CHECK: Goto
 
   void foo4(int[] array, int end, boolean expectInterpreter) {
+    if (end < 0)
+      throw new Error("");
     // Three HDeoptimize will be added. Two for the index
     // and one for null check on array (to hoist null check
     // and array.length out of loop).
@@ -1210,6 +1218,8 @@
   /// CHECK: Goto
 
   void foo5(int[] array, int end, boolean expectInterpreter) {
+    if (end < 0)
+      throw new Error("");
     // Bounds check in this loop can be eliminated without deoptimization.
     for (int i = array.length - 1 ; i >= 0; i--) {
       array[i] = 1;
@@ -1269,6 +1279,8 @@
   /// CHECK: Goto
 
   void foo6(int[] array, int start, int end, boolean expectInterpreter) {
+    if (end < 0)
+      throw new Error("");
     for (int i = end; i >= start; i--) {
       if (expectInterpreter) {
         assertIsInterpreted();
diff --git a/test/450-checker-types/src/Main.java b/test/450-checker-types/src/Main.java
index 6e453af..ea8609e 100644
--- a/test/450-checker-types/src/Main.java
+++ b/test/450-checker-types/src/Main.java
@@ -214,11 +214,11 @@
   /// CHECK-DAG:     <<IOf:z\d+>>  InstanceOf
   /// CHECK-DAG:                   If [<<IOf>>]
 
-  /// CHECK-START: void Main.testInstanceOf_Inlined(java.lang.Object) instruction_simplifier$after_bce (before)
+  /// CHECK-START: void Main.testInstanceOf_Inlined(java.lang.Object) instruction_simplifier$after_inlining (before)
   /// CHECK:         CheckCast
   /// CHECK-NOT:     CheckCast
 
-  /// CHECK-START: void Main.testInstanceOf_Inlined(java.lang.Object) instruction_simplifier$after_bce (after)
+  /// CHECK-START: void Main.testInstanceOf_Inlined(java.lang.Object) instruction_simplifier$after_inlining (after)
   /// CHECK-NOT:     CheckCast
   public void testInstanceOf_Inlined(Object o) {
     if (!$inline$InstanceofSubclassC(o)) {
diff --git a/test/458-checker-instruct-simplification/src/Main.java b/test/458-checker-instruct-simplification/src/Main.java
index 40baa15..529ea5b 100644
--- a/test/458-checker-instruct-simplification/src/Main.java
+++ b/test/458-checker-instruct-simplification/src/Main.java
@@ -1126,7 +1126,7 @@
     return res;
   }
 
-  /// CHECK-START: boolean Main.$noinline$EqualBoolVsIntConst(boolean) instruction_simplifier$after_bce (before)
+  /// CHECK-START: boolean Main.$noinline$EqualBoolVsIntConst(boolean) instruction_simplifier$after_inlining (before)
   /// CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
   /// CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
@@ -1136,7 +1136,7 @@
   /// CHECK-DAG:     <<NotCond:i\d+>>  Select [<<Const1>>,<<Const0>>,<<Cond>>]
   /// CHECK-DAG:                       Return [<<NotCond>>]
 
-  /// CHECK-START: boolean Main.$noinline$EqualBoolVsIntConst(boolean) instruction_simplifier$after_bce (after)
+  /// CHECK-START: boolean Main.$noinline$EqualBoolVsIntConst(boolean) instruction_simplifier$after_inlining (after)
   /// CHECK-DAG:     <<True:i\d+>>     IntConstant 1
   /// CHECK-DAG:                       Return [<<True>>]
 
@@ -1151,7 +1151,7 @@
     return arg;
   }
 
-  /// CHECK-START: boolean Main.$noinline$NotEqualBoolVsIntConst(boolean) instruction_simplifier$after_bce (before)
+  /// CHECK-START: boolean Main.$noinline$NotEqualBoolVsIntConst(boolean) instruction_simplifier$after_inlining (before)
   /// CHECK-DAG:     <<Arg:z\d+>>      ParameterValue
   /// CHECK-DAG:     <<Const0:i\d+>>   IntConstant 0
   /// CHECK-DAG:     <<Const1:i\d+>>   IntConstant 1
@@ -1161,7 +1161,7 @@
   /// CHECK-DAG:     <<NotCond:i\d+>>  Select [<<Const1>>,<<Const0>>,<<Cond>>]
   /// CHECK-DAG:                       Return [<<NotCond>>]
 
-  /// CHECK-START: boolean Main.$noinline$NotEqualBoolVsIntConst(boolean) instruction_simplifier$after_bce (after)
+  /// CHECK-START: boolean Main.$noinline$NotEqualBoolVsIntConst(boolean) instruction_simplifier$after_inlining (after)
   /// CHECK-DAG:     <<False:i\d+>>    IntConstant 0
   /// CHECK-DAG:                       Return [<<False>>]
 
@@ -1191,13 +1191,13 @@
   /// CHECK-DAG:     <<NotResult:z\d+>> BooleanNot [<<Result>>]
   /// CHECK-DAG:                        Return [<<NotResult>>]
 
-  /// CHECK-START: boolean Main.$noinline$NotNotBool(boolean) instruction_simplifier$after_bce (before)
+  /// CHECK-START: boolean Main.$noinline$NotNotBool(boolean) instruction_simplifier$after_inlining (before)
   /// CHECK-DAG:     <<Arg:z\d+>>       ParameterValue
   /// CHECK-DAG:     <<NotArg:z\d+>>    BooleanNot [<<Arg>>]
   /// CHECK-DAG:     <<NotNotArg:z\d+>> BooleanNot [<<NotArg>>]
   /// CHECK-DAG:                        Return [<<NotNotArg>>]
 
-  /// CHECK-START: boolean Main.$noinline$NotNotBool(boolean) instruction_simplifier$after_bce (after)
+  /// CHECK-START: boolean Main.$noinline$NotNotBool(boolean) instruction_simplifier$after_inlining (after)
   /// CHECK-DAG:     <<Arg:z\d+>>       ParameterValue
   /// CHECK-DAG:     <<NotArg:z\d+>>    BooleanNot [<<Arg>>]
   /// CHECK-DAG:                        Return [<<Arg>>]
@@ -1333,7 +1333,7 @@
     return arg * 31;
   }
 
-  /// CHECK-START: int Main.$noinline$booleanFieldNotEqualOne() instruction_simplifier$after_bce (before)
+  /// CHECK-START: int Main.$noinline$booleanFieldNotEqualOne() instruction_simplifier$after_inlining (before)
   /// CHECK-DAG:      <<Const1:i\d+>>   IntConstant 1
   /// CHECK-DAG:      <<Const13:i\d+>>  IntConstant 13
   /// CHECK-DAG:      <<Const54:i\d+>>  IntConstant 54
@@ -1343,7 +1343,7 @@
   /// CHECK-DAG:      <<Select:i\d+>>   Select [<<Const13>>,<<Const54>>,<<NE>>]
   /// CHECK-DAG:                        Return [<<Select>>]
 
-  /// CHECK-START: int Main.$noinline$booleanFieldNotEqualOne() instruction_simplifier$after_bce (after)
+  /// CHECK-START: int Main.$noinline$booleanFieldNotEqualOne() instruction_simplifier$after_inlining (after)
   /// CHECK-DAG:      <<doThrow:z\d+>>  StaticFieldGet
   /// CHECK-DAG:      <<Field:z\d+>>    StaticFieldGet
   /// CHECK-DAG:      <<Const13:i\d+>>  IntConstant 13
@@ -1356,7 +1356,7 @@
     return (booleanField == $inline$true()) ? 13 : 54;
   }
 
-  /// CHECK-START: int Main.$noinline$booleanFieldEqualZero() instruction_simplifier$after_bce (before)
+  /// CHECK-START: int Main.$noinline$booleanFieldEqualZero() instruction_simplifier$after_inlining (before)
   /// CHECK-DAG:      <<Const0:i\d+>>   IntConstant 0
   /// CHECK-DAG:      <<Const13:i\d+>>  IntConstant 13
   /// CHECK-DAG:      <<Const54:i\d+>>  IntConstant 54
@@ -1366,7 +1366,7 @@
   /// CHECK-DAG:      <<Select:i\d+>>   Select [<<Const13>>,<<Const54>>,<<NE>>]
   /// CHECK-DAG:                        Return [<<Select>>]
 
-  /// CHECK-START: int Main.$noinline$booleanFieldEqualZero() instruction_simplifier$after_bce (after)
+  /// CHECK-START: int Main.$noinline$booleanFieldEqualZero() instruction_simplifier$after_inlining (after)
   /// CHECK-DAG:      <<doThrow:z\d+>>  StaticFieldGet
   /// CHECK-DAG:      <<Field:z\d+>>    StaticFieldGet
   /// CHECK-DAG:      <<Const13:i\d+>>  IntConstant 13
@@ -1379,7 +1379,7 @@
     return (booleanField != $inline$false()) ? 13 : 54;
   }
 
-  /// CHECK-START: int Main.$noinline$intConditionNotEqualOne(int) instruction_simplifier$after_bce (before)
+  /// CHECK-START: int Main.$noinline$intConditionNotEqualOne(int) instruction_simplifier$after_inlining (before)
   /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:      <<Const0:i\d+>>   IntConstant 0
   /// CHECK-DAG:      <<Const1:i\d+>>   IntConstant 1
@@ -1392,7 +1392,7 @@
   /// CHECK-DAG:      <<Result:i\d+>>   Select [<<Const13>>,<<Const54>>,<<NE>>]
   /// CHECK-DAG:                        Return [<<Result>>]
 
-  /// CHECK-START: int Main.$noinline$intConditionNotEqualOne(int) instruction_simplifier$after_bce (after)
+  /// CHECK-START: int Main.$noinline$intConditionNotEqualOne(int) instruction_simplifier$after_inlining (after)
   /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:      <<Const13:i\d+>>  IntConstant 13
   /// CHECK-DAG:      <<Const42:i\d+>>  IntConstant 42
@@ -1408,7 +1408,7 @@
     return ((i > 42) == $inline$true()) ? 13 : 54;
   }
 
-  /// CHECK-START: int Main.$noinline$intConditionEqualZero(int) instruction_simplifier$after_bce (before)
+  /// CHECK-START: int Main.$noinline$intConditionEqualZero(int) instruction_simplifier$after_inlining (before)
   /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:      <<Const0:i\d+>>   IntConstant 0
   /// CHECK-DAG:      <<Const1:i\d+>>   IntConstant 1
@@ -1421,7 +1421,7 @@
   /// CHECK-DAG:      <<Result:i\d+>>   Select [<<Const13>>,<<Const54>>,<<NE>>]
   /// CHECK-DAG:                        Return [<<Result>>]
 
-  /// CHECK-START: int Main.$noinline$intConditionEqualZero(int) instruction_simplifier$after_bce (after)
+  /// CHECK-START: int Main.$noinline$intConditionEqualZero(int) instruction_simplifier$after_inlining (after)
   /// CHECK-DAG:      <<Arg:i\d+>>      ParameterValue
   /// CHECK-DAG:      <<Const13:i\d+>>  IntConstant 13
   /// CHECK-DAG:      <<Const42:i\d+>>  IntConstant 42
diff --git a/test/562-no-intermediate/expected.txt b/test/478-checker-inline-noreturn/expected.txt
similarity index 100%
copy from test/562-no-intermediate/expected.txt
copy to test/478-checker-inline-noreturn/expected.txt
diff --git a/test/478-checker-inline-noreturn/info.txt b/test/478-checker-inline-noreturn/info.txt
new file mode 100644
index 0000000..64f42ed
--- /dev/null
+++ b/test/478-checker-inline-noreturn/info.txt
@@ -0,0 +1,3 @@
+Tests inlining a function with a no-exit loop into a loop. LinearOrder
+computation fails because of incorrect HLoopInformation if we inline
+a loop without an exit.
diff --git a/test/478-checker-inline-noreturn/src/Main.java b/test/478-checker-inline-noreturn/src/Main.java
new file mode 100644
index 0000000..7aaeac0
--- /dev/null
+++ b/test/478-checker-inline-noreturn/src/Main.java
@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+/*
+ * A test that checks that the inliner does not inline functions that contain
+ * a loop with no exit.  This because the incremental update to
+ * HLoopInformation done by the inliner does not work with the LinearOrder
+ * computation if the inlined function does not always return.
+ */
+
+public class Main {
+
+  public static void assertIntEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+
+  public static int $opt$noinline$Function(int x, int y) {
+    int result;
+    if (x <= y) {
+      result = 42;
+    } else {
+      while (true);
+    }
+    return result;
+  }
+
+  /// CHECK-START: int Main.callerLoop(int, int) inliner (before)
+  /// CHECK:         InvokeStaticOrDirect method_name:Main.$opt$noinline$Function  loop:{{B\d+}}
+
+  /// CHECK-START: int Main.callerLoop(int, int) inliner (after)
+  /// CHECK:         InvokeStaticOrDirect method_name:Main.$opt$noinline$Function  loop:{{B\d+}}
+
+  public static int callerLoop(int max_x, int max_y) {
+    int total = 0;
+    for (int x = 0; x < max_x; ++x) {
+      total += $opt$noinline$Function(x, max_y);
+    }
+    return total;
+  }
+
+  public static void main(String[] args) {
+    assertIntEquals(42, callerLoop(1, 1));
+  }
+}
diff --git a/test/480-checker-dead-blocks/src/Main.java b/test/480-checker-dead-blocks/src/Main.java
index 141054d..0ca822f 100644
--- a/test/480-checker-dead-blocks/src/Main.java
+++ b/test/480-checker-dead-blocks/src/Main.java
@@ -30,7 +30,7 @@
     return false;
   }
 
-  /// CHECK-START: int Main.testTrueBranch(int, int) dead_code_elimination$final (before)
+  /// CHECK-START: int Main.testTrueBranch(int, int) dead_code_elimination$after_inlining (before)
   /// CHECK-DAG:     <<ArgX:i\d+>>    ParameterValue
   /// CHECK-DAG:     <<ArgY:i\d+>>    ParameterValue
   /// CHECK-DAG:                      If
@@ -39,13 +39,13 @@
   /// CHECK-DAG:     <<Phi:i\d+>>     Phi [<<Add>>,<<Sub>>]
   /// CHECK-DAG:                      Return [<<Phi>>]
 
-  /// CHECK-START: int Main.testTrueBranch(int, int) dead_code_elimination$final (after)
+  /// CHECK-START: int Main.testTrueBranch(int, int) dead_code_elimination$after_inlining (after)
   /// CHECK-DAG:     <<ArgX:i\d+>>    ParameterValue
   /// CHECK-DAG:     <<ArgY:i\d+>>    ParameterValue
   /// CHECK-DAG:     <<Add:i\d+>>     Add [<<ArgX>>,<<ArgY>>]
   /// CHECK-DAG:                      Return [<<Add>>]
 
-  /// CHECK-START: int Main.testTrueBranch(int, int) dead_code_elimination$final (after)
+  /// CHECK-START: int Main.testTrueBranch(int, int) dead_code_elimination$after_inlining (after)
   /// CHECK-NOT:                      If
   /// CHECK-NOT:                      Sub
   /// CHECK-NOT:                      Phi
@@ -62,7 +62,7 @@
     return z;
   }
 
-  /// CHECK-START: int Main.testFalseBranch(int, int) dead_code_elimination$final (before)
+  /// CHECK-START: int Main.testFalseBranch(int, int) dead_code_elimination$after_inlining (before)
   /// CHECK-DAG:     <<ArgX:i\d+>>    ParameterValue
   /// CHECK-DAG:     <<ArgY:i\d+>>    ParameterValue
   /// CHECK-DAG:                      If
@@ -71,13 +71,13 @@
   /// CHECK-DAG:     <<Phi:i\d+>>     Phi [<<Add>>,<<Sub>>]
   /// CHECK-DAG:                      Return [<<Phi>>]
 
-  /// CHECK-START: int Main.testFalseBranch(int, int) dead_code_elimination$final (after)
+  /// CHECK-START: int Main.testFalseBranch(int, int) dead_code_elimination$after_inlining (after)
   /// CHECK-DAG:     <<ArgX:i\d+>>    ParameterValue
   /// CHECK-DAG:     <<ArgY:i\d+>>    ParameterValue
   /// CHECK-DAG:     <<Sub:i\d+>>     Sub [<<ArgX>>,<<ArgY>>]
   /// CHECK-DAG:                      Return [<<Sub>>]
 
-  /// CHECK-START: int Main.testFalseBranch(int, int) dead_code_elimination$final (after)
+  /// CHECK-START: int Main.testFalseBranch(int, int) dead_code_elimination$after_inlining (after)
   /// CHECK-NOT:                      If
   /// CHECK-NOT:                      Add
   /// CHECK-NOT:                      Phi
@@ -94,10 +94,10 @@
     return z;
   }
 
-  /// CHECK-START: int Main.testRemoveLoop(int) dead_code_elimination$final (before)
+  /// CHECK-START: int Main.testRemoveLoop(int) dead_code_elimination$after_inlining (before)
   /// CHECK:                          Mul
 
-  /// CHECK-START: int Main.testRemoveLoop(int) dead_code_elimination$final (after)
+  /// CHECK-START: int Main.testRemoveLoop(int) dead_code_elimination$after_inlining (after)
   /// CHECK-NOT:                      Mul
 
   public static int testRemoveLoop(int x) {
@@ -109,11 +109,11 @@
     return x;
   }
 
-  /// CHECK-START: int Main.testInfiniteLoop(int) dead_code_elimination$final (before)
+  /// CHECK-START: int Main.testInfiniteLoop(int) dead_code_elimination$after_inlining (before)
   /// CHECK-DAG:                      Return
   /// CHECK-DAG:                      Exit
 
-  /// CHECK-START: int Main.testInfiniteLoop(int) dead_code_elimination$final (after)
+  /// CHECK-START: int Main.testInfiniteLoop(int) dead_code_elimination$after_inlining (after)
   /// CHECK-NOT:                      Return
   /// CHECK-NOT:                      Exit
 
@@ -124,15 +124,15 @@
     return x;
   }
 
-  /// CHECK-START: int Main.testDeadLoop(int) dead_code_elimination$final (before)
+  /// CHECK-START: int Main.testDeadLoop(int) dead_code_elimination$after_inlining (before)
   /// CHECK-DAG:                      If
   /// CHECK-DAG:                      Add
 
-  /// CHECK-START: int Main.testDeadLoop(int) dead_code_elimination$final (after)
+  /// CHECK-START: int Main.testDeadLoop(int) dead_code_elimination$after_inlining (after)
   /// CHECK-DAG:     <<Arg:i\d+>>     ParameterValue
   /// CHECK-DAG:                      Return [<<Arg>>]
 
-  /// CHECK-START: int Main.testDeadLoop(int) dead_code_elimination$final (after)
+  /// CHECK-START: int Main.testDeadLoop(int) dead_code_elimination$after_inlining (after)
   /// CHECK-NOT:                      If
   /// CHECK-NOT:                      Add
 
@@ -143,16 +143,16 @@
     return x;
   }
 
-  /// CHECK-START: int Main.testUpdateLoopInformation(int) dead_code_elimination$final (before)
+  /// CHECK-START: int Main.testUpdateLoopInformation(int) dead_code_elimination$after_inlining (before)
   /// CHECK-DAG:                      If
   /// CHECK-DAG:                      If
   /// CHECK-DAG:                      Add
 
-  /// CHECK-START: int Main.testUpdateLoopInformation(int) dead_code_elimination$final (after)
+  /// CHECK-START: int Main.testUpdateLoopInformation(int) dead_code_elimination$after_inlining (after)
   /// CHECK-DAG:     <<Arg:i\d+>>     ParameterValue
   /// CHECK-DAG:                      Return [<<Arg>>]
 
-  /// CHECK-START: int Main.testUpdateLoopInformation(int) dead_code_elimination$final (after)
+  /// CHECK-START: int Main.testUpdateLoopInformation(int) dead_code_elimination$after_inlining (after)
   /// CHECK-NOT:                      If
   /// CHECK-NOT:                      Add
 
@@ -165,13 +165,13 @@
     return x;
   }
 
-  /// CHECK-START: int Main.testRemoveSuspendCheck(int, int) dead_code_elimination$final (before)
+  /// CHECK-START: int Main.testRemoveSuspendCheck(int, int) dead_code_elimination$after_inlining (before)
   /// CHECK:                          SuspendCheck
   /// CHECK:                          SuspendCheck
   /// CHECK:                          SuspendCheck
   /// CHECK-NOT:                      SuspendCheck
 
-  /// CHECK-START: int Main.testRemoveSuspendCheck(int, int) dead_code_elimination$final (after)
+  /// CHECK-START: int Main.testRemoveSuspendCheck(int, int) dead_code_elimination$after_inlining (after)
   /// CHECK:                          SuspendCheck
   /// CHECK:                          SuspendCheck
   /// CHECK-NOT:                      SuspendCheck
diff --git a/test/485-checker-dce-loop-update/smali/TestCase.smali b/test/485-checker-dce-loop-update/smali/TestCase.smali
index e3617c7..cda6f73 100644
--- a/test/485-checker-dce-loop-update/smali/TestCase.smali
+++ b/test/485-checker-dce-loop-update/smali/TestCase.smali
@@ -23,7 +23,7 @@
 .end method
 
 
-## CHECK-START: int TestCase.testSingleExit(int, boolean) dead_code_elimination$final (before)
+## CHECK-START: int TestCase.testSingleExit(int, boolean) dead_code_elimination$after_inlining (before)
 ## CHECK-DAG:     <<ArgX:i\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgY:z\d+>>  ParameterValue
 ## CHECK-DAG:     <<Cst1:i\d+>>  IntConstant 1
@@ -36,7 +36,7 @@
 ## CHECK-DAG:     <<Add7>>       Add [<<PhiX>>,<<Cst7>>]                    loop:<<HeaderY>>
 ## CHECK-DAG:                    Return [<<PhiX>>]                          loop:none
 
-## CHECK-START: int TestCase.testSingleExit(int, boolean) dead_code_elimination$final (after)
+## CHECK-START: int TestCase.testSingleExit(int, boolean) dead_code_elimination$after_inlining (after)
 ## CHECK-DAG:     <<ArgX:i\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgY:z\d+>>  ParameterValue
 ## CHECK-DAG:     <<Cst7:i\d+>>  IntConstant 7
@@ -73,7 +73,7 @@
 .end method
 
 
-## CHECK-START: int TestCase.testMultipleExits(int, boolean, boolean) dead_code_elimination$final (before)
+## CHECK-START: int TestCase.testMultipleExits(int, boolean, boolean) dead_code_elimination$after_inlining (before)
 ## CHECK-DAG:     <<ArgX:i\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgY:z\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgZ:z\d+>>  ParameterValue
@@ -88,7 +88,7 @@
 ## CHECK-DAG:     <<Add7>>       Add [<<PhiX>>,<<Cst7>>]                    loop:<<HeaderY>>
 ## CHECK-DAG:                    Return [<<PhiX>>]                          loop:none
 
-## CHECK-START: int TestCase.testMultipleExits(int, boolean, boolean) dead_code_elimination$final (after)
+## CHECK-START: int TestCase.testMultipleExits(int, boolean, boolean) dead_code_elimination$after_inlining (after)
 ## CHECK-DAG:     <<ArgX:i\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgY:z\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgZ:z\d+>>  ParameterValue
@@ -129,7 +129,7 @@
 .end method
 
 
-## CHECK-START: int TestCase.testExitPredecessors(int, boolean, boolean) dead_code_elimination$final (before)
+## CHECK-START: int TestCase.testExitPredecessors(int, boolean, boolean) dead_code_elimination$after_inlining (before)
 ## CHECK-DAG:     <<ArgX:i\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgY:z\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgZ:z\d+>>  ParameterValue
@@ -146,7 +146,7 @@
 ## CHECK-DAG:     <<Add7>>       Add [<<PhiX>>,<<Cst7>>]                    loop:<<HeaderY>>
 ## CHECK-DAG:                    Return [<<SelX>>]                          loop:none
 
-## CHECK-START: int TestCase.testExitPredecessors(int, boolean, boolean) dead_code_elimination$final (after)
+## CHECK-START: int TestCase.testExitPredecessors(int, boolean, boolean) dead_code_elimination$after_inlining (after)
 ## CHECK-DAG:     <<ArgX:i\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgY:z\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgZ:z\d+>>  ParameterValue
@@ -194,7 +194,7 @@
 .end method
 
 
-## CHECK-START: int TestCase.testInnerLoop(int, boolean, boolean) dead_code_elimination$final (before)
+## CHECK-START: int TestCase.testInnerLoop(int, boolean, boolean) dead_code_elimination$after_inlining (before)
 ## CHECK-DAG:     <<ArgX:i\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgY:z\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgZ:z\d+>>  ParameterValue
@@ -217,7 +217,7 @@
 ## CHECK-DAG:     <<Add7>>       Add [<<PhiX>>,<<Cst7>>]                    loop:<<HeaderY>>
 ## CHECK-DAG:                    Return [<<PhiX>>]                          loop:none
 
-## CHECK-START: int TestCase.testInnerLoop(int, boolean, boolean) dead_code_elimination$final (after)
+## CHECK-START: int TestCase.testInnerLoop(int, boolean, boolean) dead_code_elimination$after_inlining (after)
 ## CHECK-DAG:     <<ArgX:i\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgY:z\d+>>  ParameterValue
 ## CHECK-DAG:     <<ArgZ:z\d+>>  ParameterValue
diff --git a/test/485-checker-dce-switch/src/Main.java b/test/485-checker-dce-switch/src/Main.java
index 7d5fd4f..95b1a93 100644
--- a/test/485-checker-dce-switch/src/Main.java
+++ b/test/485-checker-dce-switch/src/Main.java
@@ -20,14 +20,14 @@
     return 5;
   }
 
-  /// CHECK-START: int Main.wholeSwitchDead(int) dead_code_elimination$final (before)
+  /// CHECK-START: int Main.wholeSwitchDead(int) dead_code_elimination$after_inlining (before)
   /// CHECK-DAG:                      PackedSwitch
 
-  /// CHECK-START: int Main.wholeSwitchDead(int) dead_code_elimination$final (after)
+  /// CHECK-START: int Main.wholeSwitchDead(int) dead_code_elimination$after_inlining (after)
   /// CHECK-DAG:    <<Const100:i\d+>> IntConstant 100
   /// CHECK-DAG:                      Return [<<Const100>>]
 
-  /// CHECK-START: int Main.wholeSwitchDead(int) dead_code_elimination$final (after)
+  /// CHECK-START: int Main.wholeSwitchDead(int) dead_code_elimination$after_inlining (after)
   /// CHECK-NOT:                      PackedSwitch
 
   public static int wholeSwitchDead(int j) {
@@ -60,14 +60,14 @@
     return l;
   }
 
-  /// CHECK-START: int Main.constantSwitch_InRange() dead_code_elimination$final (before)
+  /// CHECK-START: int Main.constantSwitch_InRange() dead_code_elimination$after_inlining (before)
   /// CHECK-DAG:                      PackedSwitch
 
-  /// CHECK-START: int Main.constantSwitch_InRange() dead_code_elimination$final (after)
+  /// CHECK-START: int Main.constantSwitch_InRange() dead_code_elimination$after_inlining (after)
   /// CHECK-DAG:     <<Const7:i\d+>>  IntConstant 7
   /// CHECK-DAG:                      Return [<<Const7>>]
 
-  /// CHECK-START: int Main.constantSwitch_InRange() dead_code_elimination$final (after)
+  /// CHECK-START: int Main.constantSwitch_InRange() dead_code_elimination$after_inlining (after)
   /// CHECK-NOT:                      PackedSwitch
 
   public static int constantSwitch_InRange() {
@@ -96,14 +96,14 @@
     return i;
   }
 
-  /// CHECK-START: int Main.constantSwitch_AboveRange() dead_code_elimination$final (before)
+  /// CHECK-START: int Main.constantSwitch_AboveRange() dead_code_elimination$after_inlining (before)
   /// CHECK-DAG:                      PackedSwitch
 
-  /// CHECK-START: int Main.constantSwitch_AboveRange() dead_code_elimination$final (after)
+  /// CHECK-START: int Main.constantSwitch_AboveRange() dead_code_elimination$after_inlining (after)
   /// CHECK-DAG:     <<Const15:i\d+>> IntConstant 15
   /// CHECK-DAG:                      Return [<<Const15>>]
 
-  /// CHECK-START: int Main.constantSwitch_AboveRange() dead_code_elimination$final (after)
+  /// CHECK-START: int Main.constantSwitch_AboveRange() dead_code_elimination$after_inlining (after)
   /// CHECK-NOT:                      PackedSwitch
 
   public static int constantSwitch_AboveRange() {
@@ -132,14 +132,14 @@
     return i;
   }
 
-  /// CHECK-START: int Main.constantSwitch_BelowRange() dead_code_elimination$final (before)
+  /// CHECK-START: int Main.constantSwitch_BelowRange() dead_code_elimination$after_inlining (before)
   /// CHECK-DAG:                      PackedSwitch
 
-  /// CHECK-START: int Main.constantSwitch_BelowRange() dead_code_elimination$final (after)
+  /// CHECK-START: int Main.constantSwitch_BelowRange() dead_code_elimination$after_inlining (after)
   /// CHECK-DAG:     <<ConstM5:i\d+>> IntConstant -5
   /// CHECK-DAG:                      Return [<<ConstM5>>]
 
-  /// CHECK-START: int Main.constantSwitch_BelowRange() dead_code_elimination$final (after)
+  /// CHECK-START: int Main.constantSwitch_BelowRange() dead_code_elimination$after_inlining (after)
   /// CHECK-NOT:                      PackedSwitch
 
   public static int constantSwitch_BelowRange() {
diff --git a/test/527-checker-array-access-split/info.txt b/test/527-checker-array-access-split/info.txt
index 9206804..a39bea3 100644
--- a/test/527-checker-array-access-split/info.txt
+++ b/test/527-checker-array-access-split/info.txt
@@ -1 +1 @@
-Test arm64-specific array access optimization.
+Test arm- and arm64-specific array access optimization.
diff --git a/test/530-checker-loops2/src/Main.java b/test/530-checker-loops2/src/Main.java
index 7acf008..47b6475 100644
--- a/test/530-checker-loops2/src/Main.java
+++ b/test/530-checker-loops2/src/Main.java
@@ -111,6 +111,24 @@
     return result;
   }
 
+  /// CHECK-START: int Main.periodicXorSequence(int) BCE (before)
+  /// CHECK-DAG: BoundsCheck
+  //
+  /// CHECK-START: int Main.periodicXorSequence(int) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  private static int periodicXorSequence(int tc) {
+    int[] x = { 1, 3 };
+    // Loop with periodic sequence (0, 1).
+    int k = 0;
+    int result = 0;
+    for (int i = 0; i < tc; i++) {
+      result += x[k];
+      k ^= 1;
+    }
+    return result;
+  }
+
   /// CHECK-START: int Main.justRightUp1() BCE (before)
   /// CHECK-DAG: BoundsCheck
   //
@@ -872,6 +890,26 @@
     return result;
   }
 
+  /// CHECK-START: int Main.shortIndex(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  //
+  /// CHECK-START: int Main.shortIndex(int[]) BCE (after)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>>
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>
+  //
+  /// CHECK-START: int Main.shortIndex(int[]) BCE (after)
+  /// CHECK-NOT: Deoptimize
+  static int shortIndex(int[] a) {
+    int r = 0;
+    // Make sure short/int conversions compiles well (b/32193474).
+    for (short i = 1; i < 10; i++) {
+      int ki = i - 1;
+      r += a[ki] + a[i];
+    }
+    return r;
+  }
+
   //
   // Verifier.
   //
@@ -895,8 +933,9 @@
     expectEquals(0, periodicIdiom(-1));
     for (int tc = 0; tc < 32; tc++) {
       int expected = (tc >> 1) << 2;
-      if ((tc & 1) != 0)
+      if ((tc & 1) != 0) {
         expected += 1;
+      }
       expectEquals(expected, periodicIdiom(tc));
     }
 
@@ -904,8 +943,9 @@
     expectEquals(0, periodicSequence2(-1));
     for (int tc = 0; tc < 32; tc++) {
       int expected = (tc >> 1) << 2;
-      if ((tc & 1) != 0)
+      if ((tc & 1) != 0) {
         expected += 1;
+      }
       expectEquals(expected, periodicSequence2(tc));
     }
 
@@ -915,6 +955,16 @@
       expectEquals(tc * 16, periodicSequence4(tc));
     }
 
+    // Periodic adds (1, 3), one at the time.
+    expectEquals(0, periodicXorSequence(-1));
+    for (int tc = 0; tc < 32; tc++) {
+      int expected = (tc >> 1) << 2;
+      if ((tc & 1) != 0) {
+        expected += 1;
+      }
+      expectEquals(expected, periodicXorSequence(tc));
+    }
+
     // Large bounds.
     expectEquals(55, justRightUp1());
     expectEquals(55, justRightUp2());
@@ -1194,6 +1244,8 @@
     Integer[] x9 = { 9 };
     expectEquals(145, dynamicBCEAndConstantIndexRefType(x, x9, 0, 10));
 
+    expectEquals(99, shortIndex(x));
+
     System.out.println("passed");
   }
 
diff --git a/test/530-checker-loops3/src/Main.java b/test/530-checker-loops3/src/Main.java
index 6b5c657..209786a 100644
--- a/test/530-checker-loops3/src/Main.java
+++ b/test/530-checker-loops3/src/Main.java
@@ -246,7 +246,7 @@
 
     oneConstantIndex(a, b);
     for (int i = 0; i < a.length; i++) {
-      expectEquals(2, a[i]);;
+      expectEquals(2, a[i]);
     }
     try {
       oneConstantIndex(a, b1);
@@ -256,7 +256,7 @@
 
     multipleConstantIndices(a, b);
     for (int i = 0; i < a.length; i++) {
-      expectEquals(6, a[i]);;
+      expectEquals(6, a[i]);
     }
     try {
       multipleConstantIndices(a, b1);
@@ -266,7 +266,7 @@
 
     oneInvariantIndex(a, b, 1);
     for (int i = 0; i < a.length; i++) {
-      expectEquals(2, a[i]);;
+      expectEquals(2, a[i]);
     }
     try {
       oneInvariantIndex(a, b1, 1);
@@ -276,7 +276,7 @@
 
     multipleInvariantIndices(a, b, 1);
     for (int i = 0; i < a.length; i++) {
-      expectEquals(6, a[i]);;
+      expectEquals(6, a[i]);
     }
     try {
       multipleInvariantIndices(a, b1, 1);
@@ -286,18 +286,18 @@
 
     oneUnitStride(a, b);
     for (int i = 0; i < a.length; i++) {
-      expectEquals(i + 1, a[i]);;
+      expectEquals(i + 1, a[i]);
     }
     try {
       oneUnitStride(a, b1);
       throw new Error("Should throw AIOOBE");
     } catch (ArrayIndexOutOfBoundsException e) {
-      expectEquals(100, a[0]);;
+      expectEquals(100, a[0]);
     }
 
     multipleUnitStrides(a, b);
     for (int i = 1; i < a.length - 1; i++) {
-      expectEquals(3 * i + 3, a[i]);;
+      expectEquals(3 * i + 3, a[i]);
     }
     try {
       multipleUnitStrides(a, b1);
@@ -308,7 +308,7 @@
     multipleUnitStridesConditional(a, b);
     for (int i = 2; i < a.length - 2; i++) {
       int e = 3 * i + 3 + (((i & 1) == 0) ? i + 2 : i);
-      expectEquals(e, a[i]);;
+      expectEquals(e, a[i]);
     }
     try {
       multipleUnitStridesConditional(a, b1);
diff --git a/test/530-checker-lse/expected.txt b/test/530-checker-lse/expected.txt
index e69de29..ddae16a 100644
--- a/test/530-checker-lse/expected.txt
+++ b/test/530-checker-lse/expected.txt
@@ -0,0 +1 @@
+java.lang.ArrayIndexOutOfBoundsException: length=3; index=3
diff --git a/test/530-checker-lse/src/Main.java b/test/530-checker-lse/src/Main.java
index 89875d7..9f4be6c 100644
--- a/test/530-checker-lse/src/Main.java
+++ b/test/530-checker-lse/src/Main.java
@@ -18,6 +18,9 @@
   Circle(double radius) {
     this.radius = radius;
   }
+  public double getRadius() {
+    return radius;
+  }
   public double getArea() {
     return radius * radius * Math.PI;
   }
@@ -717,6 +720,71 @@
     return sumWithFilter(array, filter);
   }
 
+  private static int mI = 0;
+  private static float mF = 0f;
+
+  /// CHECK-START: float Main.testAllocationEliminationWithLoops() load_store_elimination (before)
+  /// CHECK: NewInstance
+  /// CHECK: NewInstance
+  /// CHECK: NewInstance
+
+  /// CHECK-START: float Main.testAllocationEliminationWithLoops() load_store_elimination (after)
+  /// CHECK-NOT: NewInstance
+
+  private static float testAllocationEliminationWithLoops() {
+    for (int i0 = 0; i0 < 5; i0++) {
+      for (int i1 = 0; i1 < 5; i1++) {
+        for (int i2 = 0; i2 < 5; i2++) {
+          int lI0 = ((int) new Integer(((int) new Integer(mI))));
+          if (((boolean) new Boolean(false))) {
+            for (int i3 = 576 - 1; i3 >= 0; i3--) {
+              mF -= 976981405.0f;
+            }
+          }
+        }
+      }
+    }
+    return 1.0f;
+  }
+
+  /// CHECK-START: double Main.getCircleArea(double, boolean) load_store_elimination (before)
+  /// CHECK: NewInstance
+
+  /// CHECK-START: double Main.getCircleArea(double, boolean) load_store_elimination (after)
+  /// CHECK-NOT: NewInstance
+
+  private static double getCircleArea(double radius, boolean b) {
+    double area = 0d;
+    if (b) {
+      area = new Circle(radius).getArea();
+    }
+    return area;
+  }
+
+  /// CHECK-START: double Main.testDeoptimize(int[], double[], double) load_store_elimination (before)
+  /// CHECK: Deoptimize
+  /// CHECK: NewInstance
+  /// CHECK: Deoptimize
+  /// CHECK: NewInstance
+
+  /// CHECK-START: double Main.testDeoptimize(int[], double[], double) load_store_elimination (after)
+  /// CHECK: Deoptimize
+  /// CHECK: NewInstance
+  /// CHECK: Deoptimize
+  /// CHECK-NOT: NewInstance
+
+  private static double testDeoptimize(int[] iarr, double[] darr, double radius) {
+    iarr[0] = 1;  // One HDeoptimize here. Not triggered.
+    iarr[1] = 1;
+    Circle circle1 = new Circle(radius);
+    iarr[2] = 1;
+    darr[0] = circle1.getRadius();  // One HDeoptimize here, which holds circle1 live. Triggered.
+    darr[1] = circle1.getRadius();
+    darr[2] = circle1.getRadius();
+    darr[3] = circle1.getRadius();
+    return new Circle(Math.PI).getArea();
+  }
+
   static void assertIntEquals(int result, int expected) {
     if (expected != result) {
       throw new Error("Expected: " + expected + ", found: " + result);
@@ -779,6 +847,24 @@
     assertIntEquals($noinline$testHSelect(true), 0xdead);
     int[] array = {2, 5, 9, -1, -3, 10, 8, 4};
     assertIntEquals(sumWithinRange(array, 1, 5), 11);
+    assertFloatEquals(testAllocationEliminationWithLoops(), 1.0f);
+    assertFloatEquals(mF, 0f);
+    assertDoubleEquals(Math.PI * Math.PI * Math.PI, getCircleArea(Math.PI, true));
+    assertDoubleEquals(0d, getCircleArea(Math.PI, false));
+
+    int[] iarray = {0, 0, 0};
+    double[] darray = {0d, 0d, 0d};
+    try {
+      assertDoubleEquals(Math.PI * Math.PI * Math.PI, testDeoptimize(iarray, darray, Math.PI));
+    } catch (Exception e) {
+      System.out.println(e);
+    }
+    assertIntEquals(iarray[0], 1);
+    assertIntEquals(iarray[1], 1);
+    assertIntEquals(iarray[2], 1);
+    assertDoubleEquals(darray[0], Math.PI);
+    assertDoubleEquals(darray[1], Math.PI);
+    assertDoubleEquals(darray[2], Math.PI);
   }
 
   static boolean sFlag;
diff --git a/test/530-checker-lse2/expected.txt b/test/530-checker-lse2/expected.txt
new file mode 100644
index 0000000..e18fc7e
--- /dev/null
+++ b/test/530-checker-lse2/expected.txt
@@ -0,0 +1,8 @@
+Start....
+r  = 9.649776E8
+mZ = false
+mI = 0
+mJ = -576460752303423488
+mF = NaN
+mD = NaN
+Done....
diff --git a/test/530-checker-lse2/info.txt b/test/530-checker-lse2/info.txt
new file mode 100644
index 0000000..8dd3f50
--- /dev/null
+++ b/test/530-checker-lse2/info.txt
@@ -0,0 +1,2 @@
+Checker test for testing store/allocation elimination in presence of
+HDeoptimize.
diff --git a/test/530-checker-lse2/src/Main.java b/test/530-checker-lse2/src/Main.java
new file mode 100644
index 0000000..0fe3d87
--- /dev/null
+++ b/test/530-checker-lse2/src/Main.java
@@ -0,0 +1,208 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Arrays;
+
+// Modified from a fuzz test.
+public class Main {
+
+  private interface X {
+    int x();
+  }
+
+  private class A {
+    public int a() {
+      return (+ (Math.multiplyExact(mI, mI)));
+    }
+  }
+
+  private class B extends A implements X {
+    public int a() {
+      return super.a() + ((int) (Math.max(364746077.0f, ((float) mD))));
+    }
+    public int x() {
+      return (mI >> (mI++));
+    }
+  }
+
+  private static class C implements X {
+    public static int s() {
+      return 671468641;
+    }
+    public int c() {
+      return -383762838;
+    }
+    public int x() {
+      return -138813312;
+    }
+  }
+
+  private A mA  = new B();
+  private B mB  = new B();
+  private X mBX = new B();
+  private C mC  = new C();
+  private X mCX = new C();
+
+  private boolean mZ = false;
+  private int     mI = 0;
+  private long    mJ = 0;
+  private float   mF = 0;
+  private double  mD = 0;
+
+  private boolean[] mArray = new boolean[576];
+
+  private Main() {
+    boolean a = false;
+    for (int i0 = 0; i0 < 576; i0++) {
+      mArray[i0] = a;
+      a = !a;
+    }
+  }
+
+  /// CHECK-START: float Main.testMethod() load_store_elimination (before)
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: NewInstance
+  /// CHECK-DAG: NewInstance
+  /// CHECK-NOT: NewInstance
+
+  /// CHECK-START: float Main.testMethod() load_store_elimination (after)
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-DAG: Deoptimize
+  /// CHECK-NOT: NewInstance
+
+  private float testMethod() {
+    {
+      int lI0 = (-1456058746 << mI);
+      mD = ((double)(int)(double) mD);
+      for (int i0 = 56 - 1; i0 >= 0; i0--) {
+        mArray[i0] &= (Boolean.logicalOr(((true ? ((boolean) new Boolean((mZ))) : mZ) || mArray[i0]), (mZ)));
+        mF *= (mF * mF);
+        if ((mZ ^ true)) {
+          mF *= ((float)(int)(float) 267827331.0f);
+          mZ ^= ((false & ((boolean) new Boolean(false))) | mZ);
+          for (int i1 = 576 - 1; i1 >= 0; i1--) {
+            mZ &= ((mArray[279]) | ((boolean) new Boolean(true)));
+            mD -= (--mD);
+            for (int i2 = 56 - 1; i2 >= 0; i2--) {
+              mF /= (mF - mF);
+              mI = (Math.min(((int) new Integer(mI)), (766538816 * (++mI))));
+              mF += (mZ ? (mB.a()) : ((! mZ) ? -752042357.0f : (++mF)));
+              mJ |= ((long) new Long((-2084191070L + (mJ | mJ))));
+              lI0 |= ((int) new Integer(((int) new Integer(mI))));
+              if (((boolean) new Boolean(false))) {
+                mZ &= (mZ);
+                mF *= (mF--);
+                mD = (Double.POSITIVE_INFINITY);
+                mF += ((float)(int)(float) (-2026938813.0f * 638401585.0f));
+                mJ = (--mJ);
+                for (int i3 = 56 - 1; i3 >= 0; i3--) {
+                  mI &= (- mI);
+                  mD = (--mD);
+                  mArray[426] = (mZ || false);
+                  mF -= (((this instanceof Main) ? mF : mF) + 976981405.0f);
+                  mZ &= ((mZ) & (this instanceof Main));
+                }
+                mZ ^= (Float.isFinite(-1975953895.0f));
+              } else {
+                mJ /= ((long) (Math.nextDown(-1519600008.0f)));
+                mJ <<= (Math.round(1237681786.0));
+              }
+            }
+            mArray[i0] &= (false || ((1256071300.0f != -353296391.0f) ? false : (mZ ^ mArray[i0])));
+            mF *= (+ ((float) mD));
+            for (int i2 = 0; i2 < 576; i2++) {
+              mD *= ((double) lI0);
+              lI0 = (lI0 & (Integer.MIN_VALUE));
+              mF -= (--mF);
+            }
+            if ((this instanceof Main)) {
+              mZ ^= ((boolean) new Boolean(true));
+            } else {
+              {
+                int lI1 = (mZ ? (--lI0) : 1099574344);
+                mJ >>= (Math.incrementExact(mJ));
+                mJ = (~ -2103354070L);
+              }
+            }
+          }
+        } else {
+          mJ *= (- ((long) new Long(479832084L)));
+          mJ %= (Long.MAX_VALUE);
+          mD /= (--mD);
+          if ((mI > ((mBX.x()) << mI))) {
+            {
+              long lJ0 = (mJ--);
+              mI >>>= (mBX.x());
+            }
+            mF = (+ 505094603.0f);
+            mD *= (((boolean) new Boolean((! false))) ? mD : 1808773781.0);
+            mI *= (Integer.MIN_VALUE);
+            for (int i1 = 576 - 1; i1 >= 0; i1--) {
+              if (((boolean) new Boolean(false))) {
+                mD += ((double)(float)(double) -1051436901.0);
+              } else {
+                mF -= ((float)(int)(float) (Float.min(mF, (mF--))));
+              }
+              for (int i2 = 0; i2 < 576; i2++) {
+                mJ -= ((long) new Long(-1968644857L));
+                mJ ^= (+ (mC.s()));
+              }
+            }
+          } else {
+            mF -= ((- mF) + -2145489966.0f);
+          }
+          mD -= (mD++);
+          mD = (949112777.0 * 1209996119.0);
+        }
+        mZ &= (Boolean.logicalAnd(true, ((mZ) & (((boolean) new Boolean(true)) && true))));
+      }
+    }
+    return ((float) 964977619L);
+  }
+
+  public static void main(String[] args) {
+    System.out.println("Start....");
+    Main t = new Main();
+    float r = 1883600237.0f;
+    try {
+      r = t.testMethod();
+    } catch (Exception e) {
+      // Arithmetic, null pointer, index out of bounds, etc.
+      System.out.println("An exception was caught.");
+    }
+    System.out.println("r  = " + r);
+    System.out.println("mZ = " + t.mZ);
+    System.out.println("mI = " + t.mI);
+    System.out.println("mJ = " + t.mJ);
+    System.out.println("mF = " + t.mF);
+    System.out.println("mD = " + t.mD);
+    System.out.println("Done....");
+  }
+}
+
diff --git a/test/543-checker-dce-trycatch/smali/TestCase.smali b/test/543-checker-dce-trycatch/smali/TestCase.smali
index 5557c7b..f50e01e 100644
--- a/test/543-checker-dce-trycatch/smali/TestCase.smali
+++ b/test/543-checker-dce-trycatch/smali/TestCase.smali
@@ -26,18 +26,18 @@
 # Test a case when one entering TryBoundary is dead but the rest of the try
 # block remains live.
 
-## CHECK-START: int TestCase.testDeadEntry(int, int, int, int) dead_code_elimination$final (before)
+## CHECK-START: int TestCase.testDeadEntry(int, int, int, int) dead_code_elimination$after_inlining (before)
 ## CHECK: Add
 
-## CHECK-START: int TestCase.testDeadEntry(int, int, int, int) dead_code_elimination$final (before)
+## CHECK-START: int TestCase.testDeadEntry(int, int, int, int) dead_code_elimination$after_inlining (before)
 ## CHECK:     TryBoundary kind:entry
 ## CHECK:     TryBoundary kind:entry
 ## CHECK-NOT: TryBoundary kind:entry
 
-## CHECK-START: int TestCase.testDeadEntry(int, int, int, int) dead_code_elimination$final (after)
+## CHECK-START: int TestCase.testDeadEntry(int, int, int, int) dead_code_elimination$after_inlining (after)
 ## CHECK-NOT: Add
 
-## CHECK-START: int TestCase.testDeadEntry(int, int, int, int) dead_code_elimination$final (after)
+## CHECK-START: int TestCase.testDeadEntry(int, int, int, int) dead_code_elimination$after_inlining (after)
 ## CHECK:     TryBoundary kind:entry
 ## CHECK-NOT: TryBoundary kind:entry
 
@@ -71,18 +71,18 @@
 # Test a case when one exiting TryBoundary is dead but the rest of the try
 # block remains live.
 
-## CHECK-START: int TestCase.testDeadExit(int, int, int, int) dead_code_elimination$final (before)
+## CHECK-START: int TestCase.testDeadExit(int, int, int, int) dead_code_elimination$after_inlining (before)
 ## CHECK: Add
 
-## CHECK-START: int TestCase.testDeadExit(int, int, int, int) dead_code_elimination$final (before)
+## CHECK-START: int TestCase.testDeadExit(int, int, int, int) dead_code_elimination$after_inlining (before)
 ## CHECK:     TryBoundary kind:exit
 ## CHECK:     TryBoundary kind:exit
 ## CHECK-NOT: TryBoundary kind:exit
 
-## CHECK-START: int TestCase.testDeadExit(int, int, int, int) dead_code_elimination$final (after)
+## CHECK-START: int TestCase.testDeadExit(int, int, int, int) dead_code_elimination$after_inlining (after)
 ## CHECK-NOT: Add
 
-## CHECK-START: int TestCase.testDeadExit(int, int, int, int) dead_code_elimination$final (after)
+## CHECK-START: int TestCase.testDeadExit(int, int, int, int) dead_code_elimination$after_inlining (after)
 ## CHECK:     TryBoundary kind:exit
 ## CHECK-NOT: TryBoundary kind:exit
 
@@ -117,21 +117,21 @@
 # Test that a catch block remains live and consistent if some of try blocks
 # throwing into it are removed.
 
-## CHECK-START: int TestCase.testOneTryBlockDead(int, int, int, int) dead_code_elimination$final (before)
+## CHECK-START: int TestCase.testOneTryBlockDead(int, int, int, int) dead_code_elimination$after_inlining (before)
 ## CHECK:     TryBoundary kind:entry
 ## CHECK:     TryBoundary kind:entry
 ## CHECK-NOT: TryBoundary kind:entry
 
-## CHECK-START: int TestCase.testOneTryBlockDead(int, int, int, int) dead_code_elimination$final (before)
+## CHECK-START: int TestCase.testOneTryBlockDead(int, int, int, int) dead_code_elimination$after_inlining (before)
 ## CHECK:     TryBoundary kind:exit
 ## CHECK:     TryBoundary kind:exit
 ## CHECK-NOT: TryBoundary kind:exit
 
-## CHECK-START: int TestCase.testOneTryBlockDead(int, int, int, int) dead_code_elimination$final (after)
+## CHECK-START: int TestCase.testOneTryBlockDead(int, int, int, int) dead_code_elimination$after_inlining (after)
 ## CHECK:     TryBoundary kind:entry
 ## CHECK-NOT: TryBoundary kind:entry
 
-## CHECK-START: int TestCase.testOneTryBlockDead(int, int, int, int) dead_code_elimination$final (after)
+## CHECK-START: int TestCase.testOneTryBlockDead(int, int, int, int) dead_code_elimination$after_inlining (after)
 ## CHECK:     TryBoundary kind:exit
 ## CHECK-NOT: TryBoundary kind:exit
 
@@ -203,7 +203,7 @@
 
 # Test that DCE removes catch phi uses of instructions defined in dead try blocks.
 
-## CHECK-START: int TestCase.testCatchPhiInputs_DefinedInTryBlock(int, int, int, int) dead_code_elimination$final (before)
+## CHECK-START: int TestCase.testCatchPhiInputs_DefinedInTryBlock(int, int, int, int) dead_code_elimination$after_inlining (before)
 ## CHECK-DAG:     <<Arg0:i\d+>>      ParameterValue
 ## CHECK-DAG:     <<Arg1:i\d+>>      ParameterValue
 ## CHECK-DAG:     <<Const0xa:i\d+>>  IntConstant 10
@@ -220,7 +220,7 @@
 ## CHECK-DAG:                        Phi [<<Add>>,<<Const0xc>>,<<Const0xe>>] reg:2 is_catch_phi:true
 ## CHECK-DAG:                        Phi [<<Select>>,<<Const0x10>>,<<Const0x11>>] reg:3 is_catch_phi:true
 
-## CHECK-START: int TestCase.testCatchPhiInputs_DefinedInTryBlock(int, int, int, int) dead_code_elimination$final (after)
+## CHECK-START: int TestCase.testCatchPhiInputs_DefinedInTryBlock(int, int, int, int) dead_code_elimination$after_inlining (after)
 ## CHECK-DAG:     <<Const0xb:i\d+>>  IntConstant 11
 ## CHECK-DAG:     <<Const0xc:i\d+>>  IntConstant 12
 ## CHECK-DAG:     <<Const0xd:i\d+>>  IntConstant 13
@@ -277,7 +277,7 @@
 # Test that DCE does not remove catch phi uses of instructions defined outside
 # dead try blocks.
 
-## CHECK-START: int TestCase.testCatchPhiInputs_DefinedOutsideTryBlock(int, int, int, int) dead_code_elimination$final (before)
+## CHECK-START: int TestCase.testCatchPhiInputs_DefinedOutsideTryBlock(int, int, int, int) dead_code_elimination$after_inlining (before)
 ## CHECK-DAG:     <<Const0xa:i\d+>> IntConstant 10
 ## CHECK-DAG:     <<Const0xb:i\d+>> IntConstant 11
 ## CHECK-DAG:     <<Const0xc:i\d+>> IntConstant 12
@@ -287,7 +287,7 @@
 ## CHECK-DAG:                       Phi [<<Const0xa>>,<<Const0xb>>,<<Const0xd>>] reg:1 is_catch_phi:true
 ## CHECK-DAG:                       Phi [<<Const0xf>>,<<Const0xc>>,<<Const0xe>>] reg:2 is_catch_phi:true
 
-## CHECK-START: int TestCase.testCatchPhiInputs_DefinedOutsideTryBlock(int, int, int, int) dead_code_elimination$final (after)
+## CHECK-START: int TestCase.testCatchPhiInputs_DefinedOutsideTryBlock(int, int, int, int) dead_code_elimination$after_inlining (after)
 ## CHECK-DAG:     <<Const0xa:i\d+>> IntConstant 10
 ## CHECK-DAG:     <<Const0xb:i\d+>> IntConstant 11
 ## CHECK-DAG:     <<Const0xc:i\d+>> IntConstant 12
diff --git a/test/543-checker-dce-trycatch/src/Main.java b/test/543-checker-dce-trycatch/src/Main.java
index 19587e7..0d7596a 100644
--- a/test/543-checker-dce-trycatch/src/Main.java
+++ b/test/543-checker-dce-trycatch/src/Main.java
@@ -35,10 +35,10 @@
   // where TryBoundary still has exception handler successors after having removed
   // some already.
 
-  /// CHECK-START: void Main.testDeadTryCatch(boolean) dead_code_elimination$final (after)
+  /// CHECK-START: void Main.testDeadTryCatch(boolean) dead_code_elimination$after_inlining (after)
   /// CHECK-NOT: TryBoundary
 
-  /// CHECK-START: void Main.testDeadTryCatch(boolean) dead_code_elimination$final (after)
+  /// CHECK-START: void Main.testDeadTryCatch(boolean) dead_code_elimination$after_inlining (after)
   /// CHECK: begin_block
   /// CHECK: begin_block
   /// CHECK: begin_block
@@ -63,4 +63,4 @@
   public static void main(String[] args) {
 
   }
-}
\ No newline at end of file
+}
diff --git a/test/552-checker-sharpening/src/Main.java b/test/552-checker-sharpening/src/Main.java
index 3c053cf..9e475ab 100644
--- a/test/552-checker-sharpening/src/Main.java
+++ b/test/552-checker-sharpening/src/Main.java
@@ -252,27 +252,27 @@
   /// CHECK-START-X86: java.lang.String Main.$noinline$getBootImageString() sharpening (after)
   // Note: load kind depends on PIC/non-PIC
   // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
-  /// CHECK:                LoadString load_kind:{{BootImageAddress|DexCachePcRelative|DexCacheViaMethod}}
+  /// CHECK:                LoadString load_kind:{{BootImageAddress|BssEntry|DexCacheViaMethod}}
 
   /// CHECK-START-X86_64: java.lang.String Main.$noinline$getBootImageString() sharpening (after)
   // Note: load kind depends on PIC/non-PIC
   // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
-  /// CHECK:                LoadString load_kind:{{BootImageAddress|DexCachePcRelative|DexCacheViaMethod}}
+  /// CHECK:                LoadString load_kind:{{BootImageAddress|BssEntry|DexCacheViaMethod}}
 
   /// CHECK-START-ARM: java.lang.String Main.$noinline$getBootImageString() sharpening (after)
   // Note: load kind depends on PIC/non-PIC
   // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
-  /// CHECK:                LoadString load_kind:{{BootImageAddress|DexCachePcRelative|DexCacheViaMethod}}
+  /// CHECK:                LoadString load_kind:{{BootImageAddress|BssEntry|DexCacheViaMethod}}
 
   /// CHECK-START-ARM64: java.lang.String Main.$noinline$getBootImageString() sharpening (after)
   // Note: load kind depends on PIC/non-PIC
   // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
-  /// CHECK:                LoadString load_kind:{{BootImageAddress|DexCachePcRelative|DexCacheViaMethod}}
+  /// CHECK:                LoadString load_kind:{{BootImageAddress|BssEntry|DexCacheViaMethod}}
 
   /// CHECK-START-MIPS: java.lang.String Main.$noinline$getBootImageString() sharpening (after)
   // Note: load kind depends on PIC/non-PIC
   // TODO: Remove DexCacheViaMethod when read barrier config supports BootImageAddress.
-  /// CHECK:                LoadString load_kind:{{BootImageAddress|DexCachePcRelative|DexCacheViaMethod}}
+  /// CHECK:                LoadString load_kind:{{BootImageAddress|BssEntry|DexCacheViaMethod}}
 
   public static String $noinline$getBootImageString() {
     // Prevent inlining to avoid the string comparison being optimized away.
@@ -303,10 +303,6 @@
   /// CHECK-START-MIPS: java.lang.String Main.$noinline$getNonBootImageString() sharpening (after)
   /// CHECK:                LoadString load_kind:BssEntry
 
-  /// CHECK-START-MIPS: java.lang.String Main.$noinline$getNonBootImageString() pc_relative_fixups_mips (after)
-  /// CHECK-DAG:            MipsComputeBaseMethodAddress
-  /// CHECK-DAG:            LoadString load_kind:BssEntry
-
   public static String $noinline$getNonBootImageString() {
     // Prevent inlining to avoid the string comparison being optimized away.
     if (doThrow) { throw new Error(); }
diff --git a/test/557-checker-instruct-simplifier-ror/src/Main.java b/test/557-checker-instruct-simplifier-ror/src/Main.java
index 0e3d145..3631353 100644
--- a/test/557-checker-instruct-simplifier-ror/src/Main.java
+++ b/test/557-checker-instruct-simplifier-ror/src/Main.java
@@ -175,7 +175,7 @@
 
   //  (i >>> #distance) | (i << #-distance)
 
-  /// CHECK-START: int Main.ror_int_constant_c_negc(int) instruction_simplifier$after_bce (before)
+  /// CHECK-START: int Main.ror_int_constant_c_negc(int) instruction_simplifier$after_inlining (before)
   /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
   /// CHECK:          <<Const2:i\d+>>       IntConstant 2
   /// CHECK:          <<ConstNeg2:i\d+>>    IntConstant -2
@@ -184,13 +184,13 @@
   /// CHECK:          <<Or:i\d+>>           Or [<<UShr>>,<<Shl>>]
   /// CHECK:                                Return [<<Or>>]
 
-  /// CHECK-START: int Main.ror_int_constant_c_negc(int) instruction_simplifier$after_bce (after)
+  /// CHECK-START: int Main.ror_int_constant_c_negc(int) instruction_simplifier$after_inlining (after)
   /// CHECK:          <<ArgValue:i\d+>>     ParameterValue
   /// CHECK:          <<Const2:i\d+>>       IntConstant 2
   /// CHECK:          <<Ror:i\d+>>          Ror [<<ArgValue>>,<<Const2>>]
   /// CHECK:                                Return [<<Ror>>]
 
-  /// CHECK-START: int Main.ror_int_constant_c_negc(int) instruction_simplifier$after_bce (after)
+  /// CHECK-START: int Main.ror_int_constant_c_negc(int) instruction_simplifier$after_inlining (after)
   /// CHECK-NOT:      UShr
   /// CHECK-NOT:      Shl
   public static int ror_int_constant_c_negc(int value) {
diff --git a/test/559-checker-irreducible-loop/smali/IrreducibleLoop.smali b/test/559-checker-irreducible-loop/smali/IrreducibleLoop.smali
index 5d4aa56..af43973 100644
--- a/test/559-checker-irreducible-loop/smali/IrreducibleLoop.smali
+++ b/test/559-checker-irreducible-loop/smali/IrreducibleLoop.smali
@@ -196,7 +196,7 @@
   const-class v0, LMain;
   if-ne v0, v2, :exit
   :other_loop_entry
-  const-class v1, LIrreducibleLoop;
+  const-class v1, Ljava/lang/Class;  # LoadClass that can throw
   goto :loop_entry
   :exit
   return-object v0
@@ -250,7 +250,7 @@
   const/4 v0, 0
   if-ne p0, v0, :other_loop_entry
   :loop_entry
-  const-class v1, LIrreducibleLoop;
+  const-class v1, Ljava/lang/Class;  # LoadClass that can throw
   if-ne v0, p0, :exit
   :other_loop_entry
   sub-int v1, p0, p0
@@ -286,7 +286,7 @@
 .method public static licm3(III)I
   .registers 4
   :loop_entry
-  const-class v0, LIrreducibleLoop;
+  const-class v0, Ljava/lang/Class;  # LoadClass that can throw
   if-ne p1, p2, :exit
   goto :loop_body
 
diff --git a/test/562-no-intermediate/expected.txt b/test/562-checker-no-intermediate/expected.txt
similarity index 100%
rename from test/562-no-intermediate/expected.txt
rename to test/562-checker-no-intermediate/expected.txt
diff --git a/test/562-checker-no-intermediate/info.txt b/test/562-checker-no-intermediate/info.txt
new file mode 100644
index 0000000..38f1f65
--- /dev/null
+++ b/test/562-checker-no-intermediate/info.txt
@@ -0,0 +1,2 @@
+Regression test for optimizing, checking that there is no
+intermediate address live across a Java call.
diff --git a/test/562-checker-no-intermediate/src/Main.java b/test/562-checker-no-intermediate/src/Main.java
new file mode 100644
index 0000000..104ba8b
--- /dev/null
+++ b/test/562-checker-no-intermediate/src/Main.java
@@ -0,0 +1,97 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+
+  /**
+   * Check that the intermediate address computation is not reordered or merged
+   * across the call to Math.abs().
+   */
+
+  /// CHECK-START-ARM: void Main.main(java.lang.String[]) instruction_simplifier_arm (before)
+  /// CHECK-DAG:           <<ConstM42:i\d+>>      IntConstant -42
+  /// CHECK-DAG:           <<Array:l\d+>>         NullCheck
+  /// CHECK-DAG:           <<Index:i\d+>>         BoundsCheck
+  /// CHECK-DAG:           <<ArrayGet:i\d+>>      ArrayGet [<<Array>>,<<Index>>]
+  /// CHECK-DAG:           <<AbsM42:i\d+>>        InvokeStaticOrDirect [<<ConstM42>>] intrinsic:MathAbsInt
+  /// CHECK-DAG:           <<Add:i\d+>>           Add [<<ArrayGet>>,<<AbsM42>>]
+  /// CHECK-DAG:                                  ArraySet [<<Array>>,<<Index>>,<<Add>>]
+
+  /// CHECK-START-ARM: void Main.main(java.lang.String[]) instruction_simplifier_arm (after)
+  /// CHECK-DAG:           <<ConstM42:i\d+>>      IntConstant -42
+  /// CHECK-DAG:           <<DataOffset:i\d+>>    IntConstant
+  /// CHECK-DAG:           <<Array:l\d+>>         NullCheck
+  /// CHECK-DAG:           <<Index:i\d+>>         BoundsCheck
+  /// CHECK-DAG:           <<Address1:i\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-DAG:           <<ArrayGet:i\d+>>      ArrayGet [<<Address1>>,<<Index>>]
+  /// CHECK-DAG:           <<AbsM42:i\d+>>        InvokeStaticOrDirect [<<ConstM42>>] intrinsic:MathAbsInt
+  /// CHECK-DAG:           <<Add:i\d+>>           Add [<<ArrayGet>>,<<AbsM42>>]
+  /// CHECK-DAG:           <<Address2:i\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-DAG:                                  ArraySet [<<Address2>>,<<Index>>,<<Add>>]
+
+  /// CHECK-START-ARM: void Main.main(java.lang.String[]) GVN$after_arch (after)
+  /// CHECK-DAG:           <<ConstM42:i\d+>>      IntConstant -42
+  /// CHECK-DAG:           <<DataOffset:i\d+>>    IntConstant
+  /// CHECK-DAG:           <<Array:l\d+>>         NullCheck
+  /// CHECK-DAG:           <<Index:i\d+>>         BoundsCheck
+  /// CHECK-DAG:           <<Address1:i\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-DAG:           <<ArrayGet:i\d+>>      ArrayGet [<<Address1>>,<<Index>>]
+  /// CHECK-DAG:           <<AbsM42:i\d+>>        InvokeStaticOrDirect [<<ConstM42>>] intrinsic:MathAbsInt
+  /// CHECK-DAG:           <<Add:i\d+>>           Add [<<ArrayGet>>,<<AbsM42>>]
+  /// CHECK-DAG:           <<Address2:i\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-DAG:                                  ArraySet [<<Address2>>,<<Index>>,<<Add>>]
+
+
+  /// CHECK-START-ARM64: void Main.main(java.lang.String[]) instruction_simplifier_arm64 (before)
+  /// CHECK-DAG:           <<ConstM42:i\d+>>      IntConstant -42
+  /// CHECK-DAG:           <<Array:l\d+>>         NullCheck
+  /// CHECK-DAG:           <<Index:i\d+>>         BoundsCheck
+  /// CHECK-DAG:           <<ArrayGet:i\d+>>      ArrayGet [<<Array>>,<<Index>>]
+  /// CHECK-DAG:           <<AbsM42:i\d+>>        InvokeStaticOrDirect [<<ConstM42>>] intrinsic:MathAbsInt
+  /// CHECK-DAG:           <<Add:i\d+>>           Add [<<ArrayGet>>,<<AbsM42>>]
+  /// CHECK-DAG:                                  ArraySet [<<Array>>,<<Index>>,<<Add>>]
+
+  /// CHECK-START-ARM64: void Main.main(java.lang.String[]) instruction_simplifier_arm64 (after)
+  /// CHECK-DAG:           <<ConstM42:i\d+>>      IntConstant -42
+  /// CHECK-DAG:           <<DataOffset:i\d+>>    IntConstant
+  /// CHECK-DAG:           <<Array:l\d+>>         NullCheck
+  /// CHECK-DAG:           <<Index:i\d+>>         BoundsCheck
+  /// CHECK-DAG:           <<Address1:i\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-DAG:           <<ArrayGet:i\d+>>      ArrayGet [<<Address1>>,<<Index>>]
+  /// CHECK-DAG:           <<AbsM42:i\d+>>        InvokeStaticOrDirect [<<ConstM42>>] intrinsic:MathAbsInt
+  /// CHECK-DAG:           <<Add:i\d+>>           Add [<<ArrayGet>>,<<AbsM42>>]
+  /// CHECK-DAG:           <<Address2:i\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-DAG:                                  ArraySet [<<Address2>>,<<Index>>,<<Add>>]
+
+  /// CHECK-START-ARM64: void Main.main(java.lang.String[]) GVN$after_arch (after)
+  /// CHECK-DAG:           <<ConstM42:i\d+>>      IntConstant -42
+  /// CHECK-DAG:           <<DataOffset:i\d+>>    IntConstant
+  /// CHECK-DAG:           <<Array:l\d+>>         NullCheck
+  /// CHECK-DAG:           <<Index:i\d+>>         BoundsCheck
+  /// CHECK-DAG:           <<Address1:i\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-DAG:           <<ArrayGet:i\d+>>      ArrayGet [<<Address1>>,<<Index>>]
+  /// CHECK-DAG:           <<AbsM42:i\d+>>        InvokeStaticOrDirect [<<ConstM42>>] intrinsic:MathAbsInt
+  /// CHECK-DAG:           <<Add:i\d+>>           Add [<<ArrayGet>>,<<AbsM42>>]
+  /// CHECK-DAG:           <<Address2:i\d+>>      IntermediateAddress [<<Array>>,<<DataOffset>>]
+  /// CHECK-DAG:                                  ArraySet [<<Address2>>,<<Index>>,<<Add>>]
+
+  public static void main(String[] args) {
+    array[index] += Math.abs(-42);
+  }
+
+  static int index = 0;
+  static int[] array = new int[2];
+}
diff --git a/test/562-no-intermediate/info.txt b/test/562-no-intermediate/info.txt
deleted file mode 100644
index 4f21aeb..0000000
--- a/test/562-no-intermediate/info.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-Regression test for optimizing, checking that there is no
-intermediate address between a Java call.
diff --git a/test/570-checker-osr/src/Main.java b/test/570-checker-osr/src/Main.java
index 8af3894..4de5634 100644
--- a/test/570-checker-osr/src/Main.java
+++ b/test/570-checker-osr/src/Main.java
@@ -17,26 +17,6 @@
 public class Main {
   public static void main(String[] args) {
     System.loadLibrary(args[0]);
-    Thread testThread = new Thread() {
-      public void run() {
-        performTest();
-      }
-    };
-    testThread.start();
-    try {
-      testThread.join(20 * 1000);  // 20s timeout.
-    } catch (InterruptedException ie) {
-      System.out.println("Interrupted.");
-      System.exit(1);
-    }
-    Thread.State state = testThread.getState();
-    if (state != Thread.State.TERMINATED) {
-      System.out.println("Test timed out, current state: " + state);
-      System.exit(1);
-    }
-  }
-
-  public static void performTest() {
     new SubMain();
     if ($noinline$returnInt() != 53) {
       throw new Error("Unexpected return value");
diff --git a/test/586-checker-null-array-get/src/Main.java b/test/586-checker-null-array-get/src/Main.java
index e0782bc..0ea7d34 100644
--- a/test/586-checker-null-array-get/src/Main.java
+++ b/test/586-checker-null-array-get/src/Main.java
@@ -100,7 +100,7 @@
   /// CHECK-DAG:                     Return [<<ArrayGet2>>]
   public static float test1() {
     Test1 test1 = getNullTest1();
-    Test2 test2 = getNullTest2();;
+    Test2 test2 = getNullTest2();
     int[] iarr = test1.iarr;
     float[] farr = test2.farr;
     iarr[0] = iarr[1];
diff --git a/test/611-checker-simplify-if/src/Main.java b/test/611-checker-simplify-if/src/Main.java
index 7dac007..c1d75ec 100644
--- a/test/611-checker-simplify-if/src/Main.java
+++ b/test/611-checker-simplify-if/src/Main.java
@@ -64,13 +64,13 @@
 
   // Test when the phi is the input of the if.
 
-  /// CHECK-START: void Main.testInline(java.lang.String[]) dead_code_elimination$final (before)
+  /// CHECK-START: void Main.testInline(java.lang.String[]) dead_code_elimination$after_inlining (before)
   /// CHECK-DAG: <<Const0:i\d+>>   IntConstant 0
   /// CHECK-DAG:                   If
   /// CHECK-DAG: <<Phi:i\d+>>      Phi
   /// CHECK-DAG:                   If [<<Phi>>]
 
-  /// CHECK-START: void Main.testInline(java.lang.String[]) dead_code_elimination$final (after)
+  /// CHECK-START: void Main.testInline(java.lang.String[]) dead_code_elimination$after_inlining (after)
   /// CHECK:      If
   /// CHECK-NOT:  Phi
   /// CHECK-NOT:  If
@@ -144,7 +144,7 @@
   /// CHECK-NOT:                          GreaterThanOrEqual
   /// CHECK-NOT:                          If
   public static void testGreaterCondition(String[] args) {
-    int a = 42;;
+    int a = 42;
     if (args.length == 42) {
       a = 34;
     } else {
diff --git a/test/618-checker-induction/src/Main.java b/test/618-checker-induction/src/Main.java
index a68c383..f85479a 100644
--- a/test/618-checker-induction/src/Main.java
+++ b/test/618-checker-induction/src/Main.java
@@ -31,6 +31,26 @@
     }
   }
 
+  /// CHECK-START: void Main.deadSingleLoop() loop_optimization (before)
+  /// CHECK-DAG: Phi loop:{{B\d+}} outer_loop:none
+  //
+  /// CHECK-START: void Main.deadSingleLoop() loop_optimization (after)
+  /// CHECK-NOT: Phi loop:{{B\d+}} outer_loop:none
+  static void deadSingleLoopN(int n) {
+    for (int i = 0; i < n; i++) {
+    }
+  }
+
+  /// CHECK-START: void Main.potentialInfiniteLoop(int) loop_optimization (before)
+  /// CHECK-DAG: Phi loop:{{B\d+}} outer_loop:none
+  //
+  /// CHECK-START: void Main.potentialInfiniteLoop(int) loop_optimization (after)
+  /// CHECK-DAG: Phi loop:{{B\d+}} outer_loop:none
+  static void potentialInfiniteLoop(int n) {
+    for (int i = 0; i <= n; i++) {  // loops forever when n = MAX_INT
+    }
+  }
+
   /// CHECK-START: void Main.deadNestedLoops() loop_optimization (before)
   /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: Phi loop:{{B\d+}}      outer_loop:<<Loop>>
@@ -72,6 +92,43 @@
     }
   }
 
+  /// CHECK-START: void Main.deadConditional(int) loop_optimization (before)
+  /// CHECK-DAG: Phi loop:{{B\d+}} outer_loop:none
+  //
+  /// CHECK-START: void Main.deadConditional(int) loop_optimization (after)
+  /// CHECK-NOT: Phi loop:{{B\d+}}
+  public static void deadConditional(int n) {
+    int k = 0;
+    int m = 0;
+    for (int i = 0; i < n; i++) {
+      if (i == 3)
+        k = i;
+      else
+        m = i;
+    }
+  }
+
+  /// CHECK-START: void Main.deadConditionalCycle(int) loop_optimization (before)
+  /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: Phi loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: Phi loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: Phi loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: Phi loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: void Main.deadConditionalCycle(int) loop_optimization (after)
+  /// CHECK-NOT: Phi loop:{{B\d+}}
+  public static void deadConditionalCycle(int n) {
+    int k = 0;
+    int m = 0;
+    for (int i = 0; i < n; i++) {
+      if (i == 3)
+        k--;
+      else
+        m++;
+    }
+  }
+
+
   /// CHECK-START: void Main.deadInduction() loop_optimization (before)
   /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-DAG: Phi      loop:<<Loop>>      outer_loop:none
@@ -134,17 +191,20 @@
   /// CHECK-DAG: Phi      loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-NOT: BoundsCheck
   //
   /// CHECK-START: void Main.deadCycleWithException(int) loop_optimization (after)
   /// CHECK-DAG: Phi      loop:<<Loop:B\d+>> outer_loop:none
   /// CHECK-NOT: Phi      loop:<<Loop>>      outer_loop:none
   /// CHECK-DAG: ArraySet loop:<<Loop>>      outer_loop:none
-  /// CHECK-DAG: ArrayGet loop:<<Loop>>      outer_loop:none
+  /// CHECK-NOT: ArrayGet loop:<<Loop>>      outer_loop:none
   static void deadCycleWithException(int k) {
     int dead = 0;
     for (int i = 0; i < a.length; i++) {
       a[i] = 4;
-      // Increment value of dead cycle may throw exception.
+      // Increment value of dead cycle may throw exception. Dynamic
+      // BCE takes care of the bounds check though, which enables
+      // removing the ArrayGet after removing the dead cycle.
       dead += a[k];
     }
   }
@@ -155,8 +215,12 @@
   /// CHECK-DAG:               Return [<<Phi1>>] loop:none
   //
   /// CHECK-START: int Main.closedFormInductionUp() loop_optimization (after)
-  /// CHECK-NOT:               Phi    loop:B\d+ outer_loop:none
+  /// CHECK-NOT:               Phi    loop:{{B\d+}} outer_loop:none
   /// CHECK-DAG:               Return loop:none
+  //
+  /// CHECK-START: int Main.closedFormInductionUp() instruction_simplifier$after_bce (after)
+  /// CHECK-DAG: <<Int:i\d+>>  IntConstant 12395
+  /// CHECK-DAG:               Return [<<Int>>] loop:none
   static int closedFormInductionUp() {
     int closed = 12345;
     for (int i = 0; i < 10; i++) {
@@ -171,7 +235,7 @@
   /// CHECK-DAG:               Return [<<Phi2>>] loop:none
   //
   /// CHECK-START: int Main.closedFormInductionInAndDown(int) loop_optimization (after)
-  /// CHECK-NOT:               Phi    loop:B\d+ outer_loop:none
+  /// CHECK-NOT:               Phi    loop:{{B\d+}} outer_loop:none
   /// CHECK-DAG:               Return loop:none
   static int closedFormInductionInAndDown(int closed) {
     for (int i = 0; i < 10; i++) {
@@ -180,6 +244,56 @@
     return closed;  // only needs last value
   }
 
+  /// CHECK-START: int Main.closedFormNested() loop_optimization (before)
+  /// CHECK-DAG: <<Phi1:i\d+>> Phi               loop:<<Loop1:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Phi2:i\d+>> Phi               loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: <<Phi3:i\d+>> Phi               loop:<<Loop2:B\d+>> outer_loop:<<Loop1>>
+  /// CHECK-DAG: <<Phi4:i\d+>> Phi               loop:<<Loop2>>      outer_loop:<<Loop1>>
+  /// CHECK-DAG:               Return [<<Phi1>>] loop:none
+  //
+  /// CHECK-START: int Main.closedFormNested() loop_optimization (after)
+  /// CHECK-NOT:               Phi    loop:{{B\d+}} outer_loop:none
+  /// CHECK-NOT:               Phi    loop:{{B\d+}} outer_loop:loop{{B\d+}}
+  /// CHECK-DAG:               Return loop:none
+  //
+  /// CHECK-START: int Main.closedFormNested() instruction_simplifier$after_bce (after)
+  /// CHECK-DAG: <<Int:i\d+>>  IntConstant 100
+  /// CHECK-DAG:               Return [<<Int>>] loop:none
+  static int closedFormNested() {
+    int closed = 0;
+    for (int i = 0; i < 10; i++) {
+      for (int j = 0; j < 10; j++) {
+        closed++;
+      }
+    }
+    return closed;  // only needs last-value
+  }
+
+  /// CHECK-START: int Main.closedFormNestedAlt() loop_optimization (before)
+  /// CHECK-DAG: <<Phi1:i\d+>> Phi               loop:<<Loop1:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Phi2:i\d+>> Phi               loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: <<Phi3:i\d+>> Phi               loop:<<Loop2:B\d+>> outer_loop:<<Loop1>>
+  /// CHECK-DAG: <<Phi4:i\d+>> Phi               loop:<<Loop2>>      outer_loop:<<Loop1>>
+  /// CHECK-DAG:               Return [<<Phi1>>] loop:none
+  //
+  /// CHECK-START: int Main.closedFormNestedAlt() loop_optimization (after)
+  /// CHECK-NOT:               Phi    loop:{{B\d+}} outer_loop:none
+  /// CHECK-NOT:               Phi    loop:{{B\d+}} outer_loop:loop{{B\d+}}
+  /// CHECK-DAG:               Return loop:none
+  //
+  /// CHECK-START: int Main.closedFormNestedAlt() instruction_simplifier$after_bce (after)
+  /// CHECK-DAG: <<Int:i\d+>>  IntConstant 15082
+  /// CHECK-DAG:               Return [<<Int>>] loop:none
+  static int closedFormNestedAlt() {
+    int closed = 12345;
+    for (int i = 0; i < 17; i++) {
+      for (int j = 0; j < 23; j++) {
+        closed += 7;
+      }
+    }
+    return closed;  // only needs last-value
+  }
+
   // TODO: taken test around closed form?
   static int closedFormInductionUpN(int n) {
     int closed = 12345;
@@ -198,7 +312,7 @@
   }
 
   // TODO: move closed form even further out?
-  static int closedFormNested(int n) {
+  static int closedFormNestedN(int n) {
     int closed = 0;
     for (int i = 0; i < n; i++) {
       for (int j = 0; j < 10; j++) {
@@ -208,34 +322,97 @@
     return closed;  // only needs last-value
   }
 
-  // TODO: handle as closed/empty eventually?
-  static int mainIndexReturned(int n) {
+  // TODO: move closed form even further out?
+  static int closedFormNestedNAlt(int n) {
+    int closed = 12345;
+    for (int i = 0; i < n; i++) {
+      for (int j = 0; j < 23; j++) {
+        closed += 7;
+      }
+    }
+    return closed;  // only needs last-value
+  }
+
+  // TODO: move closed form even further out?
+  static int closedFormNestedMN(int m, int n) {
+    int closed = 0;
+    for (int i = 0; i < m; i++) {
+      for (int j = 0; j < n; j++) {
+        closed++;
+      }
+    }
+    return closed;  // only needs last-value
+  }
+
+  // TODO: move closed form even further out?
+  static int closedFormNestedMNAlt(int m, int n) {
+    int closed = 12345;
+    for (int i = 0; i < m; i++) {
+      for (int j = 0; j < n; j++) {
+        closed += 7;
+      }
+    }
+    return closed;  // only needs last-value
+  }
+
+  /// CHECK-START: int Main.mainIndexReturned() loop_optimization (before)
+  /// CHECK-DAG: <<Phi:i\d+>> Phi              loop:{{B\d+}} outer_loop:none
+  /// CHECK-DAG:              Return [<<Phi>>] loop:none
+  //
+  /// CHECK-START: int Main.mainIndexReturned() loop_optimization (after)
+  /// CHECK-NOT:              Phi    loop:{{B\d+}} outer_loop:none
+  /// CHECK-DAG:              Return loop:none
+  //
+  /// CHECK-START: int Main.mainIndexReturned() instruction_simplifier$after_bce (after)
+  /// CHECK-DAG: <<Int:i\d+>>  IntConstant 10
+  /// CHECK-DAG:               Return [<<Int>>] loop:none
+  static int mainIndexReturned() {
     int i;
-    for (i = 0; i < n; i++);
+    for (i = 0; i < 10; i++);
     return i;
   }
 
-  // If ever replaced by closed form, last value should be correct!
-  static int periodicReturned(int n) {
+  /// CHECK-START: int Main.periodicReturned9() loop_optimization (before)
+  /// CHECK-DAG: <<Phi1:i\d+>> Phi               loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Phi2:i\d+>> Phi               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               Return [<<Phi2>>] loop:none
+  //
+  /// CHECK-START: int Main.periodicReturned9() loop_optimization (after)
+  /// CHECK-NOT:               Phi    loop:{{B\d+}} outer_loop:none
+  /// CHECK-DAG:               Return loop:none
+  //
+  /// CHECK-START: int Main.periodicReturned9() instruction_simplifier$after_bce (after)
+  /// CHECK-DAG: <<Int:i\d+>>  IntConstant 1
+  /// CHECK-DAG:               Return [<<Int>>] loop:none
+  static int periodicReturned9() {
     int k = 0;
-    for (int i = 0; i < n; i++) {
+    for (int i = 0; i < 9; i++) {
       k = 1 - k;
     }
     return k;
   }
 
-  // Same here.
-  private static int getSum(int n) {
+  /// CHECK-START: int Main.periodicReturned10() loop_optimization (before)
+  /// CHECK-DAG: <<Phi1:i\d+>> Phi               loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Phi2:i\d+>> Phi               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               Return [<<Phi2>>] loop:none
+  //
+  /// CHECK-START: int Main.periodicReturned10() loop_optimization (after)
+  /// CHECK-NOT:               Phi    loop:{{B\d+}} outer_loop:none
+  /// CHECK-DAG:               Return loop:none
+  //
+  /// CHECK-START: int Main.periodicReturned10() instruction_simplifier$after_bce (after)
+  /// CHECK-DAG: <<Int:i\d+>>  IntConstant 0
+  /// CHECK-DAG:               Return [<<Int>>] loop:none
+  static int periodicReturned10() {
     int k = 0;
-    int sum = 0;
-    for (int i = 0; i < n; i++) {
-      k++;
-      sum += k;
+    for (int i = 0; i < 10; i++) {
+      k = 1 - k;
     }
-    return sum;
+    return k;
   }
 
-  // Same here.
+  // If ever replaced by closed form, last value should be correct!
   private static int getSum21() {
     int k = 0;
     int sum = 0;
@@ -246,7 +423,41 @@
     return sum;
   }
 
-  // Same here.
+  // TODO: handle as closed/empty eventually?
+  static int mainIndexReturnedN(int n) {
+    int i;
+    for (i = 0; i < n; i++);
+    return i;
+  }
+
+  /// CHECK-START: int Main.periodicReturnedN(int) loop_optimization (before)
+  /// CHECK-DAG: <<Phi1:i\d+>> Phi               loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Phi2:i\d+>> Phi               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               Return [<<Phi2>>] loop:none
+  //
+  /// CHECK-START: int Main.periodicReturnedN(int) loop_optimization (after)
+  /// CHECK-NOT:               Phi    loop:{{B\d+}} outer_loop:none
+  /// CHECK-DAG:               Return loop:none
+  static int periodicReturnedN(int n) {
+    int k = 0;
+    for (int i = 0; i < n; i++) {
+      k = 1 - k;
+    }
+    return k;
+  }
+
+  // If ever replaced by closed form, last value should be correct!
+  private static int getSumN(int n) {
+    int k = 0;
+    int sum = 0;
+    for (int i = 0; i < n; i++) {
+      k++;
+      sum += k;
+    }
+    return sum;
+  }
+
+  // If ever replaced by closed form, last value should be correct!
   private static int closedTwice() {
     int closed = 0;
     for (int i = 0; i < 10; i++) {
@@ -269,8 +480,12 @@
   /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
   //
   /// CHECK-START: int Main.closedFeed() loop_optimization (after)
-  /// CHECK-NOT:               Phi    loop:B\d+ outer_loop:none
+  /// CHECK-NOT:               Phi    loop:{{B\d+}} outer_loop:none
   /// CHECK-DAG:               Return loop:none
+  //
+  /// CHECK-START: int Main.closedFeed() instruction_simplifier$after_bce (after)
+  /// CHECK-DAG: <<Int:i\d+>>  IntConstant 20
+  /// CHECK-DAG:               Return [<<Int>>] loop:none
   private static int closedFeed() {
     int closed = 0;
     for (int i = 0; i < 10; i++) {
@@ -292,6 +507,10 @@
   /// CHECK-START: int Main.closedLargeUp() loop_optimization (after)
   /// CHECK-NOT:               Phi    loop:B\d+ outer_loop:none
   /// CHECK-DAG:               Return loop:none
+  //
+  /// CHECK-START: int Main.closedLargeUp() instruction_simplifier$after_bce (after)
+  /// CHECK-DAG: <<Int:i\d+>>  IntConstant -10
+  /// CHECK-DAG:               Return [<<Int>>] loop:none
   private static int closedLargeUp() {
     int closed = 0;
     for (int i = 0; i < 10; i++) {
@@ -308,6 +527,10 @@
   /// CHECK-START: int Main.closedLargeDown() loop_optimization (after)
   /// CHECK-NOT:               Phi    loop:B\d+ outer_loop:none
   /// CHECK-DAG:               Return loop:none
+  //
+  /// CHECK-START: int Main.closedLargeDown() instruction_simplifier$after_bce (after)
+  /// CHECK-DAG: <<Int:i\d+>>  IntConstant 10
+  /// CHECK-DAG:               Return [<<Int>>] loop:none
   private static int closedLargeDown() {
     int closed = 0;
     for (int i = 0; i < 10; i++) {
@@ -316,6 +539,136 @@
     return closed;
   }
 
+  /// CHECK-START: int Main.waterFall() loop_optimization (before)
+  /// CHECK-DAG: <<Phi1:i\d+>> Phi               loop:<<Loop1:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Phi2:i\d+>> Phi               loop:<<Loop2:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Phi3:i\d+>> Phi               loop:<<Loop3:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Phi4:i\d+>> Phi               loop:<<Loop4:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Phi5:i\d+>> Phi               loop:<<Loop5:B\d+>> outer_loop:none
+  /// CHECK-DAG:               Return [<<Phi5>>] loop:none
+  //
+  /// CHECK-START: int Main.waterFall() loop_optimization (after)
+  /// CHECK-NOT:               Phi    loop:B\d+ outer_loop:none
+  /// CHECK-DAG:               Return loop:none
+  //
+  /// CHECK-START: int Main.waterFall() instruction_simplifier$after_bce (after)
+  /// CHECK-DAG: <<Int:i\d+>>  IntConstant 50
+  /// CHECK-DAG:               Return [<<Int>>] loop:none
+  private static int waterFall() {
+    int i = 0;
+    for (; i < 10; i++);
+    for (; i < 20; i++);
+    for (; i < 30; i++);
+    for (; i < 40; i++);
+    for (; i < 50; i++);
+    return i;  // this should become just 50
+  }
+
+  /// CHECK-START: boolean Main.periodicBoolIdiom1() loop_optimization (before)
+  /// CHECK-DAG: <<Phi1:i\d+>> Phi               loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Phi2:i\d+>> Phi               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               Return [<<Phi2>>] loop:none
+  //
+  /// CHECK-START: boolean Main.periodicBoolIdiom1() loop_optimization (after)
+  /// CHECK-NOT:               Phi    loop:{{B\d+}} outer_loop:none
+  /// CHECK-DAG:               Return loop:none
+  //
+  /// CHECK-START: boolean Main.periodicBoolIdiom1() instruction_simplifier$after_bce (after)
+  /// CHECK-DAG: <<Int:i\d+>>  IntConstant 0
+  /// CHECK-DAG:               Return [<<Int>>] loop:none
+  private static boolean periodicBoolIdiom1() {
+    boolean x = true;
+    for (int i = 0; i < 7; i++) {
+      x = !x;
+    }
+    return x;
+  }
+
+  /// CHECK-START: boolean Main.periodicBoolIdiom2() loop_optimization (before)
+  /// CHECK-DAG: <<Phi1:i\d+>> Phi               loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Phi2:i\d+>> Phi               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               Return [<<Phi2>>] loop:none
+  //
+  /// CHECK-START: boolean Main.periodicBoolIdiom2() loop_optimization (after)
+  /// CHECK-NOT:               Phi    loop:{{B\d+}} outer_loop:none
+  /// CHECK-DAG:               Return loop:none
+  //
+  /// CHECK-START: boolean Main.periodicBoolIdiom2() instruction_simplifier$after_bce (after)
+  /// CHECK-DAG: <<Int:i\d+>>  IntConstant 0
+  /// CHECK-DAG:               Return [<<Int>>] loop:none
+  private static boolean periodicBoolIdiom2() {
+    boolean x = true;
+    for (int i = 0; i < 7; i++) {
+      x = (x != true);
+    }
+    return x;
+  }
+
+  /// CHECK-START: boolean Main.periodicBoolIdiom3() loop_optimization (before)
+  /// CHECK-DAG: <<Phi1:i\d+>> Phi               loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Phi2:i\d+>> Phi               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               Return [<<Phi2>>] loop:none
+  //
+  /// CHECK-START: boolean Main.periodicBoolIdiom3() loop_optimization (after)
+  /// CHECK-NOT:               Phi    loop:{{B\d+}} outer_loop:none
+  /// CHECK-DAG:               Return loop:none
+  //
+  /// CHECK-START: boolean Main.periodicBoolIdiom3() instruction_simplifier$after_bce (after)
+  /// CHECK-DAG: <<Int:i\d+>>  IntConstant 0
+  /// CHECK-DAG:               Return [<<Int>>] loop:none
+  private static boolean periodicBoolIdiom3() {
+    boolean x = true;
+    for (int i = 0; i < 7; i++) {
+      x = (x == false);
+    }
+    return x;
+  }
+
+  /// CHECK-START: boolean Main.periodicBoolIdiom1N(boolean, int) loop_optimization (before)
+  /// CHECK-DAG: <<Phi1:i\d+>> Phi               loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Phi2:i\d+>> Phi               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               Return [<<Phi2>>] loop:none
+  //
+  /// CHECK-START: boolean Main.periodicBoolIdiom1N(boolean, int) loop_optimization (after)
+  /// CHECK-NOT:               Phi    loop:{{B\d+}} outer_loop:none
+  /// CHECK-DAG:               Return loop:none
+  private static boolean periodicBoolIdiom1N(boolean x, int n) {
+    for (int i = 0; i < n; i++) {
+      x = !x;
+    }
+    return x;
+  }
+
+  /// CHECK-START: boolean Main.periodicBoolIdiom2N(boolean, int) loop_optimization (before)
+  /// CHECK-DAG: <<Phi1:i\d+>> Phi               loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Phi2:i\d+>> Phi               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               Return [<<Phi2>>] loop:none
+  //
+  /// CHECK-START: boolean Main.periodicBoolIdiom2N(boolean, int) loop_optimization (after)
+  /// CHECK-NOT:               Phi    loop:{{B\d+}} outer_loop:none
+  /// CHECK-DAG:               Return loop:none
+  private static boolean periodicBoolIdiom2N(boolean x, int n) {
+    for (int i = 0; i < n; i++) {
+      x = (x != true);
+    }
+    return x;
+  }
+
+  /// CHECK-START: boolean Main.periodicBoolIdiom3N(boolean, int) loop_optimization (before)
+  /// CHECK-DAG: <<Phi1:i\d+>> Phi               loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: <<Phi2:i\d+>> Phi               loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG:               Return [<<Phi2>>] loop:none
+  //
+  /// CHECK-START: boolean Main.periodicBoolIdiom3N(boolean, int) loop_optimization (after)
+  /// CHECK-NOT:               Phi    loop:{{B\d+}} outer_loop:none
+  /// CHECK-DAG:               Return loop:none
+  private static boolean periodicBoolIdiom3N(boolean x, int n) {
+    for (int i = 0; i < n; i++) {
+      x = (x == false);
+    }
+    return x;
+  }
+
   private static int exceptionExitBeforeAdd() {
     int k = 0;
     try {
@@ -348,8 +701,12 @@
 
   public static void main(String[] args) {
     deadSingleLoop();
+    deadSingleLoopN(4);
+    potentialInfiniteLoop(4);
     deadNestedLoops();
     deadNestedAndFollowingLoops();
+    deadConditional(4);
+    deadConditionalCycle(4);
 
     deadInduction();
     for (int i = 0; i < a.length; i++) {
@@ -376,31 +733,50 @@
       expectEquals(4, a[i]);
     }
 
-    int c = closedFormInductionUp();
-    expectEquals(12395, c);
-    c = closedFormInductionInAndDown(12345);
-    expectEquals(12295, c);
+    expectEquals(12395, closedFormInductionUp());
+    expectEquals(12295, closedFormInductionInAndDown(12345));
+    expectEquals(10 * 10, closedFormNested());
+    expectEquals(12345 + 17 * 23 * 7, closedFormNestedAlt());
     for (int n = -4; n < 10; n++) {
       int tc = (n <= 0) ? 0 : n;
-      c = closedFormInductionUpN(n);
-      expectEquals(12345 + tc * 5, c);
-      c = closedFormInductionInAndDownN(12345, n);
-      expectEquals(12345 - tc * 5, c);
-      c = closedFormNested(n);
-      expectEquals(tc * 10, c);
+      expectEquals(12345 + tc * 5, closedFormInductionUpN(n));
+      expectEquals(12345 - tc * 5, closedFormInductionInAndDownN(12345, n));
+      expectEquals(tc * 10, closedFormNestedN(n));
+      expectEquals(12345 + tc * 23 * 7, closedFormNestedNAlt(n));
+      expectEquals(tc * (tc + 1), closedFormNestedMN(n, n + 1));
+      expectEquals(12345 + tc * (tc + 1) * 7, closedFormNestedMNAlt(n, n + 1));
     }
 
+    expectEquals(10, mainIndexReturned());
+    expectEquals(1, periodicReturned9());
+    expectEquals(0, periodicReturned10());
+    expectEquals(21, getSum21());
     for (int n = -4; n < 4; n++) {
       int tc = (n <= 0) ? 0 : n;
-      expectEquals(tc, mainIndexReturned(n));
-      expectEquals(tc & 1, periodicReturned(n));
-      expectEquals((tc * (tc + 1)) / 2, getSum(n));
+      expectEquals(tc, mainIndexReturnedN(n));
+      expectEquals(tc & 1, periodicReturnedN(n));
+      expectEquals((tc * (tc + 1)) / 2, getSumN(n));
     }
-    expectEquals(21, getSum21());
+
     expectEquals(10, closedTwice());
     expectEquals(20, closedFeed());
     expectEquals(-10, closedLargeUp());
     expectEquals(10, closedLargeDown());
+    expectEquals(50, waterFall());
+
+    expectEquals(false, periodicBoolIdiom1());
+    expectEquals(false, periodicBoolIdiom2());
+    expectEquals(false, periodicBoolIdiom3());
+    for (int n = -4; n < 10; n++) {
+      int tc = (n <= 0) ? 0 : n;
+      boolean even = (tc & 1) == 0;
+      expectEquals(even, periodicBoolIdiom1N(true, n));
+      expectEquals(!even, periodicBoolIdiom1N(false, n));
+      expectEquals(even, periodicBoolIdiom2N(true, n));
+      expectEquals(!even, periodicBoolIdiom2N(false, n));
+      expectEquals(even, periodicBoolIdiom3N(true, n));
+      expectEquals(!even, periodicBoolIdiom3N(false, n));
+    }
 
     expectEquals(100, exceptionExitBeforeAdd());
     expectEquals(100, exceptionExitAfterAdd());
@@ -419,4 +795,10 @@
       throw new Error("Expected: " + expected + ", found: " + result);
     }
   }
+
+  private static void expectEquals(boolean expected, boolean result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
 }
diff --git a/test/562-no-intermediate/expected.txt b/test/619-checker-current-method/expected.txt
similarity index 100%
copy from test/562-no-intermediate/expected.txt
copy to test/619-checker-current-method/expected.txt
diff --git a/test/619-checker-current-method/info.txt b/test/619-checker-current-method/info.txt
new file mode 100644
index 0000000..75f5213
--- /dev/null
+++ b/test/619-checker-current-method/info.txt
@@ -0,0 +1,2 @@
+Checks that we don't store the current method when the compiled
+code does not need it.
diff --git a/test/562-no-intermediate/src/Main.java b/test/619-checker-current-method/src/Main.java
similarity index 61%
copy from test/562-no-intermediate/src/Main.java
copy to test/619-checker-current-method/src/Main.java
index 3b74d6f..d829370 100644
--- a/test/562-no-intermediate/src/Main.java
+++ b/test/619-checker-current-method/src/Main.java
@@ -16,12 +16,18 @@
 
 public class Main {
 
-  /// CHECK-START-ARM64: int Main.main(String[]) register_allocator (after)
-  /// CHECK-NOT: IntermediateAddress
-  public static void main(String[] args) {
-    array[index] += Math.cos(42);
+  // Check that there is no instruction storing to stack.
+  /// CHECK-START-X86: int Main.foo(int, int, int, int, int, int) disassembly (after)
+  /// CHECK-NOT:  mov [{{\w+}}], {{\w+}}
+
+  // Use enough parameters to ensure we'll need a frame.
+  public static int foo(int a, int b, int c, int d, int e, int f) {
+    return a + b + c + d + e + f;
   }
 
-  static int index = 0;
-  static double[] array = new double[2];
+  public static void main(String[] args) {
+    if (foo(1, 2, 3, 4, 5, 6) != 21) {
+      throw new Error("Expected 21");
+    }
+  }
 }
diff --git a/test/620-checker-bce-intrinsics/expected.txt b/test/620-checker-bce-intrinsics/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/620-checker-bce-intrinsics/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/620-checker-bce-intrinsics/info.txt b/test/620-checker-bce-intrinsics/info.txt
new file mode 100644
index 0000000..a868845
--- /dev/null
+++ b/test/620-checker-bce-intrinsics/info.txt
@@ -0,0 +1 @@
+Test on bounds check elimination in loops using intrinsics.
diff --git a/test/620-checker-bce-intrinsics/src/Main.java b/test/620-checker-bce-intrinsics/src/Main.java
new file mode 100644
index 0000000..afc3c65
--- /dev/null
+++ b/test/620-checker-bce-intrinsics/src/Main.java
@@ -0,0 +1,285 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Tests on bounds check elimination in loops that use intrinsics.
+ * All bounds checks below should be statically eliminated.
+ */
+public class Main {
+
+  /// CHECK-START: int Main.oneArray(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>> outer_loop:none
+  //
+  /// CHECK-START: int Main.oneArray(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  static int oneArray(int[] a) {
+    int x = 0;
+    for (int i = 0; i < a.length; i++) {
+      x += a[i];
+    }
+    return x;
+  }
+
+  /// CHECK-START: int Main.oneArrayAbs(int[], int) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>> outer_loop:none
+  //
+  /// CHECK-START: int Main.oneArrayAbs(int[], int) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  static int oneArrayAbs(int[] a, int lo) {
+    int x = 0;
+    for (int i = Math.abs(lo); i < a.length; i++) {
+      x += a[i];
+    }
+    return x;
+  }
+
+
+  /// CHECK-START: int Main.twoArrays(int[], int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: int Main.twoArrays(int[], int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  static int twoArrays(int[] a, int[] b) {
+    int x = 0;
+    for (int i = 0; i < Math.min(a.length, b.length); i++) {
+      x += a[i] + b[i];
+    }
+    return x;
+  }
+
+  /// CHECK-START: int Main.threeArrays(int[], int[], int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: int Main.threeArrays(int[], int[], int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  static int threeArrays(int[] a, int[] b, int[] c) {
+    int x = 0;
+    for (int i = 0; i < Math.min(Math.min(a.length, b.length), c.length); i++) {
+      x += a[i] + b[i] + c[i];
+    }
+    return x;
+  }
+
+  /// CHECK-START: int Main.fourArrays(int[], int[], int[], int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: int Main.fourArrays(int[], int[], int[], int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  static int fourArrays(int[] a, int[] b, int[] c, int[] d) {
+    int x = 0;
+    for (int i = 0; i < Math.min(Math.min(a.length, b.length), Math.min(c.length, d.length)); i++) {
+      x += a[i] + b[i] + c[i] + d[i];
+    }
+    return x;
+  }
+
+  /// CHECK-START: int Main.oneArrayWithCleanup(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop1:B\d+>> outer_loop:none
+  /// CHECK-DAG: BoundsCheck loop:<<Loop2:B\d+>> outer_loop:none
+  //
+  /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
+  //
+  /// CHECK-START: int Main.oneArrayWithCleanup(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  static int oneArrayWithCleanup(int[] a) {
+    int x = 0;
+    int n = Math.min(4, a.length);
+    for (int i = 0; i < n; i++) {
+      x += a[i];
+    }
+    for (int i = n; i < a.length; i++) {
+      x += a[i] * 10;
+    }
+    return x;
+  }
+
+  /// CHECK-START: int Main.twoArraysWithCleanup(int[], int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop1:B\d+>> outer_loop:none
+  /// CHECK-DAG: BoundsCheck loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: BoundsCheck loop:<<Loop2:B\d+>> outer_loop:none
+  //
+  /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
+  //
+  /// CHECK-START: int Main.twoArraysWithCleanup(int[], int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  static int twoArraysWithCleanup(int[] a, int[] b) {
+    int x = 0;
+    int n = Math.min(a.length, b.length);
+    for (int i = n - 1; i >= 0; i--) {
+      x += a[i] + b[i];
+    }
+    for (int i = n; i < a.length; i++) {
+      x += a[i];
+    }
+    return x;
+  }
+
+  /// CHECK-START: int Main.threeArraysWithCleanup(int[], int[], int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop1:B\d+>> outer_loop:none
+  /// CHECK-DAG: BoundsCheck loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: BoundsCheck loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: BoundsCheck loop:<<Loop2:B\d+>> outer_loop:none
+  //
+  /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
+  //
+  /// CHECK-START: int Main.threeArraysWithCleanup(int[], int[], int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  static int threeArraysWithCleanup(int[] a, int[] b, int[] c) {
+    int x = 0;
+    int n = Math.min(a.length, Math.min(b.length, c.length));
+    for (int i = n - 1; i >= 0; i--) {
+      x += a[i] + b[i] + c[i];
+    }
+    for (int i = n; i < a.length; i++) {
+      x += a[i];
+    }
+    return x;
+  }
+
+  /// CHECK-START: int Main.altLoopLogic(int[], int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: int Main.altLoopLogic(int[], int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  /// CHECK-NOT: Deoptimize
+  static int altLoopLogic(int[] a, int[] b) {
+    int x = 0;
+    int n = Math.min(a.length, b.length);
+    for (int i = n; i-- > 0;) {
+      x += a[i] + b[i];
+    }
+    return x;
+  }
+
+  /// CHECK-START: int Main.hiddenMin(int[], int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: int Main.hiddenMin(int[], int[]) BCE (after)
+  //
+  // TODO: make this so
+  static int hiddenMin(int[] a, int[] b) {
+    int x = 0;
+    for (int i = 0; i < a.length && i < b.length; i++) {
+      x += a[i] + b[i];
+    }
+    return x;
+  }
+
+  /// CHECK-START: int Main.hiddenMinWithCleanup(int[], int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop1:B\d+>> outer_loop:none
+  /// CHECK-DAG: BoundsCheck loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: BoundsCheck loop:<<Loop2:B\d+>> outer_loop:none
+  //
+  /// CHECK-EVAL: "<<Loop1>>" != "<<Loop2>>"
+  //
+  /// CHECK-START: int Main.hiddenMinWithCleanup(int[], int[]) BCE (after)
+  //
+  // TODO: make this so
+  static int hiddenMinWithCleanup(int[] a, int[] b) {
+    int x = 0;
+    int i = 0;
+    for (; i < a.length && i < b.length; i++) {
+      x += a[i] + b[i];
+    }
+    for (; i < a.length; i++) {
+      x += a[i];
+    }
+    return x;
+  }
+
+  public static void main(String[] args) {
+    int[] a = { 1, 2, 3, 4, 5 };
+    int[] b = { 6, 7, 8, 9, 4, 2 };
+    int[] c = { 1, 2, 3 };
+    int[] d = { 8, 5, 3, 2 };
+
+    expectEquals(15, oneArray(a));
+    expectEquals(36, oneArray(b));
+    expectEquals(6,  oneArray(c));
+    expectEquals(18, oneArray(d));
+
+    expectEquals(5,  oneArrayAbs(a, -4));
+    expectEquals(15, oneArrayAbs(a, 0));
+    expectEquals(5,  oneArrayAbs(a, 4));
+
+    expectEquals(30, twoArrays(a, a));
+    expectEquals(49, twoArrays(a, b));
+    expectEquals(12, twoArrays(a, c));
+    expectEquals(28, twoArrays(a, d));
+
+    expectEquals(45, threeArrays(a, a, a));
+    expectEquals(33, threeArrays(a, b, c));
+    expectEquals(58, threeArrays(a, b, d));
+    expectEquals(28, threeArrays(a, c, d));
+
+    expectEquals(60, fourArrays(a, a, a, a));
+    expectEquals(49, fourArrays(a, b, c, d));
+
+    expectEquals(60, oneArrayWithCleanup(a));
+    expectEquals(90, oneArrayWithCleanup(b));
+    expectEquals(6,  oneArrayWithCleanup(c));
+    expectEquals(18, oneArrayWithCleanup(d));
+
+    expectEquals(30, twoArraysWithCleanup(a, a));
+    expectEquals(49, twoArraysWithCleanup(a, b));
+    expectEquals(21, twoArraysWithCleanup(a, c));
+    expectEquals(33, twoArraysWithCleanup(a, d));
+
+    expectEquals(45, threeArraysWithCleanup(a, a, a));
+    expectEquals(42, threeArraysWithCleanup(a, b, c));
+    expectEquals(63, threeArraysWithCleanup(a, b, d));
+    expectEquals(37, threeArraysWithCleanup(a, c, d));
+
+    expectEquals(30, altLoopLogic(a, a));
+    expectEquals(49, altLoopLogic(a, b));
+    expectEquals(12, altLoopLogic(a, c));
+    expectEquals(28, altLoopLogic(a, d));
+
+    expectEquals(30, hiddenMin(a, a));
+    expectEquals(49, hiddenMin(a, b));
+    expectEquals(12, hiddenMin(a, c));
+    expectEquals(28, hiddenMin(a, d));
+
+    expectEquals(30, hiddenMinWithCleanup(a, a));
+    expectEquals(49, hiddenMinWithCleanup(a, b));
+    expectEquals(21, hiddenMinWithCleanup(a, c));
+    expectEquals(33, hiddenMinWithCleanup(a, d));
+
+    System.out.println("passed");
+  }
+
+  private static void expectEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
diff --git a/test/562-no-intermediate/expected.txt b/test/621-checker-new-instance/expected.txt
similarity index 100%
copy from test/562-no-intermediate/expected.txt
copy to test/621-checker-new-instance/expected.txt
diff --git a/test/621-checker-new-instance/info.txt b/test/621-checker-new-instance/info.txt
new file mode 100644
index 0000000..c27c45c
--- /dev/null
+++ b/test/621-checker-new-instance/info.txt
@@ -0,0 +1 @@
+Tests for removing useless load class.
diff --git a/test/621-checker-new-instance/src/Main.java b/test/621-checker-new-instance/src/Main.java
new file mode 100644
index 0000000..68a4644
--- /dev/null
+++ b/test/621-checker-new-instance/src/Main.java
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+public class Main {
+  /// CHECK-START: java.lang.Object Main.newObject() prepare_for_register_allocation (before)
+  /// CHECK: LoadClass
+  /// CHECK: NewInstance
+
+  /// CHECK-START: java.lang.Object Main.newObject() prepare_for_register_allocation (after)
+  /// CHECK-NOT: LoadClass
+  /// CHECK: NewInstance
+  public static Object newObject() {
+      return new Object();
+  }
+
+  /// CHECK-START: java.lang.Object Main.newFinalizableMayThrow() prepare_for_register_allocation (after)
+  /// CHECK: LoadClass
+  /// CHECK: NewInstance
+  public static Object newFinalizableMayThrow() {
+      return $inline$newFinalizableMayThrow();
+  }
+
+  public static Object $inline$newFinalizableMayThrow() {
+      return new FinalizableMayThrow();
+  }
+
+  public static void main(String[] args) {
+      newFinalizableMayThrow();
+      newObject();
+  }
+}
+
+class FinalizableMayThrow {
+    // clinit may throw OOME.
+    static Object o = new Object();
+    static String s;
+    public void finalize() {
+        s = "Test";
+    }
+}
diff --git a/test/622-checker-bce-regressions/expected.txt b/test/622-checker-bce-regressions/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/622-checker-bce-regressions/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/622-checker-bce-regressions/info.txt b/test/622-checker-bce-regressions/info.txt
new file mode 100644
index 0000000..a753dfa
--- /dev/null
+++ b/test/622-checker-bce-regressions/info.txt
@@ -0,0 +1 @@
+Regression tests on BCE.
diff --git a/test/622-checker-bce-regressions/src/Main.java b/test/622-checker-bce-regressions/src/Main.java
new file mode 100644
index 0000000..6ba2644
--- /dev/null
+++ b/test/622-checker-bce-regressions/src/Main.java
@@ -0,0 +1,55 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Regression tests for BCE.
+ */
+public class Main {
+
+  static int[] array = new int[10];
+
+  /// CHECK-START: int Main.doNotVisitAfterForwardBCE(int[]) BCE (before)
+  /// CHECK-DAG: BoundsCheck loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: BoundsCheck loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: int Main.doNotVisitAfterForwardBCE(int[]) BCE (after)
+  /// CHECK-NOT: BoundsCheck
+  static int doNotVisitAfterForwardBCE(int[] a) {
+    if (a == null) {
+      throw new Error("Null");
+    }
+    int k = 0;
+    int j = 0;
+    for (int i = 1; i < 10; i++) {
+      j = i - 1;
+      // b/32547652: after DCE, bounds checks become consecutive,
+      // and second should not be revisited after forward BCE.
+      k = a[i] + a[i - 1];
+    }
+    return j;
+  }
+
+  public static void main(String[] args) {
+    expectEquals(8, doNotVisitAfterForwardBCE(array));
+    System.out.println("passed");
+  }
+
+  private static void expectEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
diff --git a/test/562-no-intermediate/expected.txt b/test/622-simplifyifs-exception-edges/expected.txt
similarity index 100%
copy from test/562-no-intermediate/expected.txt
copy to test/622-simplifyifs-exception-edges/expected.txt
diff --git a/test/622-simplifyifs-exception-edges/info.txt b/test/622-simplifyifs-exception-edges/info.txt
new file mode 100644
index 0000000..58c4bfb
--- /dev/null
+++ b/test/622-simplifyifs-exception-edges/info.txt
@@ -0,0 +1,2 @@
+Regression test for the SimplifyIfs() graph simplification erroneously trying
+to redirect exception handler edges.
\ No newline at end of file
diff --git a/test/622-simplifyifs-exception-edges/smali/Test.smali b/test/622-simplifyifs-exception-edges/smali/Test.smali
new file mode 100644
index 0000000..5e91258
--- /dev/null
+++ b/test/622-simplifyifs-exception-edges/smali/Test.smali
@@ -0,0 +1,76 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LTest;
+
+.super Ljava/lang/Object;
+
+.method public static test([I)I
+    .locals 2
+    const/4 v0, 0
+    :try1_begin
+    array-length v1, p0
+    :try1_end
+    add-int/lit8 v0, v1, -1
+    :try2_begin
+    aget v0, p0, v0
+    :try2_end
+    :end
+    return v0
+
+    :catch_all
+    # Regression test for bug 32545860:
+    #     SimplifyIfs() would have redirected exception handler edges leading here.
+    # Note: There is no move-exception here to prevent matching the SimplifyIfs() pattern.
+    if-eqz v0, :is_zero
+    const/4 v0, -1
+    goto :end
+    :is_zero
+    const/4 v0, -2
+    goto :end
+
+    .catchall {:try1_begin .. :try1_end } :catch_all
+    .catchall {:try2_begin .. :try2_end } :catch_all
+.end method
+
+.method public static test2([II)I
+    .locals 3
+    move v0, p1
+    :try_begin
+    array-length v1, p0
+    add-int/lit8 v1, v1, -1
+    add-int/lit8 v0, v0, 1
+    aget v1, p0, v1
+    const/4 v0, 2
+    aget v2, p0, p1
+    const/4 v0, 3
+    :try_end
+    :end
+    return v0
+
+    :catch_all
+    # Regression test for bug 32546110:
+    #     SimplifyIfs() would have looked at predecessors of this block based on the indexes
+    #     of the catch Phi's inputs. For catch blocks these two arrays are unrelated, so
+    #     this caused out-of-range access triggering a DCHECK() in dchecked_vector<>.
+    # Note: There is no move-exception here to prevent matching the SimplifyIfs() pattern.
+    if-eqz v0, :is_zero
+    const/4 v0, -1
+    goto :end
+    :is_zero
+    const/4 v0, -2
+    goto :end
+
+    .catchall {:try_begin .. :try_end } :catch_all
+.end method
diff --git a/test/622-simplifyifs-exception-edges/src/Main.java b/test/622-simplifyifs-exception-edges/src/Main.java
new file mode 100644
index 0000000..636f047
--- /dev/null
+++ b/test/622-simplifyifs-exception-edges/src/Main.java
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+import java.lang.reflect.InvocationTargetException;
+
+public class Main {
+    public static void main(String[] args) throws Exception {
+        Class<?> c = Class.forName("Test");
+        Method test = c.getDeclaredMethod("test", int[].class);
+        assertIntEquals(-2, (int)test.invoke(null, new Object[] { null }));
+        assertIntEquals(-1, (int)test.invoke(null, new Object[] { new int[0] }));
+        assertIntEquals(42, (int)test.invoke(null, new Object[] { new int[] { 42 } }));
+
+        Method test2 = c.getDeclaredMethod("test2", int[].class, int.class);
+        assertIntEquals(-2, (int)test2.invoke(null, new Object[] { null, 0 }));
+        assertIntEquals(-1, (int)test2.invoke(null, new Object[] { new int[0], 0 }));
+        assertIntEquals(-1, (int)test2.invoke(null, new Object[] { new int[0], 1 }));
+        assertIntEquals(3, (int)test2.invoke(null, new Object[] { new int[] { 42 }, 0 }));
+    }
+
+    public static void assertIntEquals(int expected, int result) {
+        if (expected != result) {
+            throw new Error("Expected: " + expected + ", found: " + result);
+        }
+    }
+
+    // Workaround for non-zero field ids offset in dex file with no fields. Bug: 18051191
+    static final boolean dummy = false;
+}
diff --git a/test/623-checker-loop-regressions/expected.txt b/test/623-checker-loop-regressions/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/623-checker-loop-regressions/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/623-checker-loop-regressions/info.txt b/test/623-checker-loop-regressions/info.txt
new file mode 100644
index 0000000..6271600
--- /dev/null
+++ b/test/623-checker-loop-regressions/info.txt
@@ -0,0 +1 @@
+Regression tests on loop optimizations.
diff --git a/test/623-checker-loop-regressions/src/Main.java b/test/623-checker-loop-regressions/src/Main.java
new file mode 100644
index 0000000..ce5bda1
--- /dev/null
+++ b/test/623-checker-loop-regressions/src/Main.java
@@ -0,0 +1,109 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Regression tests for loop optimizations.
+ */
+public class Main {
+
+  /// CHECK-START: int Main.earlyExitFirst(int) loop_optimization (before)
+  /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: Phi loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: int Main.earlyExitFirst(int) loop_optimization (after)
+  /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: Phi loop:<<Loop>>      outer_loop:none
+  static int earlyExitFirst(int m) {
+    int k = 0;
+    for (int i = 0; i < 10; i++) {
+      if (i == m) {
+        return k;
+      }
+      k++;
+    }
+    return k;
+  }
+
+  /// CHECK-START: int Main.earlyExitLast(int) loop_optimization (before)
+  /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: Phi loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: int Main.earlyExitLast(int) loop_optimization (after)
+  /// CHECK-DAG: Phi loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: Phi loop:<<Loop>>      outer_loop:none
+  static int earlyExitLast(int m) {
+    int k = 0;
+    for (int i = 0; i < 10; i++) {
+      k++;
+      if (i == m) {
+        return k;
+      }
+    }
+    return k;
+  }
+
+  /// CHECK-START: int Main.earlyExitNested() loop_optimization (before)
+  /// CHECK-DAG: Phi loop:<<Loop1:B\d+>> outer_loop:none
+  /// CHECK-DAG: Phi loop:<<Loop1>>      outer_loop:none
+  /// CHECK-DAG: Phi loop:<<Loop2:B\d+>> outer_loop:<<Loop1>>
+  /// CHECK-DAG: Phi loop:<<Loop2>>      outer_loop:<<Loop1>>
+  //
+  /// CHECK-START: int Main.earlyExitNested() loop_optimization (after)
+  /// CHECK-DAG: Phi loop:<<Loop1:B\d+>> outer_loop:none
+  /// CHECK-DAG: Phi loop:<<Loop1>>      outer_loop:none
+  //
+  /// CHECK-START: int Main.earlyExitNested() loop_optimization (after)
+  /// CHECK-NOT: Phi loop:{{B\d+}} outer_loop:{{B\d+}}
+  static int earlyExitNested() {
+    int offset = 0;
+    for (int i = 0; i < 2; i++) {
+      int start = offset;
+      // This loop can be removed.
+      for (int j = 0; j < 2; j++) {
+        offset++;
+      }
+      if (i == 1) {
+        return start;
+      }
+    }
+    return 0;
+  }
+
+  public static void main(String[] args) {
+    expectEquals(10, earlyExitFirst(-1));
+    for (int i = 0; i <= 10; i++) {
+      expectEquals(i, earlyExitFirst(i));
+    }
+    expectEquals(10, earlyExitFirst(11));
+
+    expectEquals(10, earlyExitLast(-1));
+    for (int i = 0; i < 10; i++) {
+      expectEquals(i + 1, earlyExitLast(i));
+    }
+    expectEquals(10, earlyExitLast(10));
+    expectEquals(10, earlyExitLast(11));
+
+    expectEquals(2, earlyExitNested());
+
+    System.out.println("passed");
+  }
+
+  private static void expectEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
diff --git a/test/624-checker-stringops/expected.txt b/test/624-checker-stringops/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/624-checker-stringops/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/624-checker-stringops/info.txt b/test/624-checker-stringops/info.txt
new file mode 100644
index 0000000..64344ac
--- /dev/null
+++ b/test/624-checker-stringops/info.txt
@@ -0,0 +1 @@
+Verify some properties of string operations represented by intrinsics.
diff --git a/test/624-checker-stringops/src/Main.java b/test/624-checker-stringops/src/Main.java
new file mode 100644
index 0000000..34e8283
--- /dev/null
+++ b/test/624-checker-stringops/src/Main.java
@@ -0,0 +1,124 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Tests properties of some string operations represented by intrinsics.
+ */
+public class Main {
+
+  static final String ABC = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
+  static final String XYZ = "XYZ";
+
+  //
+  // Variant intrinsics remain in the loop, but invariant references are hoisted out of the loop.
+  //
+  /// CHECK-START: int Main.liveIndexOf() licm (before)
+  /// CHECK-DAG: InvokeVirtual intrinsic:StringIndexOf            loop:{{B\d+}} outer_loop:none
+  /// CHECK-DAG: InvokeVirtual intrinsic:StringIndexOfAfter       loop:{{B\d+}} outer_loop:none
+  /// CHECK-DAG: InvokeVirtual intrinsic:StringStringIndexOf      loop:{{B\d+}} outer_loop:none
+  /// CHECK-DAG: InvokeVirtual intrinsic:StringStringIndexOfAfter loop:{{B\d+}} outer_loop:none
+  //
+  /// CHECK-START: int Main.liveIndexOf() licm (after)
+  /// CHECK-DAG: InvokeVirtual intrinsic:StringIndexOf            loop:{{B\d+}} outer_loop:none
+  /// CHECK-DAG: InvokeVirtual intrinsic:StringIndexOfAfter       loop:{{B\d+}} outer_loop:none
+  /// CHECK-DAG: InvokeVirtual intrinsic:StringStringIndexOf      loop:none
+  /// CHECK-DAG: InvokeVirtual intrinsic:StringStringIndexOfAfter loop:none
+  static int liveIndexOf() {
+    int k = ABC.length() + XYZ.length();  // does LoadString before loops
+    for (char c = 'A'; c <= 'Z'; c++) {
+      k += ABC.indexOf(c);
+    }
+    for (char c = 'A'; c <= 'Z'; c++) {
+      k += ABC.indexOf(c, 4);
+    }
+    for (char c = 'A'; c <= 'Z'; c++) {
+      k += ABC.indexOf(XYZ);
+    }
+    for (char c = 'A'; c <= 'Z'; c++) {
+      k += ABC.indexOf(XYZ, 2);
+    }
+    return k;
+  }
+
+  //
+  // All dead intrinsics can be removed completely.
+  //
+  /// CHECK-START: int Main.deadIndexOf() dead_code_elimination$initial (before)
+  /// CHECK-DAG: InvokeVirtual intrinsic:StringIndexOf            loop:{{B\d+}} outer_loop:none
+  /// CHECK-DAG: InvokeVirtual intrinsic:StringIndexOfAfter       loop:{{B\d+}} outer_loop:none
+  /// CHECK-DAG: InvokeVirtual intrinsic:StringStringIndexOf      loop:{{B\d+}} outer_loop:none
+  /// CHECK-DAG: InvokeVirtual intrinsic:StringStringIndexOfAfter loop:{{B\d+}} outer_loop:none
+  //
+  /// CHECK-START: int Main.deadIndexOf() dead_code_elimination$initial (after)
+  /// CHECK-NOT: InvokeVirtual intrinsic:StringIndexOf
+  /// CHECK-NOT: InvokeVirtual intrinsic:StringIndexOfAfter
+  /// CHECK-NOT: InvokeVirtual intrinsic:StringStringIndexOf
+  /// CHECK-NOT: InvokeVirtual intrinsic:StringStringIndexOfAfter
+  static int deadIndexOf() {
+    int k = ABC.length() + XYZ.length();  // does LoadString before loops
+    for (char c = 'A'; c <= 'Z'; c++) {
+      int d = ABC.indexOf(c);
+    }
+    for (char c = 'A'; c <= 'Z'; c++) {
+      int d = ABC.indexOf(c, 4);
+    }
+    for (char c = 'A'; c <= 'Z'; c++) {
+      int d = ABC.indexOf(XYZ);
+    }
+    for (char c = 'A'; c <= 'Z'; c++) {
+      int d = ABC.indexOf(XYZ, 2);
+    }
+    return k;
+  }
+
+  //
+  // Explicit null check on receiver, implicit null check on argument prevents hoisting.
+  //
+  /// CHECK-START: int Main.indexOfExceptions(java.lang.String, java.lang.String) licm (after)
+  /// CHECK-DAG: <<String:l\d+>> NullCheck                                                         loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG:                 InvokeVirtual [<<String>>,{{l\d+}}] intrinsic:StringStringIndexOf loop:<<Loop>>      outer_loop:none
+  static int indexOfExceptions(String s, String t) {
+    int k = 0;
+    for (char c = 'A'; c <= 'Z'; c++) {
+      k += s.indexOf(t);
+    }
+    return k;
+  }
+
+  public static void main(String[] args) {
+    expectEquals(1865, liveIndexOf());
+    expectEquals(29, deadIndexOf());
+    try {
+      indexOfExceptions(null, XYZ);
+      throw new Error("Expected: NPE");
+    } catch (NullPointerException e) {
+    }
+    try {
+      indexOfExceptions(ABC, null);
+      throw new Error("Expected: NPE");
+    } catch (NullPointerException e) {
+    }
+    expectEquals(598, indexOfExceptions(ABC, XYZ));
+
+    System.out.println("passed");
+  }
+
+  private static void expectEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
diff --git a/test/625-checker-licm-regressions/expected.txt b/test/625-checker-licm-regressions/expected.txt
new file mode 100644
index 0000000..b0aad4d
--- /dev/null
+++ b/test/625-checker-licm-regressions/expected.txt
@@ -0,0 +1 @@
+passed
diff --git a/test/625-checker-licm-regressions/info.txt b/test/625-checker-licm-regressions/info.txt
new file mode 100644
index 0000000..10480df
--- /dev/null
+++ b/test/625-checker-licm-regressions/info.txt
@@ -0,0 +1 @@
+Regression tests on LICM.
diff --git a/test/625-checker-licm-regressions/src/Main.java b/test/625-checker-licm-regressions/src/Main.java
new file mode 100644
index 0000000..f372b1c
--- /dev/null
+++ b/test/625-checker-licm-regressions/src/Main.java
@@ -0,0 +1,135 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Regression tests for LICM.
+ */
+public class Main {
+
+  static int sA;
+
+  //
+  // We cannot hoist the null check (can throw) above the field
+  // assignment (has write side effects) because that would result
+  // in throwing an exception before the assignment is done.
+  //
+  /// CHECK-START: void Main.foo(int[]) licm (before)
+  /// CHECK-DAG: LoadClass      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: StaticFieldSet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: NullCheck      loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArrayLength    loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: void Main.foo(int[]) licm (after)
+  /// CHECK-DAG: LoadClass      loop:none
+  /// CHECK-DAG: StaticFieldSet loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: NullCheck      loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArrayLength    loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: void Main.foo(int[]) licm (after)
+  /// CHECK-NOT: LoadClass      loop:{{B\d+}} outer_loop:none
+  static void foo(int[] arr) {
+    int j = 0;
+    do {
+      sA = 1;
+    } while (j < arr.length);
+  }
+
+  //
+  // Similar situation as in foo(), but now a proper induction value
+  // is assigned to the field inside the do-while loop.
+  //
+  /// CHECK-START: void Main.bar(int[]) licm (before)
+  /// CHECK-DAG: LoadClass      loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: StaticFieldSet loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: NullCheck      loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArrayLength    loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: void Main.bar(int[]) licm (after)
+  /// CHECK-DAG: LoadClass      loop:none
+  /// CHECK-DAG: StaticFieldSet loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: NullCheck      loop:<<Loop>>      outer_loop:none
+  /// CHECK-DAG: ArrayLength    loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: void Main.bar(int[]) licm (after)
+  /// CHECK-NOT: LoadClass      loop:{{B\d+}} outer_loop:none
+  static void bar(int[] arr) {
+    int j = 0;
+    do {
+      j++;
+      sA = j;
+    } while (j < arr.length);
+  }
+
+  //
+  // Similar situation as in bar(), but now an explicit catch
+  // statement may need the latest value of local j.
+  //
+  /// CHECK-START: int Main.catcher(int[]) licm (before)
+  /// CHECK-DAG: NullCheck   loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayLength loop:<<Loop>>      outer_loop:none
+  //
+  /// CHECK-START: int Main.catcher(int[]) licm (after)
+  /// CHECK-DAG: NullCheck   loop:<<Loop:B\d+>> outer_loop:none
+  /// CHECK-DAG: ArrayLength loop:<<Loop>>      outer_loop:none
+  static int catcher(int[] arr) {
+    int j = 0;
+    try {
+      do {
+        j++;
+      } while (j < arr.length);
+    } catch (NullPointerException e) {
+      return -j;  // flag exception with negative value
+    }
+    return j;
+  }
+
+  public static void main(String[] args) {
+    sA = 0;
+    try {
+      foo(null);
+      throw new Error("Expected NPE");
+    } catch (NullPointerException e) {
+    }
+    expectEquals(1, sA);
+
+    sA = 0;
+    try {
+      bar(null);
+      throw new Error("Expected NPE");
+    } catch (NullPointerException e) {
+    }
+    expectEquals(1, sA);
+
+    for (int i = 0; i < 5; i++) {
+      sA = 0;
+      bar(new int[i]);
+      expectEquals(i == 0 ? 1 : i, sA);
+    }
+
+    expectEquals(-1, catcher(null));
+    for (int i = 0; i < 5; i++) {
+      expectEquals(i == 0 ? 1 : i, catcher(new int[i]));
+    }
+
+    System.out.println("passed");
+  }
+
+  private static void expectEquals(int expected, int result) {
+    if (expected != result) {
+      throw new Error("Expected: " + expected + ", found: " + result);
+    }
+  }
+}
diff --git a/test/902-hello-transformation/expected.txt b/test/902-hello-transformation/expected.txt
index e86e814..a826f93 100644
--- a/test/902-hello-transformation/expected.txt
+++ b/test/902-hello-transformation/expected.txt
@@ -1,3 +1,3 @@
-Hello
+hello
 modifying class 'Transform'
 Goodbye
diff --git a/test/902-hello-transformation/run b/test/902-hello-transformation/run
index 204e4cc..3755d1d 100755
--- a/test/902-hello-transformation/run
+++ b/test/902-hello-transformation/run
@@ -39,5 +39,6 @@
                    --experimental runtime-plugins \
                    --runtime-option -agentpath:${agent}=902-hello-transformation,${arg} \
                    --android-runtime-option -Xplugin:${plugin} \
+                   --android-runtime-option -Xfully-deoptable \
                    ${other_args} \
                    --args ${lib}
diff --git a/test/902-hello-transformation/src/Transform.java b/test/902-hello-transformation/src/Transform.java
index dc0a0c4..8e8af35 100644
--- a/test/902-hello-transformation/src/Transform.java
+++ b/test/902-hello-transformation/src/Transform.java
@@ -16,6 +16,13 @@
 
 class Transform {
   public void sayHi() {
-    System.out.println("Hello");
+    // Use lower 'h' to make sure the string will have a different string id
+    // than the transformation (the transformation code is the same except
+    // the actual printed String, which was making the test inacurately passing
+    // in JIT mode when loading the string from the dex cache, as the string ids
+    // of the two different strings were the same).
+    // We know the string ids will be different because lexicographically:
+    // "Goodbye" < "LTransform;" < "hello".
+    System.out.println("hello");
   }
 }
diff --git a/test/902-hello-transformation/transform.cc b/test/902-hello-transformation/transform.cc
index e0d623e..3369dd4 100644
--- a/test/902-hello-transformation/transform.cc
+++ b/test/902-hello-transformation/transform.cc
@@ -23,6 +23,8 @@
 #include "base/logging.h"
 #include "jni.h"
 #include "openjdkjvmti/jvmti.h"
+#include "ti-agent/common_helper.h"
+#include "ti-agent/common_load.h"
 #include "utils.h"
 
 namespace art {
@@ -30,7 +32,6 @@
 
 static bool RuntimeIsJvm = false;
 
-jvmtiEnv* jvmti_env;
 bool IsJVM() {
   return RuntimeIsJvm;
 }
@@ -132,15 +133,13 @@
 jint OnLoad(JavaVM* vm,
             char* options,
             void* reserved ATTRIBUTE_UNUSED) {
-  jvmtiCapabilities caps;
   RuntimeIsJvm = (strcmp("jvm", options) == 0);
   if (vm->GetEnv(reinterpret_cast<void**>(&jvmti_env), JVMTI_VERSION_1_0)) {
     printf("Unable to get jvmti env!\n");
     return 1;
   }
+  SetAllCapabilities(jvmti_env);
   if (IsJVM()) {
-    jvmti_env->GetPotentialCapabilities(&caps);
-    jvmti_env->AddCapabilities(&caps);
     jvmtiEventCallbacks cbs;
     memset(&cbs, 0, sizeof(cbs));
     cbs.ClassFileLoadHook = transformationHook;
diff --git a/test/903-hello-tagging/build b/test/903-hello-tagging/build
new file mode 100755
index 0000000..898e2e5
--- /dev/null
+++ b/test/903-hello-tagging/build
@@ -0,0 +1,17 @@
+#!/bin/bash
+#
+# Copyright 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+./default-build "$@" --experimental agents
diff --git a/test/903-hello-tagging/expected.txt b/test/903-hello-tagging/expected.txt
new file mode 100644
index 0000000..872b79b
--- /dev/null
+++ b/test/903-hello-tagging/expected.txt
@@ -0,0 +1,10 @@
+18
+<nothing>
+18
+[<1;1>, <11;1>, <2;2>, <12;2>, <3;3>, <13;3>, <4;4>, <14;4>, <5;5>, <15;5>, <6;6>, <16;6>, <7;7>, <17;7>, <8;8>, <18;8>, <9;9>, <19;9>]
+4
+[<2;2>, <12;2>, <5;5>, <15;5>]
+18
+[<null;1>, <null;1>, <null;2>, <null;2>, <null;3>, <null;3>, <null;4>, <null;4>, <null;5>, <null;5>, <null;6>, <null;6>, <null;7>, <null;7>, <null;8>, <null;8>, <null;9>, <null;9>]
+18
+[<1;0>, <2;0>, <3;0>, <4;0>, <5;0>, <6;0>, <7;0>, <8;0>, <9;0>, <11;0>, <12;0>, <13;0>, <14;0>, <15;0>, <16;0>, <17;0>, <18;0>, <19;0>]
diff --git a/test/903-hello-tagging/info.txt b/test/903-hello-tagging/info.txt
new file mode 100644
index 0000000..875a5f6
--- /dev/null
+++ b/test/903-hello-tagging/info.txt
@@ -0,0 +1 @@
+Tests basic functions in the jvmti plugin.
diff --git a/test/903-hello-tagging/run b/test/903-hello-tagging/run
new file mode 100755
index 0000000..5e3c0bd
--- /dev/null
+++ b/test/903-hello-tagging/run
@@ -0,0 +1,43 @@
+#!/bin/bash
+#
+# Copyright 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+plugin=libopenjdkjvmtid.so
+agent=libtiagentd.so
+lib=tiagentd
+if  [[ "$@" == *"-O"* ]]; then
+  agent=libtiagent.so
+  plugin=libopenjdkjvmti.so
+  lib=tiagent
+fi
+
+if [[ "$@" == *"--jvm"* ]]; then
+  arg="jvm"
+else
+  arg="art"
+fi
+
+if [[ "$@" != *"--debuggable"* ]]; then
+  other_args=" -Xcompiler-option --debuggable "
+else
+  other_args=""
+fi
+
+./default-run "$@" --experimental agents \
+                   --experimental runtime-plugins \
+                   --runtime-option -agentpath:${agent}=903-hello-tagging,${arg} \
+                   --android-runtime-option -Xplugin:${plugin} \
+                   ${other_args} \
+                   --args ${lib}
diff --git a/test/903-hello-tagging/src/Main.java b/test/903-hello-tagging/src/Main.java
new file mode 100644
index 0000000..a8aedb4
--- /dev/null
+++ b/test/903-hello-tagging/src/Main.java
@@ -0,0 +1,170 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.ref.WeakReference;
+import java.util.ArrayList;
+import java.util.Arrays;
+
+public class Main {
+  public static void main(String[] args) {
+    System.loadLibrary(args[1]);
+    doTest();
+    testGetTaggedObjects();
+  }
+
+  public static void doTest() {
+    WeakReference<Object> weak = test();
+
+    Runtime.getRuntime().gc();
+    Runtime.getRuntime().gc();
+
+    if (weak.get() != null) {
+      throw new RuntimeException("WeakReference not cleared");
+    }
+  }
+
+  private static WeakReference<Object> test() {
+    Object o1 = new Object();
+    setTag(o1, 1);
+
+    Object o2 = new Object();
+    setTag(o2, 2);
+
+    checkTag(o1, 1);
+    checkTag(o2, 2);
+
+    Runtime.getRuntime().gc();
+    Runtime.getRuntime().gc();
+
+    checkTag(o1, 1);
+    checkTag(o2, 2);
+
+    Runtime.getRuntime().gc();
+    Runtime.getRuntime().gc();
+
+    setTag(o1, 10);
+    setTag(o2, 20);
+
+    checkTag(o1, 10);
+    checkTag(o2, 20);
+
+    return new WeakReference<Object>(o1);
+  }
+
+  private static void checkTag(Object o, long expectedTag) {
+    long tag = getTag(o);
+    if (expectedTag != tag) {
+      throw new RuntimeException("Unexpected tag " + tag + ", expected " + expectedTag);
+    }
+  }
+
+  private static void testGetTaggedObjects() {
+    // Use an array list to ensure that the objects stay live for a bit. Also gives us a source
+    // to compare to. We use index % 10 as the tag.
+    ArrayList<Object> l = new ArrayList<>();
+
+    for (int i = 0; i < 20; i++) {
+      Integer o = new Integer(i);
+      l.add(o);
+      if (i % 10 != 0) {
+        setTag(o, i % 10);
+      }
+    }
+
+    testGetTaggedObjectsRun(l, null, false, false);
+    testGetTaggedObjectsRun(l, null, true, true);
+    testGetTaggedObjectsRun(l, new long[] { 2, 5 }, true, true);
+    testGetTaggedObjectsRun(l, null, false, true);
+    testGetTaggedObjectsRun(l, null, true, false);
+  }
+
+  private static void testGetTaggedObjectsRun(ArrayList<Object> l, long[] searchTags,
+      boolean returnObjects, boolean returnTags) {
+    Object[] result = getTaggedObjects(searchTags, returnObjects, returnTags);
+
+    Object[] objects = (Object[])result[0];
+    long[] tags = (long[])result[1];
+    int count = (int)result[2];
+
+    System.out.println(count);
+    printArraysSorted(objects, tags);
+  }
+
+  private static void printArraysSorted(Object[] objects, long[] tags) {
+    if (objects == null && tags == null) {
+      System.out.println("<nothing>");
+      return;
+    }
+
+    int l1 = objects == null ? 0 : objects.length;
+    int l2 = tags == null ? 0 : tags.length;
+    int l = Math.max(l1, l2);
+    Pair[] tmp = new Pair[l];
+    for (int i = 0; i < l; i++) {
+      tmp[i] = new Pair(objects == null ? null : objects[i], tags == null ? 0 : tags[i]);
+    }
+
+    Arrays.sort(tmp);
+
+    System.out.println(Arrays.toString(tmp));
+  }
+
+  private static class Pair implements Comparable<Pair> {
+    Object obj;
+    long tag;
+    public Pair(Object o, long t) {
+      obj = o;
+      tag = t;
+    }
+
+    public int compareTo(Pair p) {
+      if (tag != p.tag) {
+        return Long.compare(tag, p.tag);
+      }
+
+      if ((obj instanceof Comparable) && (p.obj instanceof Comparable)) {
+        // It's not really correct, but w/e, best effort.
+        int result = ((Comparable<Object>)obj).compareTo(p.obj);
+        if (result != 0) {
+          return result;
+        }
+      }
+
+      if (obj != null && p.obj != null) {
+        return obj.hashCode() - p.obj.hashCode();
+      }
+
+      if (obj != null) {
+        return 1;
+      }
+
+      if (p.obj != null) {
+        return -1;
+      }
+
+      return hashCode() - p.hashCode();
+    }
+
+    public String toString() {
+      return "<" + obj + ";" + tag + ">";
+    }
+  }
+
+  private static native void setTag(Object o, long tag);
+  private static native long getTag(Object o);
+  private static native Object[] getTaggedObjects(long[] searchTags, boolean returnObjects,
+      boolean returnTags);
+}
diff --git a/test/903-hello-tagging/tagging.cc b/test/903-hello-tagging/tagging.cc
new file mode 100644
index 0000000..1557d45
--- /dev/null
+++ b/test/903-hello-tagging/tagging.cc
@@ -0,0 +1,155 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "tagging.h"
+
+#include <iostream>
+#include <pthread.h>
+#include <stdio.h>
+#include <vector>
+
+#include "jni.h"
+#include "ScopedLocalRef.h"
+#include "ScopedPrimitiveArray.h"
+
+#include "art_method-inl.h"
+#include "base/logging.h"
+#include "openjdkjvmti/jvmti.h"
+#include "ti-agent/common_helper.h"
+#include "ti-agent/common_load.h"
+#include "utils.h"
+
+namespace art {
+namespace Test903HelloTagging {
+
+extern "C" JNIEXPORT void JNICALL Java_Main_setTag(JNIEnv* env ATTRIBUTE_UNUSED,
+                                                   jclass,
+                                                   jobject obj,
+                                                   jlong tag) {
+  jvmtiError ret = jvmti_env->SetTag(obj, tag);
+  if (ret != JVMTI_ERROR_NONE) {
+    char* err;
+    jvmti_env->GetErrorName(ret, &err);
+    printf("Error setting tag: %s\n", err);
+  }
+}
+
+extern "C" JNIEXPORT jlong JNICALL Java_Main_getTag(JNIEnv* env ATTRIBUTE_UNUSED,
+                                                    jclass,
+                                                    jobject obj) {
+  jlong tag = 0;
+  jvmtiError ret = jvmti_env->GetTag(obj, &tag);
+  if (ret != JVMTI_ERROR_NONE) {
+    char* err;
+    jvmti_env->GetErrorName(ret, &err);
+    printf("Error getting tag: %s\n", err);
+  }
+  return tag;
+}
+
+extern "C" JNIEXPORT jobjectArray JNICALL Java_Main_getTaggedObjects(JNIEnv* env,
+                                                                     jclass,
+                                                                     jlongArray searchTags,
+                                                                     jboolean returnObjects,
+                                                                     jboolean returnTags) {
+  ScopedLongArrayRO scoped_array(env);
+  if (searchTags != nullptr) {
+    scoped_array.reset(searchTags);
+  }
+  const jlong* tag_ptr = scoped_array.get();
+  if (tag_ptr == nullptr) {
+    // Can never pass null.
+    tag_ptr = reinterpret_cast<const jlong*>(1);
+  }
+
+  jint result_count;
+  jobject* result_object_array;
+  jobject** result_object_array_ptr = returnObjects == JNI_TRUE ? &result_object_array : nullptr;
+  jlong* result_tag_array;
+  jlong** result_tag_array_ptr = returnTags == JNI_TRUE ? &result_tag_array : nullptr;
+
+  jvmtiError ret = jvmti_env->GetObjectsWithTags(scoped_array.size(),
+                                                 tag_ptr,
+                                                 &result_count,
+                                                 result_object_array_ptr,
+                                                 result_tag_array_ptr);
+  if (ret != JVMTI_ERROR_NONE) {
+    char* err;
+    jvmti_env->GetErrorName(ret, &err);
+    printf("Failure running GetLoadedClasses: %s\n", err);
+    return nullptr;
+  }
+
+  CHECK_GE(result_count, 0);
+
+  ScopedLocalRef<jclass> obj_class(env, env->FindClass("java/lang/Object"));
+  if (obj_class.get() == nullptr) {
+    return nullptr;
+  }
+
+  jobjectArray resultObjectArray = nullptr;
+  if (returnObjects == JNI_TRUE) {
+    resultObjectArray = env->NewObjectArray(result_count, obj_class.get(), nullptr);
+    if (resultObjectArray == nullptr) {
+      return nullptr;
+    }
+    for (jint i = 0; i < result_count; ++i) {
+      env->SetObjectArrayElement(resultObjectArray, i, result_object_array[i]);
+    }
+  }
+
+  jlongArray resultTagArray = nullptr;
+  if (returnTags == JNI_TRUE) {
+    resultTagArray = env->NewLongArray(result_count);
+    env->SetLongArrayRegion(resultTagArray, 0, result_count, result_tag_array);
+  }
+
+  jobject count_integer;
+  {
+    ScopedLocalRef<jclass> integer_class(env, env->FindClass("java/lang/Integer"));
+    jmethodID methodID = env->GetMethodID(integer_class.get(), "<init>", "(I)V");
+    count_integer = env->NewObject(integer_class.get(), methodID, result_count);
+    if (count_integer == nullptr) {
+      return nullptr;
+    }
+  }
+
+  jobjectArray resultArray = env->NewObjectArray(3, obj_class.get(), nullptr);
+  if (resultArray == nullptr) {
+    return nullptr;
+  }
+  env->SetObjectArrayElement(resultArray, 0, resultObjectArray);
+  env->SetObjectArrayElement(resultArray, 1, resultTagArray);
+  env->SetObjectArrayElement(resultArray, 2, count_integer);
+
+  return resultArray;
+}
+
+// Don't do anything
+jint OnLoad(JavaVM* vm,
+            char* options ATTRIBUTE_UNUSED,
+            void* reserved ATTRIBUTE_UNUSED) {
+  if (vm->GetEnv(reinterpret_cast<void**>(&jvmti_env), JVMTI_VERSION_1_0)) {
+    printf("Unable to get jvmti env!\n");
+    return 1;
+  }
+  SetAllCapabilities(jvmti_env);
+  return 0;
+}
+
+}  // namespace Test903HelloTagging
+}  // namespace art
+
diff --git a/test/562-no-intermediate/src/Main.java b/test/903-hello-tagging/tagging.h
similarity index 65%
copy from test/562-no-intermediate/src/Main.java
copy to test/903-hello-tagging/tagging.h
index 3b74d6f..f062d44 100644
--- a/test/562-no-intermediate/src/Main.java
+++ b/test/903-hello-tagging/tagging.h
@@ -14,14 +14,17 @@
  * limitations under the License.
  */
 
-public class Main {
+#ifndef ART_TEST_903_HELLO_TAGGING_TAGGING_H_
+#define ART_TEST_903_HELLO_TAGGING_TAGGING_H_
 
-  /// CHECK-START-ARM64: int Main.main(String[]) register_allocator (after)
-  /// CHECK-NOT: IntermediateAddress
-  public static void main(String[] args) {
-    array[index] += Math.cos(42);
-  }
+#include <jni.h>
 
-  static int index = 0;
-  static double[] array = new double[2];
-}
+namespace art {
+namespace Test903HelloTagging {
+
+jint OnLoad(JavaVM* vm, char* options, void* reserved);
+
+}  // namespace Test903HelloTagging
+}  // namespace art
+
+#endif  // ART_TEST_903_HELLO_TAGGING_TAGGING_H_
diff --git a/test/904-object-allocation/build b/test/904-object-allocation/build
new file mode 100755
index 0000000..898e2e5
--- /dev/null
+++ b/test/904-object-allocation/build
@@ -0,0 +1,17 @@
+#!/bin/bash
+#
+# Copyright 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+./default-build "$@" --experimental agents
diff --git a/test/904-object-allocation/expected.txt b/test/904-object-allocation/expected.txt
new file mode 100644
index 0000000..371d2b7
--- /dev/null
+++ b/test/904-object-allocation/expected.txt
@@ -0,0 +1,8 @@
+ObjectAllocated type java.lang.Object/java.lang.Object size 8
+ObjectAllocated type java.lang.Integer/java.lang.Integer size 16
+ObjectAllocated type java.lang.Short/java.lang.Short size 16
+Tracking on same thread
+ObjectAllocated type java.lang.Double/java.lang.Double size 16
+Tracking on same thread, not disabling tracking
+ObjectAllocated type java.lang.Double/java.lang.Double size 16
+Tracking on different thread
diff --git a/test/904-object-allocation/info.txt b/test/904-object-allocation/info.txt
new file mode 100644
index 0000000..875a5f6
--- /dev/null
+++ b/test/904-object-allocation/info.txt
@@ -0,0 +1 @@
+Tests basic functions in the jvmti plugin.
diff --git a/test/904-object-allocation/run b/test/904-object-allocation/run
new file mode 100755
index 0000000..2f7ad21
--- /dev/null
+++ b/test/904-object-allocation/run
@@ -0,0 +1,43 @@
+#!/bin/bash
+#
+# Copyright 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+plugin=libopenjdkjvmtid.so
+agent=libtiagentd.so
+lib=tiagentd
+if  [[ "$@" == *"-O"* ]]; then
+  agent=libtiagent.so
+  plugin=libopenjdkjvmti.so
+  lib=tiagent
+fi
+
+if [[ "$@" == *"--jvm"* ]]; then
+  arg="jvm"
+else
+  arg="art"
+fi
+
+if [[ "$@" != *"--debuggable"* ]]; then
+  other_args=" -Xcompiler-option --debuggable "
+else
+  other_args=""
+fi
+
+./default-run "$@" --experimental agents \
+                   --experimental runtime-plugins \
+                   --runtime-option -agentpath:${agent}=904-object-allocation,${arg} \
+                   --android-runtime-option -Xplugin:${plugin} \
+                   ${other_args} \
+                   --args ${lib}
diff --git a/test/904-object-allocation/src/Main.java b/test/904-object-allocation/src/Main.java
new file mode 100644
index 0000000..fc8a112
--- /dev/null
+++ b/test/904-object-allocation/src/Main.java
@@ -0,0 +1,143 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.ArrayList;
+
+public class Main {
+  public static void main(String[] args) throws Exception {
+    System.loadLibrary(args[1]);
+
+    // Use a list to ensure objects must be allocated.
+    ArrayList<Object> l = new ArrayList<>(100);
+
+    prefetchClassNames();
+
+    doTest(l);
+  }
+
+  // Pre-resolve class names so the strings don't have to be allocated as a side effect of
+  // callback printing.
+  private static void prefetchClassNames() {
+      Object.class.getName();
+      Integer.class.getName();
+      Float.class.getName();
+      Short.class.getName();
+      Byte.class.getName();
+      Double.class.getName();
+  }
+
+  public static void doTest(ArrayList<Object> l) throws Exception {
+    // Disable the global registration from OnLoad, to get into a known state.
+    enableAllocationTracking(null, false);
+
+    // Enable actual logging callback.
+    setupObjectAllocCallback(true);
+
+    enableAllocationTracking(null, true);
+
+    l.add(new Object());
+    l.add(new Integer(1));
+
+    enableAllocationTracking(null, false);
+
+    l.add(new Float(1.0f));
+
+    enableAllocationTracking(Thread.currentThread(), true);
+
+    l.add(new Short((short)0));
+
+    enableAllocationTracking(Thread.currentThread(), false);
+
+    l.add(new Byte((byte)0));
+
+    System.out.println("Tracking on same thread");
+
+    testThread(l, true, true);
+
+    l.add(new Byte((byte)0));
+
+    System.out.println("Tracking on same thread, not disabling tracking");
+
+    testThread(l, true, false);
+
+    System.out.println("Tracking on different thread");
+
+    testThread(l, false, true);
+
+    l.add(new Byte((byte)0));
+
+    // Disable actual logging callback and re-enable tracking, so we can keep the event enabled and
+    // check that shutdown works correctly.
+    setupObjectAllocCallback(false);
+    enableAllocationTracking(null, true);
+  }
+
+  private static void testThread(final ArrayList<Object> l, final boolean sameThread,
+      final boolean disableTracking) throws Exception {
+    final SimpleBarrier startBarrier = new SimpleBarrier(1);
+    final SimpleBarrier trackBarrier = new SimpleBarrier(1);
+    final SimpleBarrier disableBarrier = new SimpleBarrier(1);
+
+    final Thread thisThread = Thread.currentThread();
+
+    Thread t = new Thread() {
+      public void run() {
+        try {
+          startBarrier.dec();
+          trackBarrier.waitFor();
+        } catch (Exception e) {
+          e.printStackTrace(System.out);
+          System.exit(1);
+        }
+
+        l.add(new Double(0.0));
+
+        if (disableTracking) {
+          enableAllocationTracking(sameThread ? this : thisThread, false);
+        }
+      }
+    };
+
+    t.start();
+    startBarrier.waitFor();
+    enableAllocationTracking(sameThread ? t : Thread.currentThread(), true);
+    trackBarrier.dec();
+
+    t.join();
+  }
+
+  private static class SimpleBarrier {
+    int count;
+
+    public SimpleBarrier(int i) {
+      count = i;
+    }
+
+    public synchronized void dec() throws Exception {
+      count--;
+      notifyAll();
+    }
+
+    public synchronized void waitFor() throws Exception  {
+      while (count != 0) {
+        wait();
+      }
+    }
+  }
+
+  private static native void setupObjectAllocCallback(boolean enable);
+  private static native void enableAllocationTracking(Thread thread, boolean enable);
+}
diff --git a/test/904-object-allocation/tracking.cc b/test/904-object-allocation/tracking.cc
new file mode 100644
index 0000000..9261a9f
--- /dev/null
+++ b/test/904-object-allocation/tracking.cc
@@ -0,0 +1,105 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "tracking.h"
+
+#include <iostream>
+#include <pthread.h>
+#include <stdio.h>
+#include <vector>
+
+#include "base/logging.h"
+#include "jni.h"
+#include "openjdkjvmti/jvmti.h"
+#include "ScopedLocalRef.h"
+#include "ScopedUtfChars.h"
+#include "ti-agent/common_helper.h"
+#include "ti-agent/common_load.h"
+#include "utils.h"
+
+namespace art {
+namespace Test904ObjectAllocation {
+
+static std::string GetClassName(JNIEnv* jni_env, jclass cls) {
+  ScopedLocalRef<jclass> class_class(jni_env, jni_env->GetObjectClass(cls));
+  jmethodID mid = jni_env->GetMethodID(class_class.get(), "getName", "()Ljava/lang/String;");
+  ScopedLocalRef<jstring> str(
+      jni_env, reinterpret_cast<jstring>(jni_env->CallObjectMethod(cls, mid)));
+  ScopedUtfChars utf_chars(jni_env, str.get());
+  return utf_chars.c_str();
+}
+
+static void JNICALL ObjectAllocated(jvmtiEnv* ti_env ATTRIBUTE_UNUSED,
+                                    JNIEnv* jni_env,
+                                    jthread thread ATTRIBUTE_UNUSED,
+                                    jobject object,
+                                    jclass object_klass,
+                                    jlong size) {
+  std::string object_klass_descriptor = GetClassName(jni_env, object_klass);
+  ScopedLocalRef<jclass> object_klass2(jni_env, jni_env->GetObjectClass(object));
+  std::string object_klass_descriptor2 = GetClassName(jni_env, object_klass2.get());
+
+  printf("ObjectAllocated type %s/%s size %zu\n",
+         object_klass_descriptor.c_str(),
+         object_klass_descriptor2.c_str(),
+         static_cast<size_t>(size));
+}
+
+extern "C" JNIEXPORT void JNICALL Java_Main_setupObjectAllocCallback(
+    JNIEnv* env ATTRIBUTE_UNUSED, jclass klass ATTRIBUTE_UNUSED, jboolean enable) {
+  jvmtiEventCallbacks callbacks;
+  memset(&callbacks, 0, sizeof(jvmtiEventCallbacks));
+  callbacks.VMObjectAlloc = enable ? ObjectAllocated : nullptr;
+
+  jvmtiError ret = jvmti_env->SetEventCallbacks(&callbacks, sizeof(callbacks));
+  if (ret != JVMTI_ERROR_NONE) {
+    char* err;
+    jvmti_env->GetErrorName(ret, &err);
+    printf("Error setting callbacks: %s\n", err);
+  }
+}
+
+extern "C" JNIEXPORT void JNICALL Java_Main_enableAllocationTracking(JNIEnv* env ATTRIBUTE_UNUSED,
+                                                                     jclass,
+                                                                     jthread thread,
+                                                                     jboolean enable) {
+  jvmtiError ret = jvmti_env->SetEventNotificationMode(
+      enable ? JVMTI_ENABLE : JVMTI_DISABLE,
+      JVMTI_EVENT_VM_OBJECT_ALLOC,
+      thread);
+  if (ret != JVMTI_ERROR_NONE) {
+    char* err;
+    jvmti_env->GetErrorName(ret, &err);
+    printf("Error enabling/disabling allocation tracking: %s\n", err);
+  }
+}
+
+// Don't do anything
+jint OnLoad(JavaVM* vm,
+            char* options ATTRIBUTE_UNUSED,
+            void* reserved ATTRIBUTE_UNUSED) {
+  if (vm->GetEnv(reinterpret_cast<void**>(&jvmti_env), JVMTI_VERSION_1_0)) {
+    printf("Unable to get jvmti env!\n");
+    return 1;
+  }
+  jvmti_env->SetEventNotificationMode(JVMTI_ENABLE, JVMTI_EVENT_VM_OBJECT_ALLOC, nullptr);
+  SetAllCapabilities(jvmti_env);
+  return 0;
+}
+
+}  // namespace Test904ObjectAllocation
+}  // namespace art
+
diff --git a/test/562-no-intermediate/src/Main.java b/test/904-object-allocation/tracking.h
similarity index 63%
copy from test/562-no-intermediate/src/Main.java
copy to test/904-object-allocation/tracking.h
index 3b74d6f..21c1837 100644
--- a/test/562-no-intermediate/src/Main.java
+++ b/test/904-object-allocation/tracking.h
@@ -14,14 +14,17 @@
  * limitations under the License.
  */
 
-public class Main {
+#ifndef ART_TEST_904_OBJECT_ALLOCATION_TRACKING_H_
+#define ART_TEST_904_OBJECT_ALLOCATION_TRACKING_H_
 
-  /// CHECK-START-ARM64: int Main.main(String[]) register_allocator (after)
-  /// CHECK-NOT: IntermediateAddress
-  public static void main(String[] args) {
-    array[index] += Math.cos(42);
-  }
+#include <jni.h>
 
-  static int index = 0;
-  static double[] array = new double[2];
-}
+namespace art {
+namespace Test904ObjectAllocation {
+
+jint OnLoad(JavaVM* vm, char* options, void* reserved);
+
+}  // namespace Test904ObjectAllocation
+}  // namespace art
+
+#endif  // ART_TEST_904_OBJECT_ALLOCATION_TRACKING_H_
diff --git a/test/905-object-free/build b/test/905-object-free/build
new file mode 100755
index 0000000..898e2e5
--- /dev/null
+++ b/test/905-object-free/build
@@ -0,0 +1,17 @@
+#!/bin/bash
+#
+# Copyright 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+./default-build "$@" --experimental agents
diff --git a/test/905-object-free/expected.txt b/test/905-object-free/expected.txt
new file mode 100644
index 0000000..436ca11
--- /dev/null
+++ b/test/905-object-free/expected.txt
@@ -0,0 +1,12 @@
+[1]
+---
+[10, 100, 1000]
+---
+[]
+---
+[]
+---
+[]
+---
+[]
+---
diff --git a/test/905-object-free/info.txt b/test/905-object-free/info.txt
new file mode 100644
index 0000000..875a5f6
--- /dev/null
+++ b/test/905-object-free/info.txt
@@ -0,0 +1 @@
+Tests basic functions in the jvmti plugin.
diff --git a/test/905-object-free/run b/test/905-object-free/run
new file mode 100755
index 0000000..753b742
--- /dev/null
+++ b/test/905-object-free/run
@@ -0,0 +1,43 @@
+#!/bin/bash
+#
+# Copyright 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+plugin=libopenjdkjvmtid.so
+agent=libtiagentd.so
+lib=tiagentd
+if  [[ "$@" == *"-O"* ]]; then
+  agent=libtiagent.so
+  plugin=libopenjdkjvmti.so
+  lib=tiagent
+fi
+
+if [[ "$@" == *"--jvm"* ]]; then
+  arg="jvm"
+else
+  arg="art"
+fi
+
+if [[ "$@" != *"--debuggable"* ]]; then
+  other_args=" -Xcompiler-option --debuggable "
+else
+  other_args=""
+fi
+
+./default-run "$@" --experimental agents \
+                   --experimental runtime-plugins \
+                   --runtime-option -agentpath:${agent}=905-object-free,${arg} \
+                   --android-runtime-option -Xplugin:${plugin} \
+                   ${other_args} \
+                   --args ${lib}
diff --git a/test/905-object-free/src/Main.java b/test/905-object-free/src/Main.java
new file mode 100644
index 0000000..16dec5d
--- /dev/null
+++ b/test/905-object-free/src/Main.java
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.ArrayList;
+import java.util.Arrays;
+
+public class Main {
+  public static void main(String[] args) throws Exception {
+    System.loadLibrary(args[1]);
+
+    doTest();
+  }
+
+  public static void doTest() throws Exception {
+    // Use a list to ensure objects must be allocated.
+    ArrayList<Object> l = new ArrayList<>(100);
+
+    setupObjectFreeCallback();
+
+    enableFreeTracking(true);
+    run(l);
+
+    enableFreeTracking(false);
+    run(l);
+  }
+
+  private static void run(ArrayList<Object> l) {
+    allocate(l, 1);
+    l.clear();
+
+    Runtime.getRuntime().gc();
+
+    getAndPrintTags();
+    System.out.println("---");
+
+    // Note: the reporting will not depend on the heap layout (which could be unstable). Walking
+    //       the tag table should give us a stable output order.
+    for (int i = 10; i <= 1000; i *= 10) {
+      allocate(l, i);
+    }
+    l.clear();
+
+    Runtime.getRuntime().gc();
+
+    getAndPrintTags();
+    System.out.println("---");
+
+    Runtime.getRuntime().gc();
+
+    getAndPrintTags();
+    System.out.println("---");
+  }
+
+  private static void allocate(ArrayList<Object> l, long tag) {
+    Object obj = new Object();
+    l.add(obj);
+    setTag(obj, tag);
+  }
+
+  private static void getAndPrintTags() {
+    long[] freedTags = getCollectedTags();
+    Arrays.sort(freedTags);
+    System.out.println(Arrays.toString(freedTags));
+  }
+
+  private static native void setupObjectFreeCallback();
+  private static native void enableFreeTracking(boolean enable);
+  private static native void setTag(Object o, long tag);
+  private static native long[] getCollectedTags();
+}
diff --git a/test/905-object-free/tracking_free.cc b/test/905-object-free/tracking_free.cc
new file mode 100644
index 0000000..fc43acc
--- /dev/null
+++ b/test/905-object-free/tracking_free.cc
@@ -0,0 +1,96 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "tracking_free.h"
+
+#include <iostream>
+#include <pthread.h>
+#include <stdio.h>
+#include <vector>
+
+#include "base/logging.h"
+#include "jni.h"
+#include "openjdkjvmti/jvmti.h"
+#include "ScopedLocalRef.h"
+#include "ScopedUtfChars.h"
+#include "ti-agent/common_helper.h"
+#include "ti-agent/common_load.h"
+#include "utils.h"
+
+namespace art {
+namespace Test905ObjectFree {
+
+static std::vector<jlong> collected_tags;
+
+static void JNICALL ObjectFree(jvmtiEnv* ti_env ATTRIBUTE_UNUSED, jlong tag) {
+  collected_tags.push_back(tag);
+}
+
+extern "C" JNIEXPORT void JNICALL Java_Main_setupObjectFreeCallback(
+    JNIEnv* env ATTRIBUTE_UNUSED, jclass klass ATTRIBUTE_UNUSED) {
+  jvmtiEventCallbacks callbacks;
+  memset(&callbacks, 0, sizeof(jvmtiEventCallbacks));
+  callbacks.ObjectFree = ObjectFree;
+
+  jvmtiError ret = jvmti_env->SetEventCallbacks(&callbacks, sizeof(callbacks));
+  if (ret != JVMTI_ERROR_NONE) {
+    char* err;
+    jvmti_env->GetErrorName(ret, &err);
+    printf("Error setting callbacks: %s\n", err);
+  }
+}
+
+extern "C" JNIEXPORT void JNICALL Java_Main_enableFreeTracking(JNIEnv* env ATTRIBUTE_UNUSED,
+                                                               jclass klass ATTRIBUTE_UNUSED,
+                                                               jboolean enable) {
+  jvmtiError ret = jvmti_env->SetEventNotificationMode(
+      enable ? JVMTI_ENABLE : JVMTI_DISABLE,
+      JVMTI_EVENT_OBJECT_FREE,
+      nullptr);
+  if (ret != JVMTI_ERROR_NONE) {
+    char* err;
+    jvmti_env->GetErrorName(ret, &err);
+    printf("Error enabling/disabling object-free callbacks: %s\n", err);
+  }
+}
+
+extern "C" JNIEXPORT jlongArray JNICALL Java_Main_getCollectedTags(JNIEnv* env,
+                                                                   jclass klass ATTRIBUTE_UNUSED) {
+  jlongArray ret = env->NewLongArray(collected_tags.size());
+  if (ret == nullptr) {
+    return ret;
+  }
+
+  env->SetLongArrayRegion(ret, 0, collected_tags.size(), collected_tags.data());
+  collected_tags.clear();
+
+  return ret;
+}
+
+// Don't do anything
+jint OnLoad(JavaVM* vm,
+            char* options ATTRIBUTE_UNUSED,
+            void* reserved ATTRIBUTE_UNUSED) {
+  if (vm->GetEnv(reinterpret_cast<void**>(&jvmti_env), JVMTI_VERSION_1_0)) {
+    printf("Unable to get jvmti env!\n");
+    return 1;
+  }
+  SetAllCapabilities(jvmti_env);
+  return 0;
+}
+
+}  // namespace Test905ObjectFree
+}  // namespace art
diff --git a/test/562-no-intermediate/src/Main.java b/test/905-object-free/tracking_free.h
similarity index 64%
copy from test/562-no-intermediate/src/Main.java
copy to test/905-object-free/tracking_free.h
index 3b74d6f..ba4aa43 100644
--- a/test/562-no-intermediate/src/Main.java
+++ b/test/905-object-free/tracking_free.h
@@ -14,14 +14,17 @@
  * limitations under the License.
  */
 
-public class Main {
+#ifndef ART_TEST_905_OBJECT_FREE_TRACKING_FREE_H_
+#define ART_TEST_905_OBJECT_FREE_TRACKING_FREE_H_
 
-  /// CHECK-START-ARM64: int Main.main(String[]) register_allocator (after)
-  /// CHECK-NOT: IntermediateAddress
-  public static void main(String[] args) {
-    array[index] += Math.cos(42);
-  }
+#include <jni.h>
 
-  static int index = 0;
-  static double[] array = new double[2];
-}
+namespace art {
+namespace Test905ObjectFree {
+
+jint OnLoad(JavaVM* vm, char* options, void* reserved);
+
+}  // namespace Test905ObjectFree
+}  // namespace art
+
+#endif  // ART_TEST_905_OBJECT_FREE_TRACKING_FREE_H_
diff --git a/test/906-iterate-heap/build b/test/906-iterate-heap/build
new file mode 100755
index 0000000..898e2e5
--- /dev/null
+++ b/test/906-iterate-heap/build
@@ -0,0 +1,17 @@
+#!/bin/bash
+#
+# Copyright 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+./default-build "$@" --experimental agents
diff --git a/test/906-iterate-heap/expected.txt b/test/906-iterate-heap/expected.txt
new file mode 100644
index 0000000..72cd47d
--- /dev/null
+++ b/test/906-iterate-heap/expected.txt
@@ -0,0 +1,2 @@
+[{tag=1, class-tag=0, size=8, length=-1}, {tag=2, class-tag=100, size=8, length=-1}, {tag=3, class-tag=100, size=8, length=-1}, {tag=4, class-tag=0, size=32, length=5}, {tag=100, class-tag=0, size=<class>, length=-1}]
+[{tag=11, class-tag=0, size=8, length=-1}, {tag=12, class-tag=110, size=8, length=-1}, {tag=13, class-tag=110, size=8, length=-1}, {tag=14, class-tag=0, size=32, length=5}, {tag=110, class-tag=0, size=<class>, length=-1}]
diff --git a/test/906-iterate-heap/info.txt b/test/906-iterate-heap/info.txt
new file mode 100644
index 0000000..875a5f6
--- /dev/null
+++ b/test/906-iterate-heap/info.txt
@@ -0,0 +1 @@
+Tests basic functions in the jvmti plugin.
diff --git a/test/906-iterate-heap/iterate_heap.cc b/test/906-iterate-heap/iterate_heap.cc
new file mode 100644
index 0000000..8dac89d
--- /dev/null
+++ b/test/906-iterate-heap/iterate_heap.cc
@@ -0,0 +1,189 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "iterate_heap.h"
+
+#include <iostream>
+#include <pthread.h>
+#include <stdio.h>
+#include <vector>
+
+#include "base/logging.h"
+#include "jni.h"
+#include "openjdkjvmti/jvmti.h"
+#include "ScopedPrimitiveArray.h"
+#include "ti-agent/common_helper.h"
+#include "ti-agent/common_load.h"
+
+namespace art {
+namespace Test906IterateHeap {
+
+class IterationConfig {
+ public:
+  IterationConfig() {}
+  virtual ~IterationConfig() {}
+
+  virtual jint Handle(jlong class_tag, jlong size, jlong* tag_ptr, jint length) = 0;
+};
+
+static jint JNICALL HeapIterationCallback(jlong class_tag,
+                                          jlong size,
+                                          jlong* tag_ptr,
+                                          jint length,
+                                          void* user_data) {
+  IterationConfig* config = reinterpret_cast<IterationConfig*>(user_data);
+  return config->Handle(class_tag, size, tag_ptr, length);
+}
+
+static bool Run(jint heap_filter, jclass klass_filter, IterationConfig* config) {
+  jvmtiHeapCallbacks callbacks;
+  memset(&callbacks, 0, sizeof(jvmtiHeapCallbacks));
+  callbacks.heap_iteration_callback = HeapIterationCallback;
+
+  jvmtiError ret = jvmti_env->IterateThroughHeap(heap_filter,
+                                                 klass_filter,
+                                                 &callbacks,
+                                                 config);
+  if (ret != JVMTI_ERROR_NONE) {
+    char* err;
+    jvmti_env->GetErrorName(ret, &err);
+    printf("Failure running IterateThroughHeap: %s\n", err);
+    return false;
+  }
+  return true;
+}
+
+extern "C" JNIEXPORT jint JNICALL Java_Main_iterateThroughHeapCount(JNIEnv* env ATTRIBUTE_UNUSED,
+                                                                    jclass klass ATTRIBUTE_UNUSED,
+                                                                    jint heap_filter,
+                                                                    jclass klass_filter,
+                                                                    jint stop_after) {
+  class CountIterationConfig : public IterationConfig {
+   public:
+    CountIterationConfig(jint _counter, jint _stop_after)
+        : counter(_counter),
+          stop_after(_stop_after) {
+    }
+
+    jint Handle(jlong class_tag ATTRIBUTE_UNUSED,
+                jlong size ATTRIBUTE_UNUSED,
+                jlong* tag_ptr ATTRIBUTE_UNUSED,
+                jint length ATTRIBUTE_UNUSED) OVERRIDE {
+      counter++;
+      if (counter == stop_after) {
+        return JVMTI_VISIT_ABORT;
+      }
+      return 0;
+    }
+
+    jint counter;
+    const jint stop_after;
+  };
+
+  CountIterationConfig config(0, stop_after);
+  Run(heap_filter, klass_filter, &config);
+
+  if (config.counter > config.stop_after) {
+    printf("Error: more objects visited than signaled.");
+  }
+
+  return config.counter;
+}
+
+
+extern "C" JNIEXPORT jint JNICALL Java_Main_iterateThroughHeapData(JNIEnv* env,
+                                                                   jclass klass ATTRIBUTE_UNUSED,
+                                                                   jint heap_filter,
+                                                                   jclass klass_filter,
+                                                                   jlongArray class_tags,
+                                                                   jlongArray sizes,
+                                                                   jlongArray tags,
+                                                                   jintArray lengths) {
+  class DataIterationConfig : public IterationConfig {
+   public:
+    jint Handle(jlong class_tag, jlong size, jlong* tag_ptr, jint length) OVERRIDE {
+      class_tags_.push_back(class_tag);
+      sizes_.push_back(size);
+      tags_.push_back(*tag_ptr);
+      lengths_.push_back(length);
+
+      return 0;  // Continue.
+    }
+
+    std::vector<jlong> class_tags_;
+    std::vector<jlong> sizes_;
+    std::vector<jlong> tags_;
+    std::vector<jint> lengths_;
+  };
+
+  DataIterationConfig config;
+  if (!Run(heap_filter, klass_filter, &config)) {
+    return -1;
+  }
+
+  ScopedLongArrayRW s_class_tags(env, class_tags);
+  ScopedLongArrayRW s_sizes(env, sizes);
+  ScopedLongArrayRW s_tags(env, tags);
+  ScopedIntArrayRW s_lengths(env, lengths);
+
+  for (size_t i = 0; i != config.class_tags_.size(); ++i) {
+    s_class_tags[i] = config.class_tags_[i];
+    s_sizes[i] = config.sizes_[i];
+    s_tags[i] = config.tags_[i];
+    s_lengths[i] = config.lengths_[i];
+  }
+
+  return static_cast<jint>(config.class_tags_.size());
+}
+
+extern "C" JNIEXPORT void JNICALL Java_Main_iterateThroughHeapAdd(JNIEnv* env ATTRIBUTE_UNUSED,
+                                                                  jclass klass ATTRIBUTE_UNUSED,
+                                                                  jint heap_filter,
+                                                                  jclass klass_filter) {
+  class AddIterationConfig : public IterationConfig {
+   public:
+    AddIterationConfig() {}
+
+    jint Handle(jlong class_tag ATTRIBUTE_UNUSED,
+                jlong size ATTRIBUTE_UNUSED,
+                jlong* tag_ptr,
+                jint length ATTRIBUTE_UNUSED) OVERRIDE {
+      jlong current_tag = *tag_ptr;
+      if (current_tag != 0) {
+        *tag_ptr = current_tag + 10;
+      }
+      return 0;
+    }
+  };
+
+  AddIterationConfig config;
+  Run(heap_filter, klass_filter, &config);
+}
+
+// Don't do anything
+jint OnLoad(JavaVM* vm,
+            char* options ATTRIBUTE_UNUSED,
+            void* reserved ATTRIBUTE_UNUSED) {
+  if (vm->GetEnv(reinterpret_cast<void**>(&jvmti_env), JVMTI_VERSION_1_0)) {
+    printf("Unable to get jvmti env!\n");
+    return 1;
+  }
+  SetAllCapabilities(jvmti_env);
+  return 0;
+}
+
+}  // namespace Test906IterateHeap
+}  // namespace art
diff --git a/test/562-no-intermediate/src/Main.java b/test/906-iterate-heap/iterate_heap.h
similarity index 64%
copy from test/562-no-intermediate/src/Main.java
copy to test/906-iterate-heap/iterate_heap.h
index 3b74d6f..f25cdba 100644
--- a/test/562-no-intermediate/src/Main.java
+++ b/test/906-iterate-heap/iterate_heap.h
@@ -14,14 +14,17 @@
  * limitations under the License.
  */
 
-public class Main {
+#ifndef ART_TEST_906_ITERATE_HEAP_ITERATE_HEAP_H_
+#define ART_TEST_906_ITERATE_HEAP_ITERATE_HEAP_H_
 
-  /// CHECK-START-ARM64: int Main.main(String[]) register_allocator (after)
-  /// CHECK-NOT: IntermediateAddress
-  public static void main(String[] args) {
-    array[index] += Math.cos(42);
-  }
+#include <jni.h>
 
-  static int index = 0;
-  static double[] array = new double[2];
-}
+namespace art {
+namespace Test906IterateHeap {
+
+jint OnLoad(JavaVM* vm, char* options, void* reserved);
+
+}  // namespace Test906IterateHeap
+}  // namespace art
+
+#endif  // ART_TEST_906_ITERATE_HEAP_ITERATE_HEAP_H_
diff --git a/test/906-iterate-heap/run b/test/906-iterate-heap/run
new file mode 100755
index 0000000..3e135a3
--- /dev/null
+++ b/test/906-iterate-heap/run
@@ -0,0 +1,43 @@
+#!/bin/bash
+#
+# Copyright 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+plugin=libopenjdkjvmtid.so
+agent=libtiagentd.so
+lib=tiagentd
+if  [[ "$@" == *"-O"* ]]; then
+  agent=libtiagent.so
+  plugin=libopenjdkjvmti.so
+  lib=tiagent
+fi
+
+if [[ "$@" == *"--jvm"* ]]; then
+  arg="jvm"
+else
+  arg="art"
+fi
+
+if [[ "$@" != *"--debuggable"* ]]; then
+  other_args=" -Xcompiler-option --debuggable "
+else
+  other_args=""
+fi
+
+./default-run "$@" --experimental agents \
+                   --experimental runtime-plugins \
+                   --runtime-option -agentpath:${agent}=906-iterate-heap,${arg} \
+                   --android-runtime-option -Xplugin:${plugin} \
+                   ${other_args} \
+                   --args ${lib}
diff --git a/test/906-iterate-heap/src/Main.java b/test/906-iterate-heap/src/Main.java
new file mode 100644
index 0000000..544a365
--- /dev/null
+++ b/test/906-iterate-heap/src/Main.java
@@ -0,0 +1,146 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.ArrayList;
+import java.util.Collections;
+
+public class Main {
+  public static void main(String[] args) throws Exception {
+    System.loadLibrary(args[1]);
+
+    doTest();
+  }
+
+  public static void doTest() throws Exception {
+    A a = new A();
+    B b = new B();
+    B b2 = new B();
+    C c = new C();
+    A[] aArray = new A[5];
+
+    setTag(a, 1);
+    setTag(b, 2);
+    setTag(b2, 3);
+    setTag(aArray, 4);
+    setTag(B.class, 100);
+
+    int all = iterateThroughHeapCount(0, null, Integer.MAX_VALUE);
+    int tagged = iterateThroughHeapCount(HEAP_FILTER_OUT_UNTAGGED, null, Integer.MAX_VALUE);
+    int untagged = iterateThroughHeapCount(HEAP_FILTER_OUT_TAGGED, null, Integer.MAX_VALUE);
+    int taggedClass = iterateThroughHeapCount(HEAP_FILTER_OUT_CLASS_UNTAGGED, null,
+        Integer.MAX_VALUE);
+    int untaggedClass = iterateThroughHeapCount(HEAP_FILTER_OUT_CLASS_TAGGED, null,
+        Integer.MAX_VALUE);
+
+    if (all != tagged + untagged) {
+      throw new IllegalStateException("Instances: " + all + " != " + tagged + " + " + untagged);
+    }
+    if (all != taggedClass + untaggedClass) {
+      throw new IllegalStateException("By class: " + all + " != " + taggedClass + " + " +
+          untaggedClass);
+    }
+    if (tagged != 5) {
+      throw new IllegalStateException(tagged + " tagged objects");
+    }
+    if (taggedClass != 2) {
+      throw new IllegalStateException(tagged + " objects with tagged class");
+    }
+    if (all == tagged) {
+      throw new IllegalStateException("All objects tagged");
+    }
+    if (all == taggedClass) {
+      throw new IllegalStateException("All objects have tagged class");
+    }
+
+    long classTags[] = new long[100];
+    long sizes[] = new long[100];
+    long tags[] = new long[100];
+    int lengths[] = new int[100];
+
+    int n = iterateThroughHeapData(HEAP_FILTER_OUT_UNTAGGED, null, classTags, sizes, tags, lengths);
+    System.out.println(sort(n, classTags, sizes, tags, lengths));
+
+    iterateThroughHeapAdd(HEAP_FILTER_OUT_UNTAGGED, null);
+    n = iterateThroughHeapData(HEAP_FILTER_OUT_UNTAGGED, null, classTags, sizes, tags, lengths);
+    System.out.println(sort(n, classTags, sizes, tags, lengths));
+  }
+
+  static class A {
+  }
+
+  static class B {
+  }
+
+  static class C {
+  }
+
+  static class HeapElem implements Comparable<HeapElem> {
+    long classTag;
+    long size;
+    long tag;
+    int length;
+
+    public int compareTo(HeapElem other) {
+      if (tag != other.tag) {
+        return Long.compare(tag, other.tag);
+      }
+      if (classTag != other.classTag) {
+        return Long.compare(classTag, other.classTag);
+      }
+      if (size != other.size) {
+        return Long.compare(size, other.size);
+      }
+      return Integer.compare(length, other.length);
+    }
+
+    public String toString() {
+      return "{tag=" + tag + ", class-tag=" + classTag + ", size=" +
+          (tag >= 100 ? "<class>" : size)  // Class size is dependent on 32-bit vs 64-bit,
+                                           // so strip it.
+          + ", length=" + length + "}";
+    }
+  }
+
+  private static ArrayList<HeapElem> sort(int n, long classTags[], long sizes[], long tags[],
+      int lengths[]) {
+    ArrayList<HeapElem> ret = new ArrayList<HeapElem>(n);
+    for (int i = 0; i < n; i++) {
+      HeapElem elem = new HeapElem();
+      elem.classTag = classTags[i];
+      elem.size = sizes[i];
+      elem.tag = tags[i];
+      elem.length = lengths[i];
+      ret.add(elem);
+    }
+    Collections.sort(ret);
+    return ret;
+  }
+
+  private static native void setTag(Object o, long tag);
+  private static native long getTag(Object o);
+
+  private final static int HEAP_FILTER_OUT_TAGGED = 0x4;
+  private final static int HEAP_FILTER_OUT_UNTAGGED = 0x8;
+  private final static int HEAP_FILTER_OUT_CLASS_TAGGED = 0x10;
+  private final static int HEAP_FILTER_OUT_CLASS_UNTAGGED = 0x20;
+
+  private static native int iterateThroughHeapCount(int heapFilter,
+      Class<?> klassFilter, int stopAfter);
+  private static native int iterateThroughHeapData(int heapFilter,
+      Class<?> klassFilter, long classTags[], long sizes[], long tags[], int lengths[]);
+  private static native int iterateThroughHeapAdd(int heapFilter,
+      Class<?> klassFilter);
+}
diff --git a/test/907-get-loaded-classes/build b/test/907-get-loaded-classes/build
new file mode 100755
index 0000000..898e2e5
--- /dev/null
+++ b/test/907-get-loaded-classes/build
@@ -0,0 +1,17 @@
+#!/bin/bash
+#
+# Copyright 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+./default-build "$@" --experimental agents
diff --git a/test/562-no-intermediate/expected.txt b/test/907-get-loaded-classes/expected.txt
similarity index 100%
copy from test/562-no-intermediate/expected.txt
copy to test/907-get-loaded-classes/expected.txt
diff --git a/test/907-get-loaded-classes/get_loaded_classes.cc b/test/907-get-loaded-classes/get_loaded_classes.cc
new file mode 100644
index 0000000..afbb774
--- /dev/null
+++ b/test/907-get-loaded-classes/get_loaded_classes.cc
@@ -0,0 +1,80 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "get_loaded_classes.h"
+
+#include <iostream>
+#include <pthread.h>
+#include <stdio.h>
+#include <vector>
+
+#include "base/macros.h"
+#include "jni.h"
+#include "openjdkjvmti/jvmti.h"
+#include "ScopedLocalRef.h"
+#include "ScopedUtfChars.h"
+
+#include "ti-agent/common_helper.h"
+#include "ti-agent/common_load.h"
+
+namespace art {
+namespace Test907GetLoadedClasses {
+
+static jstring GetClassName(JNIEnv* jni_env, jclass cls) {
+  ScopedLocalRef<jclass> class_class(jni_env, jni_env->GetObjectClass(cls));
+  jmethodID mid = jni_env->GetMethodID(class_class.get(), "getName", "()Ljava/lang/String;");
+  return reinterpret_cast<jstring>(jni_env->CallObjectMethod(cls, mid));
+}
+
+extern "C" JNIEXPORT jobjectArray JNICALL Java_Main_getLoadedClasses(
+    JNIEnv* env, jclass klass ATTRIBUTE_UNUSED) {
+  jint count = -1;
+  jclass* classes = nullptr;
+  jvmtiError result = jvmti_env->GetLoadedClasses(&count, &classes);
+  if (result != JVMTI_ERROR_NONE) {
+    char* err;
+    jvmti_env->GetErrorName(result, &err);
+    printf("Failure running GetLoadedClasses: %s\n", err);
+    return nullptr;
+  }
+
+  auto callback = [&](jint i) {
+    jstring class_name = GetClassName(env, classes[i]);
+    env->DeleteLocalRef(classes[i]);
+    return class_name;
+  };
+  jobjectArray ret = CreateObjectArray(env, count, "java/lang/String", callback);
+
+  // Need to Deallocate.
+  jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(classes));
+
+  return ret;
+}
+
+// Don't do anything
+jint OnLoad(JavaVM* vm,
+            char* options ATTRIBUTE_UNUSED,
+            void* reserved ATTRIBUTE_UNUSED) {
+  if (vm->GetEnv(reinterpret_cast<void**>(&jvmti_env), JVMTI_VERSION_1_0)) {
+    printf("Unable to get jvmti env!\n");
+    return 1;
+  }
+  SetAllCapabilities(jvmti_env);
+  return 0;
+}
+
+}  // namespace Test907GetLoadedClasses
+}  // namespace art
diff --git a/test/562-no-intermediate/src/Main.java b/test/907-get-loaded-classes/get_loaded_classes.h
similarity index 61%
copy from test/562-no-intermediate/src/Main.java
copy to test/907-get-loaded-classes/get_loaded_classes.h
index 3b74d6f..4d27f89 100644
--- a/test/562-no-intermediate/src/Main.java
+++ b/test/907-get-loaded-classes/get_loaded_classes.h
@@ -14,14 +14,17 @@
  * limitations under the License.
  */
 
-public class Main {
+#ifndef ART_TEST_907_GET_LOADED_CLASSES_GET_LOADED_CLASSES_H_
+#define ART_TEST_907_GET_LOADED_CLASSES_GET_LOADED_CLASSES_H_
 
-  /// CHECK-START-ARM64: int Main.main(String[]) register_allocator (after)
-  /// CHECK-NOT: IntermediateAddress
-  public static void main(String[] args) {
-    array[index] += Math.cos(42);
-  }
+#include <jni.h>
 
-  static int index = 0;
-  static double[] array = new double[2];
-}
+namespace art {
+namespace Test907GetLoadedClasses {
+
+jint OnLoad(JavaVM* vm, char* options, void* reserved);
+
+}  // namespace Test907GetLoadedClasses
+}  // namespace art
+
+#endif  // ART_TEST_907_GET_LOADED_CLASSES_GET_LOADED_CLASSES_H_
diff --git a/test/907-get-loaded-classes/info.txt b/test/907-get-loaded-classes/info.txt
new file mode 100644
index 0000000..875a5f6
--- /dev/null
+++ b/test/907-get-loaded-classes/info.txt
@@ -0,0 +1 @@
+Tests basic functions in the jvmti plugin.
diff --git a/test/907-get-loaded-classes/run b/test/907-get-loaded-classes/run
new file mode 100755
index 0000000..3f5a059
--- /dev/null
+++ b/test/907-get-loaded-classes/run
@@ -0,0 +1,43 @@
+#!/bin/bash
+#
+# Copyright 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+plugin=libopenjdkjvmtid.so
+agent=libtiagentd.so
+lib=tiagentd
+if  [[ "$@" == *"-O"* ]]; then
+  agent=libtiagent.so
+  plugin=libopenjdkjvmti.so
+  lib=tiagent
+fi
+
+if [[ "$@" == *"--jvm"* ]]; then
+  arg="jvm"
+else
+  arg="art"
+fi
+
+if [[ "$@" != *"--debuggable"* ]]; then
+  other_args=" -Xcompiler-option --debuggable "
+else
+  other_args=""
+fi
+
+./default-run "$@" --experimental agents \
+                   --experimental runtime-plugins \
+                   --runtime-option -agentpath:${agent}=907-get-loaded-classes,${arg} \
+                   --android-runtime-option -Xplugin:${plugin} \
+                   ${other_args} \
+                   --args ${lib}
diff --git a/test/907-get-loaded-classes/src/Main.java b/test/907-get-loaded-classes/src/Main.java
new file mode 100644
index 0000000..468d037
--- /dev/null
+++ b/test/907-get-loaded-classes/src/Main.java
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
+
+public class Main {
+  public static void main(String[] args) throws Exception {
+    System.loadLibrary(args[1]);
+
+    doTest();
+  }
+
+  public static void doTest() throws Exception {
+    // Ensure some classes are loaded.
+    A a = new A();
+    B b = new B();
+    A[] aArray = new A[5];
+
+    String[] classes = getLoadedClasses();
+    HashSet<String> classesSet = new HashSet<>(Arrays.asList(classes));
+
+    String[] shouldBeLoaded = new String[] {
+        "java.lang.Object", "java.lang.Class", "java.lang.String", "Main$A", "Main$B", "[LMain$A;"
+    };
+
+    boolean error = false;
+    for (String s : shouldBeLoaded) {
+      if (!classesSet.contains(s)) {
+        System.out.println("Did not find " + s);
+        error = true;
+      }
+    }
+
+    if (error) {
+      System.out.println(Arrays.toString(classes));
+    }
+  }
+
+  static class A {
+  }
+
+  static class B {
+  }
+
+  private static native String[] getLoadedClasses();
+}
diff --git a/test/908-gc-start-finish/build b/test/908-gc-start-finish/build
new file mode 100755
index 0000000..898e2e5
--- /dev/null
+++ b/test/908-gc-start-finish/build
@@ -0,0 +1,17 @@
+#!/bin/bash
+#
+# Copyright 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+./default-build "$@" --experimental agents
diff --git a/test/908-gc-start-finish/expected.txt b/test/908-gc-start-finish/expected.txt
new file mode 100644
index 0000000..45f89dc
--- /dev/null
+++ b/test/908-gc-start-finish/expected.txt
@@ -0,0 +1,12 @@
+---
+true true
+---
+true true
+---
+true true
+---
+false false
+---
+false false
+---
+false false
diff --git a/test/908-gc-start-finish/gc_callbacks.cc b/test/908-gc-start-finish/gc_callbacks.cc
new file mode 100644
index 0000000..771d1ad
--- /dev/null
+++ b/test/908-gc-start-finish/gc_callbacks.cc
@@ -0,0 +1,107 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "gc_callbacks.h"
+
+#include <stdio.h>
+#include <string.h>
+
+#include "base/macros.h"
+#include "jni.h"
+#include "openjdkjvmti/jvmti.h"
+#include "ti-agent/common_helper.h"
+#include "ti-agent/common_load.h"
+
+namespace art {
+namespace Test908GcStartFinish {
+
+static size_t starts = 0;
+static size_t finishes = 0;
+
+static void JNICALL GarbageCollectionFinish(jvmtiEnv* ti_env ATTRIBUTE_UNUSED) {
+  finishes++;
+}
+
+static void JNICALL GarbageCollectionStart(jvmtiEnv* ti_env ATTRIBUTE_UNUSED) {
+  starts++;
+}
+
+extern "C" JNIEXPORT void JNICALL Java_Main_setupGcCallback(
+    JNIEnv* env ATTRIBUTE_UNUSED, jclass klass ATTRIBUTE_UNUSED) {
+  jvmtiEventCallbacks callbacks;
+  memset(&callbacks, 0, sizeof(jvmtiEventCallbacks));
+  callbacks.GarbageCollectionFinish = GarbageCollectionFinish;
+  callbacks.GarbageCollectionStart = GarbageCollectionStart;
+
+  jvmtiError ret = jvmti_env->SetEventCallbacks(&callbacks, sizeof(callbacks));
+  if (ret != JVMTI_ERROR_NONE) {
+    char* err;
+    jvmti_env->GetErrorName(ret, &err);
+    printf("Error setting callbacks: %s\n", err);
+  }
+}
+
+extern "C" JNIEXPORT void JNICALL Java_Main_enableGcTracking(JNIEnv* env ATTRIBUTE_UNUSED,
+                                                             jclass klass ATTRIBUTE_UNUSED,
+                                                             jboolean enable) {
+  jvmtiError ret = jvmti_env->SetEventNotificationMode(
+      enable ? JVMTI_ENABLE : JVMTI_DISABLE,
+      JVMTI_EVENT_GARBAGE_COLLECTION_START,
+      nullptr);
+  if (ret != JVMTI_ERROR_NONE) {
+    char* err;
+    jvmti_env->GetErrorName(ret, &err);
+    printf("Error enabling/disabling gc callbacks: %s\n", err);
+  }
+  ret = jvmti_env->SetEventNotificationMode(
+      enable ? JVMTI_ENABLE : JVMTI_DISABLE,
+      JVMTI_EVENT_GARBAGE_COLLECTION_FINISH,
+      nullptr);
+  if (ret != JVMTI_ERROR_NONE) {
+    char* err;
+    jvmti_env->GetErrorName(ret, &err);
+    printf("Error enabling/disabling gc callbacks: %s\n", err);
+  }
+}
+
+extern "C" JNIEXPORT jint JNICALL Java_Main_getGcStarts(JNIEnv* env ATTRIBUTE_UNUSED,
+                                                        jclass klass ATTRIBUTE_UNUSED) {
+  jint result = static_cast<jint>(starts);
+  starts = 0;
+  return result;
+}
+
+extern "C" JNIEXPORT jint JNICALL Java_Main_getGcFinishes(JNIEnv* env ATTRIBUTE_UNUSED,
+                                                          jclass klass ATTRIBUTE_UNUSED) {
+  jint result = static_cast<jint>(finishes);
+  finishes = 0;
+  return result;
+}
+
+// Don't do anything
+jint OnLoad(JavaVM* vm,
+            char* options ATTRIBUTE_UNUSED,
+            void* reserved ATTRIBUTE_UNUSED) {
+  if (vm->GetEnv(reinterpret_cast<void**>(&jvmti_env), JVMTI_VERSION_1_0)) {
+    printf("Unable to get jvmti env!\n");
+    return 1;
+  }
+  SetAllCapabilities(jvmti_env);
+  return 0;
+}
+
+}  // namespace Test908GcStartFinish
+}  // namespace art
diff --git a/test/562-no-intermediate/src/Main.java b/test/908-gc-start-finish/gc_callbacks.h
similarity index 63%
copy from test/562-no-intermediate/src/Main.java
copy to test/908-gc-start-finish/gc_callbacks.h
index 3b74d6f..177a4eb 100644
--- a/test/562-no-intermediate/src/Main.java
+++ b/test/908-gc-start-finish/gc_callbacks.h
@@ -14,14 +14,17 @@
  * limitations under the License.
  */
 
-public class Main {
+#ifndef ART_TEST_908_GC_START_FINISH_GC_CALLBACKS_H_
+#define ART_TEST_908_GC_START_FINISH_GC_CALLBACKS_H_
 
-  /// CHECK-START-ARM64: int Main.main(String[]) register_allocator (after)
-  /// CHECK-NOT: IntermediateAddress
-  public static void main(String[] args) {
-    array[index] += Math.cos(42);
-  }
+#include <jni.h>
 
-  static int index = 0;
-  static double[] array = new double[2];
-}
+namespace art {
+namespace Test908GcStartFinish {
+
+jint OnLoad(JavaVM* vm, char* options, void* reserved);
+
+}  // namespace Test908GcStartFinish
+}  // namespace art
+
+#endif  // ART_TEST_908_GC_START_FINISH_GC_CALLBACKS_H_
diff --git a/test/908-gc-start-finish/info.txt b/test/908-gc-start-finish/info.txt
new file mode 100644
index 0000000..875a5f6
--- /dev/null
+++ b/test/908-gc-start-finish/info.txt
@@ -0,0 +1 @@
+Tests basic functions in the jvmti plugin.
diff --git a/test/908-gc-start-finish/run b/test/908-gc-start-finish/run
new file mode 100755
index 0000000..2fc35f0
--- /dev/null
+++ b/test/908-gc-start-finish/run
@@ -0,0 +1,43 @@
+#!/bin/bash
+#
+# Copyright 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+plugin=libopenjdkjvmtid.so
+agent=libtiagentd.so
+lib=tiagentd
+if  [[ "$@" == *"-O"* ]]; then
+  agent=libtiagent.so
+  plugin=libopenjdkjvmti.so
+  lib=tiagent
+fi
+
+if [[ "$@" == *"--jvm"* ]]; then
+  arg="jvm"
+else
+  arg="art"
+fi
+
+if [[ "$@" != *"--debuggable"* ]]; then
+  other_args=" -Xcompiler-option --debuggable "
+else
+  other_args=""
+fi
+
+./default-run "$@" --experimental agents \
+                   --experimental runtime-plugins \
+                   --runtime-option -agentpath:${agent}=908-gc-start-finish,${arg} \
+                   --android-runtime-option -Xplugin:${plugin} \
+                   ${other_args} \
+                   --args ${lib}
diff --git a/test/908-gc-start-finish/src/Main.java b/test/908-gc-start-finish/src/Main.java
new file mode 100644
index 0000000..2be0eea
--- /dev/null
+++ b/test/908-gc-start-finish/src/Main.java
@@ -0,0 +1,79 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.ArrayList;
+
+public class Main {
+  public static void main(String[] args) throws Exception {
+    System.loadLibrary(args[1]);
+
+    doTest();
+  }
+
+  public static void doTest() throws Exception {
+    // Use a list to ensure objects must be allocated.
+    ArrayList<Object> l = new ArrayList<>(100);
+
+    setupGcCallback();
+
+    enableGcTracking(true);
+    run(l);
+
+    enableGcTracking(false);
+    run(l);
+  }
+
+  private static void run(ArrayList<Object> l) {
+    allocate(l, 1);
+    l.clear();
+
+    Runtime.getRuntime().gc();
+
+    printStats();
+
+    // Note: the reporting will not depend on the heap layout (which could be unstable). Walking
+    //       the tag table should give us a stable output order.
+    for (int i = 10; i <= 1000; i *= 10) {
+      allocate(l, i);
+    }
+    l.clear();
+
+    Runtime.getRuntime().gc();
+
+    printStats();
+
+    Runtime.getRuntime().gc();
+
+    printStats();
+  }
+
+  private static void allocate(ArrayList<Object> l, long tag) {
+    Object obj = new Object();
+    l.add(obj);
+  }
+
+  private static void printStats() {
+      System.out.println("---");
+      int s = getGcStarts();
+      int f = getGcFinishes();
+      System.out.println((s > 0) + " " + (f > 0));
+  }
+
+  private static native void setupGcCallback();
+  private static native void enableGcTracking(boolean enable);
+  private static native int getGcStarts();
+  private static native int getGcFinishes();
+}
diff --git a/test/909-attach-agent/attach.cc b/test/909-attach-agent/attach.cc
new file mode 100644
index 0000000..2b50eb8
--- /dev/null
+++ b/test/909-attach-agent/attach.cc
@@ -0,0 +1,79 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "909-attach-agent/attach.h"
+
+#include <jni.h>
+#include <stdio.h>
+#include <string.h>
+#include "base/macros.h"
+#include "openjdkjvmti/jvmti.h"
+
+namespace art {
+namespace Test909AttachAgent {
+
+jint OnAttach(JavaVM* vm,
+            char* options ATTRIBUTE_UNUSED,
+            void* reserved ATTRIBUTE_UNUSED) {
+  printf("Attached Agent for test 909-attach-agent\n");
+  fsync(1);
+  jvmtiEnv* env = nullptr;
+  jvmtiEnv* env2 = nullptr;
+
+#define CHECK_CALL_SUCCESS(c) \
+  do { \
+    if ((c) != JNI_OK) { \
+      printf("call " #c " did not succeed\n"); \
+      return -1; \
+    } \
+  } while (false)
+
+  CHECK_CALL_SUCCESS(vm->GetEnv(reinterpret_cast<void**>(&env), JVMTI_VERSION_1_0));
+  CHECK_CALL_SUCCESS(vm->GetEnv(reinterpret_cast<void**>(&env2), JVMTI_VERSION_1_0));
+  if (env == env2) {
+    printf("GetEnv returned same environment twice!\n");
+    return -1;
+  }
+  unsigned char* local_data = nullptr;
+  CHECK_CALL_SUCCESS(env->Allocate(8, &local_data));
+  strcpy(reinterpret_cast<char*>(local_data), "hello!!");
+  CHECK_CALL_SUCCESS(env->SetEnvironmentLocalStorage(local_data));
+  unsigned char* get_data = nullptr;
+  CHECK_CALL_SUCCESS(env->GetEnvironmentLocalStorage(reinterpret_cast<void**>(&get_data)));
+  if (get_data != local_data) {
+    printf("Got different data from local storage then what was set!\n");
+    return -1;
+  }
+  CHECK_CALL_SUCCESS(env2->GetEnvironmentLocalStorage(reinterpret_cast<void**>(&get_data)));
+  if (get_data != nullptr) {
+    printf("env2 did not have nullptr local storage.\n");
+    return -1;
+  }
+  CHECK_CALL_SUCCESS(env->Deallocate(local_data));
+  jint version = 0;
+  CHECK_CALL_SUCCESS(env->GetVersionNumber(&version));
+  if ((version & JVMTI_VERSION_1) != JVMTI_VERSION_1) {
+    printf("Unexpected version number!\n");
+    return -1;
+  }
+  CHECK_CALL_SUCCESS(env->DisposeEnvironment());
+  CHECK_CALL_SUCCESS(env2->DisposeEnvironment());
+#undef CHECK_CALL_SUCCESS
+  return JNI_OK;
+}
+
+}  // namespace Test909AttachAgent
+}  // namespace art
diff --git a/test/562-no-intermediate/src/Main.java b/test/909-attach-agent/attach.h
similarity index 65%
copy from test/562-no-intermediate/src/Main.java
copy to test/909-attach-agent/attach.h
index 3b74d6f..3e6fe6c 100644
--- a/test/562-no-intermediate/src/Main.java
+++ b/test/909-attach-agent/attach.h
@@ -14,14 +14,17 @@
  * limitations under the License.
  */
 
-public class Main {
+#ifndef ART_TEST_909_ATTACH_AGENT_ATTACH_H_
+#define ART_TEST_909_ATTACH_AGENT_ATTACH_H_
 
-  /// CHECK-START-ARM64: int Main.main(String[]) register_allocator (after)
-  /// CHECK-NOT: IntermediateAddress
-  public static void main(String[] args) {
-    array[index] += Math.cos(42);
-  }
+#include <jni.h>
 
-  static int index = 0;
-  static double[] array = new double[2];
-}
+namespace art {
+namespace Test909AttachAgent {
+
+jint OnAttach(JavaVM* vm, char* options, void* reserved);
+
+}  // namespace Test909AttachAgent
+}  // namespace art
+
+#endif  // ART_TEST_909_ATTACH_AGENT_ATTACH_H_
diff --git a/test/909-attach-agent/build b/test/909-attach-agent/build
new file mode 100755
index 0000000..898e2e5
--- /dev/null
+++ b/test/909-attach-agent/build
@@ -0,0 +1,17 @@
+#!/bin/bash
+#
+# Copyright 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+./default-build "$@" --experimental agents
diff --git a/test/909-attach-agent/expected.txt b/test/909-attach-agent/expected.txt
new file mode 100644
index 0000000..eacc595
--- /dev/null
+++ b/test/909-attach-agent/expected.txt
@@ -0,0 +1,3 @@
+Hello, world!
+Attached Agent for test 909-attach-agent
+Goodbye!
diff --git a/test/909-attach-agent/info.txt b/test/909-attach-agent/info.txt
new file mode 100644
index 0000000..06f3c8c
--- /dev/null
+++ b/test/909-attach-agent/info.txt
@@ -0,0 +1 @@
+Tests jvmti plugin attaching during live phase.
diff --git a/test/909-attach-agent/run b/test/909-attach-agent/run
new file mode 100755
index 0000000..aed6e83
--- /dev/null
+++ b/test/909-attach-agent/run
@@ -0,0 +1,27 @@
+#!/bin/bash
+#
+# Copyright 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+plugin=libopenjdkjvmtid.so
+agent=libtiagentd.so
+if  [[ "$@" == *"-O"* ]]; then
+  agent=libtiagent.so
+  plugin=libopenjdkjvmti.so
+fi
+
+./default-run "$@" --experimental agents \
+                   --experimental runtime-plugins \
+                   --android-runtime-option -Xplugin:${plugin} \
+                   --args agent:${agent}=909-attach-agent
diff --git a/test/909-attach-agent/src/Main.java b/test/909-attach-agent/src/Main.java
new file mode 100644
index 0000000..8a8a087
--- /dev/null
+++ b/test/909-attach-agent/src/Main.java
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import dalvik.system.VMDebug;
+import java.io.IOException;
+
+public class Main {
+  public static void main(String[] args) {
+    System.out.println("Hello, world!");
+    for(String a : args) {
+      if(a.startsWith("agent:")) {
+        String agent = a.substring(6);
+        try {
+          VMDebug.attachAgent(agent);
+        } catch(IOException e) {
+          e.printStackTrace();
+        }
+      }
+    }
+    System.out.println("Goodbye!");
+  }
+}
diff --git a/test/910-methods/build b/test/910-methods/build
new file mode 100755
index 0000000..898e2e5
--- /dev/null
+++ b/test/910-methods/build
@@ -0,0 +1,17 @@
+#!/bin/bash
+#
+# Copyright 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+./default-build "$@" --experimental agents
diff --git a/test/910-methods/expected.txt b/test/910-methods/expected.txt
new file mode 100644
index 0000000..9a74799
--- /dev/null
+++ b/test/910-methods/expected.txt
@@ -0,0 +1,15 @@
+[toString, ()Ljava/lang/String;, null]
+class java.lang.Object
+1
+[charAt, (I)C, null]
+class java.lang.String
+257
+[sqrt, (D)D, null]
+class java.lang.Math
+265
+[add, (Ljava/lang/Object;)Z, null]
+interface java.util.List
+1025
+[run, ()V, null]
+class $Proxy0
+17
diff --git a/test/910-methods/info.txt b/test/910-methods/info.txt
new file mode 100644
index 0000000..875a5f6
--- /dev/null
+++ b/test/910-methods/info.txt
@@ -0,0 +1 @@
+Tests basic functions in the jvmti plugin.
diff --git a/test/910-methods/methods.cc b/test/910-methods/methods.cc
new file mode 100644
index 0000000..8f0850b
--- /dev/null
+++ b/test/910-methods/methods.cc
@@ -0,0 +1,117 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "methods.h"
+
+#include <stdio.h>
+
+#include "base/macros.h"
+#include "jni.h"
+#include "openjdkjvmti/jvmti.h"
+#include "ScopedLocalRef.h"
+
+#include "ti-agent/common_helper.h"
+#include "ti-agent/common_load.h"
+
+namespace art {
+namespace Test910Methods {
+
+extern "C" JNIEXPORT jobjectArray JNICALL Java_Main_getMethodName(
+    JNIEnv* env, jclass klass ATTRIBUTE_UNUSED, jobject method) {
+  jmethodID id = env->FromReflectedMethod(method);
+
+  char* name;
+  char* sig;
+  char* gen;
+  jvmtiError result = jvmti_env->GetMethodName(id, &name, &sig, &gen);
+  if (result != JVMTI_ERROR_NONE) {
+    char* err;
+    jvmti_env->GetErrorName(result, &err);
+    printf("Failure running GetMethodName: %s\n", err);
+    return nullptr;
+  }
+
+  auto callback = [&](jint i) {
+    if (i == 0) {
+      return name == nullptr ? nullptr : env->NewStringUTF(name);
+    } else if (i == 1) {
+      return sig == nullptr ? nullptr : env->NewStringUTF(sig);
+    } else {
+      return gen == nullptr ? nullptr : env->NewStringUTF(gen);
+    }
+  };
+  jobjectArray ret = CreateObjectArray(env, 3, "java/lang/String", callback);
+
+  // Need to deallocate the strings.
+  if (name != nullptr) {
+    jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(name));
+  }
+  if (sig != nullptr) {
+    jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(sig));
+  }
+  if (gen != nullptr) {
+    jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(gen));
+  }
+
+  return ret;
+}
+
+extern "C" JNIEXPORT jclass JNICALL Java_Main_getMethodDeclaringClass(
+    JNIEnv* env, jclass klass ATTRIBUTE_UNUSED, jobject method) {
+  jmethodID id = env->FromReflectedMethod(method);
+
+  jclass declaring_class;
+  jvmtiError result = jvmti_env->GetMethodDeclaringClass(id, &declaring_class);
+  if (result != JVMTI_ERROR_NONE) {
+    char* err;
+    jvmti_env->GetErrorName(result, &err);
+    printf("Failure running GetMethodDeclaringClass: %s\n", err);
+    return nullptr;
+  }
+
+  return declaring_class;
+}
+
+extern "C" JNIEXPORT jint JNICALL Java_Main_getMethodModifiers(
+    JNIEnv* env, jclass klass ATTRIBUTE_UNUSED, jobject method) {
+  jmethodID id = env->FromReflectedMethod(method);
+
+  jint modifiers;
+  jvmtiError result = jvmti_env->GetMethodModifiers(id, &modifiers);
+  if (result != JVMTI_ERROR_NONE) {
+    char* err;
+    jvmti_env->GetErrorName(result, &err);
+    printf("Failure running GetMethodModifiers: %s\n", err);
+    return 0;
+  }
+
+  return modifiers;
+}
+
+// Don't do anything
+jint OnLoad(JavaVM* vm,
+            char* options ATTRIBUTE_UNUSED,
+            void* reserved ATTRIBUTE_UNUSED) {
+  if (vm->GetEnv(reinterpret_cast<void**>(&jvmti_env), JVMTI_VERSION_1_0)) {
+    printf("Unable to get jvmti env!\n");
+    return 1;
+  }
+  SetAllCapabilities(jvmti_env);
+  return 0;
+}
+
+}  // namespace Test910Methods
+}  // namespace art
diff --git a/test/562-no-intermediate/src/Main.java b/test/910-methods/methods.h
similarity index 67%
copy from test/562-no-intermediate/src/Main.java
copy to test/910-methods/methods.h
index 3b74d6f..93d1874 100644
--- a/test/562-no-intermediate/src/Main.java
+++ b/test/910-methods/methods.h
@@ -14,14 +14,17 @@
  * limitations under the License.
  */
 
-public class Main {
+#ifndef ART_TEST_910_METHODS_METHODS_H_
+#define ART_TEST_910_METHODS_METHODS_H_
 
-  /// CHECK-START-ARM64: int Main.main(String[]) register_allocator (after)
-  /// CHECK-NOT: IntermediateAddress
-  public static void main(String[] args) {
-    array[index] += Math.cos(42);
-  }
+#include <jni.h>
 
-  static int index = 0;
-  static double[] array = new double[2];
-}
+namespace art {
+namespace Test910Methods {
+
+jint OnLoad(JavaVM* vm, char* options, void* reserved);
+
+}  // namespace Test910Methods
+}  // namespace art
+
+#endif  // ART_TEST_910_METHODS_METHODS_H_
diff --git a/test/910-methods/run b/test/910-methods/run
new file mode 100755
index 0000000..4dd2555
--- /dev/null
+++ b/test/910-methods/run
@@ -0,0 +1,43 @@
+#!/bin/bash
+#
+# Copyright 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+plugin=libopenjdkjvmtid.so
+agent=libtiagentd.so
+lib=tiagentd
+if  [[ "$@" == *"-O"* ]]; then
+  agent=libtiagent.so
+  plugin=libopenjdkjvmti.so
+  lib=tiagent
+fi
+
+if [[ "$@" == *"--jvm"* ]]; then
+  arg="jvm"
+else
+  arg="art"
+fi
+
+if [[ "$@" != *"--debuggable"* ]]; then
+  other_args=" -Xcompiler-option --debuggable "
+else
+  other_args=""
+fi
+
+./default-run "$@" --experimental agents \
+                   --experimental runtime-plugins \
+                   --runtime-option -agentpath:${agent}=910-methods,${arg} \
+                   --android-runtime-option -Xplugin:${plugin} \
+                   ${other_args} \
+                   --args ${lib}
diff --git a/test/910-methods/src/Main.java b/test/910-methods/src/Main.java
new file mode 100644
index 0000000..3459134
--- /dev/null
+++ b/test/910-methods/src/Main.java
@@ -0,0 +1,76 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Method;
+import java.lang.reflect.Proxy;
+import java.util.Arrays;
+
+public class Main {
+  public static void main(String[] args) throws Exception {
+    System.loadLibrary(args[1]);
+
+    doTest();
+  }
+
+  public static void doTest() throws Exception {
+    testMethod("java.lang.Object", "toString");
+    testMethod("java.lang.String", "charAt", int.class);
+    testMethod("java.lang.Math", "sqrt", double.class);
+    testMethod("java.util.List", "add", Object.class);
+
+    testMethod(getProxyClass(), "run");
+  }
+
+  private static Class<?> proxyClass = null;
+
+  private static Class<?> getProxyClass() throws Exception {
+    if (proxyClass != null) {
+      return proxyClass;
+    }
+
+    proxyClass = Proxy.getProxyClass(Main.class.getClassLoader(), new Class[] { Runnable.class });
+    return proxyClass;
+  }
+
+  private static void testMethod(String className, String methodName, Class<?>... types)
+      throws Exception {
+    Class<?> base = Class.forName(className);
+    testMethod(base, methodName, types);
+  }
+
+  private static void testMethod(Class<?> base, String methodName, Class<?>... types)
+      throws Exception {
+    Method m = base.getDeclaredMethod(methodName, types);
+    String[] result = getMethodName(m);
+    System.out.println(Arrays.toString(result));
+
+    Class<?> declClass = getMethodDeclaringClass(m);
+    if (base != declClass) {
+      throw new RuntimeException("Declaring class not equal: " + base + " vs " + declClass);
+    }
+    System.out.println(declClass);
+
+    int modifiers = getMethodModifiers(m);
+    if (modifiers != m.getModifiers()) {
+      throw new RuntimeException("Modifiers not equal: " + m.getModifiers() + " vs " + modifiers);
+    }
+    System.out.println(modifiers);
+  }
+
+  private static native String[] getMethodName(Method m);
+  private static native Class<?> getMethodDeclaringClass(Method m);
+  private static native int getMethodModifiers(Method m);
+}
diff --git a/test/911-get-stack-trace/build b/test/911-get-stack-trace/build
new file mode 100755
index 0000000..898e2e5
--- /dev/null
+++ b/test/911-get-stack-trace/build
@@ -0,0 +1,17 @@
+#!/bin/bash
+#
+# Copyright 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+./default-build "$@" --experimental agents
diff --git a/test/911-get-stack-trace/expected.txt b/test/911-get-stack-trace/expected.txt
new file mode 100644
index 0000000..20bab78
--- /dev/null
+++ b/test/911-get-stack-trace/expected.txt
@@ -0,0 +1,208 @@
+###################
+### Same thread ###
+###################
+From top
+---------
+ getStackTrace (Ljava/lang/Thread;II)[Ljava/lang/String;
+ print (Ljava/lang/Thread;II)V
+ printOrWait (IILMain$ControlData;)V
+ baz (IIILMain$ControlData;)Ljava/lang/Object;
+ bar (IIILMain$ControlData;)J
+ foo (IIILMain$ControlData;)I
+ baz (IIILMain$ControlData;)Ljava/lang/Object;
+ bar (IIILMain$ControlData;)J
+ foo (IIILMain$ControlData;)I
+ baz (IIILMain$ControlData;)Ljava/lang/Object;
+ bar (IIILMain$ControlData;)J
+ foo (IIILMain$ControlData;)I
+ baz (IIILMain$ControlData;)Ljava/lang/Object;
+ bar (IIILMain$ControlData;)J
+ foo (IIILMain$ControlData;)I
+ baz (IIILMain$ControlData;)Ljava/lang/Object;
+ bar (IIILMain$ControlData;)J
+ foo (IIILMain$ControlData;)I
+ doTest ()V
+ main ([Ljava/lang/String;)V
+---------
+ print (Ljava/lang/Thread;II)V
+ printOrWait (IILMain$ControlData;)V
+ baz (IIILMain$ControlData;)Ljava/lang/Object;
+ bar (IIILMain$ControlData;)J
+ foo (IIILMain$ControlData;)I
+ baz (IIILMain$ControlData;)Ljava/lang/Object;
+ bar (IIILMain$ControlData;)J
+ foo (IIILMain$ControlData;)I
+ baz (IIILMain$ControlData;)Ljava/lang/Object;
+ bar (IIILMain$ControlData;)J
+ foo (IIILMain$ControlData;)I
+ baz (IIILMain$ControlData;)Ljava/lang/Object;
+ bar (IIILMain$ControlData;)J
+ foo (IIILMain$ControlData;)I
+ baz (IIILMain$ControlData;)Ljava/lang/Object;
+ bar (IIILMain$ControlData;)J
+ foo (IIILMain$ControlData;)I
+ doTest ()V
+ main ([Ljava/lang/String;)V
+---------
+ getStackTrace (Ljava/lang/Thread;II)[Ljava/lang/String;
+ print (Ljava/lang/Thread;II)V
+ printOrWait (IILMain$ControlData;)V
+ baz (IIILMain$ControlData;)Ljava/lang/Object;
+ bar (IIILMain$ControlData;)J
+---------
+ printOrWait (IILMain$ControlData;)V
+ baz (IIILMain$ControlData;)Ljava/lang/Object;
+ bar (IIILMain$ControlData;)J
+ foo (IIILMain$ControlData;)I
+ baz (IIILMain$ControlData;)Ljava/lang/Object;
+From bottom
+---------
+ main ([Ljava/lang/String;)V
+---------
+ baz (IIILMain$ControlData;)Ljava/lang/Object;
+ bar (IIILMain$ControlData;)J
+ foo (IIILMain$ControlData;)I
+ doTest ()V
+ main ([Ljava/lang/String;)V
+---------
+ bar (IIILMain$ControlData;)J
+ foo (IIILMain$ControlData;)I
+ baz (IIILMain$ControlData;)Ljava/lang/Object;
+ bar (IIILMain$ControlData;)J
+ foo (IIILMain$ControlData;)I
+
+################################
+### Other thread (suspended) ###
+################################
+From top
+---------
+ wait ()V
+ printOrWait (IILMain$ControlData;)V
+ baz (IIILMain$ControlData;)Ljava/lang/Object;
+ bar (IIILMain$ControlData;)J
+ foo (IIILMain$ControlData;)I
+ baz (IIILMain$ControlData;)Ljava/lang/Object;
+ bar (IIILMain$ControlData;)J
+ foo (IIILMain$ControlData;)I
+ baz (IIILMain$ControlData;)Ljava/lang/Object;
+ bar (IIILMain$ControlData;)J
+ foo (IIILMain$ControlData;)I
+ baz (IIILMain$ControlData;)Ljava/lang/Object;
+ bar (IIILMain$ControlData;)J
+ foo (IIILMain$ControlData;)I
+ baz (IIILMain$ControlData;)Ljava/lang/Object;
+ bar (IIILMain$ControlData;)J
+ foo (IIILMain$ControlData;)I
+ run ()V
+---------
+ printOrWait (IILMain$ControlData;)V
+ baz (IIILMain$ControlData;)Ljava/lang/Object;
+ bar (IIILMain$ControlData;)J
+ foo (IIILMain$ControlData;)I
+ baz (IIILMain$ControlData;)Ljava/lang/Object;
+ bar (IIILMain$ControlData;)J
+ foo (IIILMain$ControlData;)I
+ baz (IIILMain$ControlData;)Ljava/lang/Object;
+ bar (IIILMain$ControlData;)J
+ foo (IIILMain$ControlData;)I
+ baz (IIILMain$ControlData;)Ljava/lang/Object;
+ bar (IIILMain$ControlData;)J
+ foo (IIILMain$ControlData;)I
+ baz (IIILMain$ControlData;)Ljava/lang/Object;
+ bar (IIILMain$ControlData;)J
+ foo (IIILMain$ControlData;)I
+ run ()V
+---------
+ wait ()V
+ printOrWait (IILMain$ControlData;)V
+ baz (IIILMain$ControlData;)Ljava/lang/Object;
+ bar (IIILMain$ControlData;)J
+ foo (IIILMain$ControlData;)I
+---------
+ baz (IIILMain$ControlData;)Ljava/lang/Object;
+ bar (IIILMain$ControlData;)J
+ foo (IIILMain$ControlData;)I
+ baz (IIILMain$ControlData;)Ljava/lang/Object;
+ bar (IIILMain$ControlData;)J
+From bottom
+---------
+ run ()V
+---------
+ foo (IIILMain$ControlData;)I
+ baz (IIILMain$ControlData;)Ljava/lang/Object;
+ bar (IIILMain$ControlData;)J
+ foo (IIILMain$ControlData;)I
+ run ()V
+---------
+ baz (IIILMain$ControlData;)Ljava/lang/Object;
+ bar (IIILMain$ControlData;)J
+ foo (IIILMain$ControlData;)I
+ baz (IIILMain$ControlData;)Ljava/lang/Object;
+ bar (IIILMain$ControlData;)J
+
+###########################
+### Other thread (live) ###
+###########################
+From top
+---------
+ printOrWait (IILMain$ControlData;)V
+ baz (IIILMain$ControlData;)Ljava/lang/Object;
+ bar (IIILMain$ControlData;)J
+ foo (IIILMain$ControlData;)I
+ baz (IIILMain$ControlData;)Ljava/lang/Object;
+ bar (IIILMain$ControlData;)J
+ foo (IIILMain$ControlData;)I
+ baz (IIILMain$ControlData;)Ljava/lang/Object;
+ bar (IIILMain$ControlData;)J
+ foo (IIILMain$ControlData;)I
+ baz (IIILMain$ControlData;)Ljava/lang/Object;
+ bar (IIILMain$ControlData;)J
+ foo (IIILMain$ControlData;)I
+ baz (IIILMain$ControlData;)Ljava/lang/Object;
+ bar (IIILMain$ControlData;)J
+ foo (IIILMain$ControlData;)I
+ run ()V
+---------
+ baz (IIILMain$ControlData;)Ljava/lang/Object;
+ bar (IIILMain$ControlData;)J
+ foo (IIILMain$ControlData;)I
+ baz (IIILMain$ControlData;)Ljava/lang/Object;
+ bar (IIILMain$ControlData;)J
+ foo (IIILMain$ControlData;)I
+ baz (IIILMain$ControlData;)Ljava/lang/Object;
+ bar (IIILMain$ControlData;)J
+ foo (IIILMain$ControlData;)I
+ baz (IIILMain$ControlData;)Ljava/lang/Object;
+ bar (IIILMain$ControlData;)J
+ foo (IIILMain$ControlData;)I
+ baz (IIILMain$ControlData;)Ljava/lang/Object;
+ bar (IIILMain$ControlData;)J
+ foo (IIILMain$ControlData;)I
+ run ()V
+---------
+ printOrWait (IILMain$ControlData;)V
+ baz (IIILMain$ControlData;)Ljava/lang/Object;
+ bar (IIILMain$ControlData;)J
+ foo (IIILMain$ControlData;)I
+ baz (IIILMain$ControlData;)Ljava/lang/Object;
+---------
+ bar (IIILMain$ControlData;)J
+ foo (IIILMain$ControlData;)I
+ baz (IIILMain$ControlData;)Ljava/lang/Object;
+ bar (IIILMain$ControlData;)J
+ foo (IIILMain$ControlData;)I
+From bottom
+---------
+ run ()V
+---------
+ foo (IIILMain$ControlData;)I
+ baz (IIILMain$ControlData;)Ljava/lang/Object;
+ bar (IIILMain$ControlData;)J
+ foo (IIILMain$ControlData;)I
+ run ()V
+---------
+ baz (IIILMain$ControlData;)Ljava/lang/Object;
+ bar (IIILMain$ControlData;)J
+ foo (IIILMain$ControlData;)I
+ baz (IIILMain$ControlData;)Ljava/lang/Object;
+ bar (IIILMain$ControlData;)J
diff --git a/test/911-get-stack-trace/info.txt b/test/911-get-stack-trace/info.txt
new file mode 100644
index 0000000..875a5f6
--- /dev/null
+++ b/test/911-get-stack-trace/info.txt
@@ -0,0 +1 @@
+Tests basic functions in the jvmti plugin.
diff --git a/test/911-get-stack-trace/run b/test/911-get-stack-trace/run
new file mode 100755
index 0000000..43fc325
--- /dev/null
+++ b/test/911-get-stack-trace/run
@@ -0,0 +1,43 @@
+#!/bin/bash
+#
+# Copyright 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+plugin=libopenjdkjvmtid.so
+agent=libtiagentd.so
+lib=tiagentd
+if  [[ "$@" == *"-O"* ]]; then
+  agent=libtiagent.so
+  plugin=libopenjdkjvmti.so
+  lib=tiagent
+fi
+
+if [[ "$@" == *"--jvm"* ]]; then
+  arg="jvm"
+else
+  arg="art"
+fi
+
+if [[ "$@" != *"--debuggable"* ]]; then
+  other_args=" -Xcompiler-option --debuggable "
+else
+  other_args=""
+fi
+
+./default-run "$@" --experimental agents \
+                   --experimental runtime-plugins \
+                   --runtime-option -agentpath:${agent}=911-get-stack-trace,${arg} \
+                   --android-runtime-option -Xplugin:${plugin} \
+                   ${other_args} \
+                   --args ${lib}
diff --git a/test/911-get-stack-trace/src/Main.java b/test/911-get-stack-trace/src/Main.java
new file mode 100644
index 0000000..df4501d
--- /dev/null
+++ b/test/911-get-stack-trace/src/Main.java
@@ -0,0 +1,178 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Arrays;
+import java.util.concurrent.CountDownLatch;
+
+public class Main {
+  public static void main(String[] args) throws Exception {
+    System.loadLibrary(args[1]);
+
+    doTest();
+    doTestOtherThreadWait();
+    doTestOtherThreadBusyLoop();
+  }
+
+  public static void doTest() throws Exception {
+    System.out.println("###################");
+    System.out.println("### Same thread ###");
+    System.out.println("###################");
+    System.out.println("From top");
+    Recurse.foo(4, 0, 25, null);
+    Recurse.foo(4, 1, 25, null);
+    Recurse.foo(4, 0, 5, null);
+    Recurse.foo(4, 2, 5, null);
+
+    System.out.println("From bottom");
+    Recurse.foo(4, -1, 25, null);
+    Recurse.foo(4, -5, 5, null);
+    Recurse.foo(4, -7, 5, null);
+  }
+
+  public static void doTestOtherThreadWait() throws Exception {
+    System.out.println();
+    System.out.println("################################");
+    System.out.println("### Other thread (suspended) ###");
+    System.out.println("################################");
+    final ControlData data = new ControlData();
+    data.waitFor = new Object();
+    Thread t = new Thread() {
+      public void run() {
+        Recurse.foo(4, 0, 0, data);
+      }
+    };
+    t.start();
+    data.reached.await();
+    Thread.yield();
+    Thread.sleep(500);  // A little bit of time...
+
+    System.out.println("From top");
+    print(t, 0, 25);
+    print(t, 1, 25);
+    print(t, 0, 5);
+    print(t, 2, 5);
+
+    System.out.println("From bottom");
+    print(t, -1, 25);
+    print(t, -5, 5);
+    print(t, -7, 5);
+
+    // Let the thread make progress and die.
+    synchronized(data.waitFor) {
+      data.waitFor.notifyAll();
+    }
+    t.join();
+  }
+
+  public static void doTestOtherThreadBusyLoop() throws Exception {
+    System.out.println();
+    System.out.println("###########################");
+    System.out.println("### Other thread (live) ###");
+    System.out.println("###########################");
+    final ControlData data = new ControlData();
+    Thread t = new Thread() {
+      public void run() {
+        Recurse.foo(4, 0, 0, data);
+      }
+    };
+    t.start();
+    data.reached.await();
+    Thread.yield();
+    Thread.sleep(500);  // A little bit of time...
+
+    System.out.println("From top");
+    print(t, 0, 25);
+    print(t, 1, 25);
+    print(t, 0, 5);
+    print(t, 2, 5);
+
+    System.out.println("From bottom");
+    print(t, -1, 25);
+    print(t, -5, 5);
+    print(t, -7, 5);
+
+    // Let the thread stop looping and die.
+    data.stop = true;
+    t.join();
+  }
+
+  public static void print(String[] stack) {
+    System.out.println("---------");
+    for (int i = 0; i < stack.length; i += 2) {
+      System.out.print(' ');
+      System.out.print(stack[i]);
+      System.out.print(' ');
+      System.out.println(stack[i + 1]);
+    }
+  }
+
+  public static void print(Thread t, int start, int max) {
+    print(getStackTrace(t, start, max));
+  }
+
+  // Wrap generated stack traces into a class to separate them nicely.
+  public static class Recurse {
+
+    public static int foo(int x, int start, int max, ControlData data) {
+      bar(x, start, max, data);
+      return 0;
+    }
+
+    private static long bar(int x, int start, int max, ControlData data) {
+      baz(x, start, max, data);
+      return 0;
+    }
+
+    private static Object baz(int x, int start, int max, ControlData data) {
+      if (x == 0) {
+        printOrWait(start, max, data);
+      } else {
+        foo(x - 1, start, max, data);
+      }
+      return null;
+    }
+
+    private static void printOrWait(int start, int max, ControlData data) {
+      if (data == null) {
+        print(Thread.currentThread(), start, max);
+      } else {
+        if (data.waitFor != null) {
+          synchronized (data.waitFor) {
+            data.reached.countDown();
+            try {
+              data.waitFor.wait();  // Use wait() as it doesn't have a "hidden" Java call-graph.
+            } catch (Throwable t) {
+              throw new RuntimeException(t);
+            }
+          }
+        } else {
+          data.reached.countDown();
+          while (!data.stop) {
+            // Busy-loop.
+          }
+        }
+      }
+    }
+  }
+
+  public static class ControlData {
+    CountDownLatch reached = new CountDownLatch(1);
+    Object waitFor = null;
+    volatile boolean stop = false;
+  }
+
+  public static native String[] getStackTrace(Thread thread, int start, int max);
+}
diff --git a/test/911-get-stack-trace/stack_trace.cc b/test/911-get-stack-trace/stack_trace.cc
new file mode 100644
index 0000000..e7d9380
--- /dev/null
+++ b/test/911-get-stack-trace/stack_trace.cc
@@ -0,0 +1,95 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "stack_trace.h"
+
+#include <memory>
+#include <stdio.h>
+
+#include "base/logging.h"
+#include "jni.h"
+#include "openjdkjvmti/jvmti.h"
+#include "ScopedLocalRef.h"
+#include "ti-agent/common_helper.h"
+#include "ti-agent/common_load.h"
+
+namespace art {
+namespace Test911GetStackTrace {
+
+extern "C" JNIEXPORT jobjectArray JNICALL Java_Main_getStackTrace(
+    JNIEnv* env, jclass klass ATTRIBUTE_UNUSED, jthread thread, jint start, jint max) {
+  std::unique_ptr<jvmtiFrameInfo[]> frames(new jvmtiFrameInfo[max]);
+
+  jint count;
+  {
+    jvmtiError result = jvmti_env->GetStackTrace(thread, start, max, frames.get(), &count);
+    if (result != JVMTI_ERROR_NONE) {
+      char* err;
+      jvmti_env->GetErrorName(result, &err);
+      printf("Failure running GetStackTrace: %s\n", err);
+      return nullptr;
+    }
+  }
+
+  auto callback = [&](jint i) -> jstring {
+    size_t method_index = static_cast<size_t>(i) / 2;
+    char* name;
+    char* sig;
+    char* gen;
+    {
+      jvmtiError result2 = jvmti_env->GetMethodName(frames[method_index].method, &name, &sig, &gen);
+      if (result2 != JVMTI_ERROR_NONE) {
+        char* err;
+        jvmti_env->GetErrorName(result2, &err);
+        printf("Failure running GetMethodName: %s\n", err);
+        return nullptr;
+      }
+    }
+    jstring callback_result;
+    if (i % 2 == 0) {
+      callback_result = name == nullptr ? nullptr : env->NewStringUTF(name);
+    } else {
+      callback_result = sig == nullptr ? nullptr : env->NewStringUTF(sig);
+    }
+
+    if (name != nullptr) {
+      jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(name));
+    }
+    if (sig != nullptr) {
+      jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(sig));
+    }
+    if (gen != nullptr) {
+      jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(gen));
+    }
+    return callback_result;
+  };
+  return CreateObjectArray(env, 2 * count, "java/lang/String", callback);
+}
+
+// Don't do anything
+jint OnLoad(JavaVM* vm,
+            char* options ATTRIBUTE_UNUSED,
+            void* reserved ATTRIBUTE_UNUSED) {
+  if (vm->GetEnv(reinterpret_cast<void**>(&jvmti_env), JVMTI_VERSION_1_0)) {
+    printf("Unable to get jvmti env!\n");
+    return 1;
+  }
+  SetAllCapabilities(jvmti_env);
+  return 0;
+}
+
+}  // namespace Test911GetStackTrace
+}  // namespace art
diff --git a/test/562-no-intermediate/src/Main.java b/test/911-get-stack-trace/stack_trace.h
similarity index 64%
copy from test/562-no-intermediate/src/Main.java
copy to test/911-get-stack-trace/stack_trace.h
index 3b74d6f..eba2a91 100644
--- a/test/562-no-intermediate/src/Main.java
+++ b/test/911-get-stack-trace/stack_trace.h
@@ -14,14 +14,17 @@
  * limitations under the License.
  */
 
-public class Main {
+#ifndef ART_TEST_911_GET_STACK_TRACE_STACK_TRACE_H_
+#define ART_TEST_911_GET_STACK_TRACE_STACK_TRACE_H_
 
-  /// CHECK-START-ARM64: int Main.main(String[]) register_allocator (after)
-  /// CHECK-NOT: IntermediateAddress
-  public static void main(String[] args) {
-    array[index] += Math.cos(42);
-  }
+#include <jni.h>
 
-  static int index = 0;
-  static double[] array = new double[2];
-}
+namespace art {
+namespace Test911GetStackTrace {
+
+jint OnLoad(JavaVM* vm, char* options, void* reserved);
+
+}  // namespace Test911GetStackTrace
+}  // namespace art
+
+#endif  // ART_TEST_911_GET_STACK_TRACE_STACK_TRACE_H_
diff --git a/test/912-classes/build b/test/912-classes/build
new file mode 100755
index 0000000..898e2e5
--- /dev/null
+++ b/test/912-classes/build
@@ -0,0 +1,17 @@
+#!/bin/bash
+#
+# Copyright 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+./default-build "$@" --experimental agents
diff --git a/test/912-classes/classes.cc b/test/912-classes/classes.cc
new file mode 100644
index 0000000..838a92a
--- /dev/null
+++ b/test/912-classes/classes.cc
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "classes.h"
+
+#include <stdio.h>
+
+#include "base/macros.h"
+#include "jni.h"
+#include "openjdkjvmti/jvmti.h"
+#include "ScopedLocalRef.h"
+
+#include "ti-agent/common_helper.h"
+#include "ti-agent/common_load.h"
+
+namespace art {
+namespace Test912Classes {
+
+extern "C" JNIEXPORT jobjectArray JNICALL Java_Main_getClassSignature(
+    JNIEnv* env, jclass Main_klass ATTRIBUTE_UNUSED, jclass klass) {
+  char* sig;
+  char* gen;
+  jvmtiError result = jvmti_env->GetClassSignature(klass, &sig, &gen);
+  if (result != JVMTI_ERROR_NONE) {
+    char* err;
+    jvmti_env->GetErrorName(result, &err);
+    printf("Failure running GetClassSignature: %s\n", err);
+    return nullptr;
+  }
+
+  auto callback = [&](jint i) {
+    if (i == 0) {
+      return sig == nullptr ? nullptr : env->NewStringUTF(sig);
+    } else {
+      return gen == nullptr ? nullptr : env->NewStringUTF(gen);
+    }
+  };
+  jobjectArray ret = CreateObjectArray(env, 2, "java/lang/String", callback);
+
+  // Need to deallocate the strings.
+  if (sig != nullptr) {
+    jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(sig));
+  }
+  if (gen != nullptr) {
+    jvmti_env->Deallocate(reinterpret_cast<unsigned char*>(gen));
+  }
+
+  return ret;
+}
+
+// Don't do anything
+jint OnLoad(JavaVM* vm,
+            char* options ATTRIBUTE_UNUSED,
+            void* reserved ATTRIBUTE_UNUSED) {
+  if (vm->GetEnv(reinterpret_cast<void**>(&jvmti_env), JVMTI_VERSION_1_0)) {
+    printf("Unable to get jvmti env!\n");
+    return 1;
+  }
+  SetAllCapabilities(jvmti_env);
+  return 0;
+}
+
+}  // namespace Test912Classes
+}  // namespace art
diff --git a/test/562-no-intermediate/src/Main.java b/test/912-classes/classes.h
similarity index 67%
copy from test/562-no-intermediate/src/Main.java
copy to test/912-classes/classes.h
index 3b74d6f..62fb203 100644
--- a/test/562-no-intermediate/src/Main.java
+++ b/test/912-classes/classes.h
@@ -14,14 +14,17 @@
  * limitations under the License.
  */
 
-public class Main {
+#ifndef ART_TEST_912_CLASSES_CLASSES_H_
+#define ART_TEST_912_CLASSES_CLASSES_H_
 
-  /// CHECK-START-ARM64: int Main.main(String[]) register_allocator (after)
-  /// CHECK-NOT: IntermediateAddress
-  public static void main(String[] args) {
-    array[index] += Math.cos(42);
-  }
+#include <jni.h>
 
-  static int index = 0;
-  static double[] array = new double[2];
-}
+namespace art {
+namespace Test912Classes {
+
+jint OnLoad(JavaVM* vm, char* options, void* reserved);
+
+}  // namespace Test912Classes
+}  // namespace art
+
+#endif  // ART_TEST_912_CLASSES_CLASSES_H_
diff --git a/test/912-classes/expected.txt b/test/912-classes/expected.txt
new file mode 100644
index 0000000..71b22f4
--- /dev/null
+++ b/test/912-classes/expected.txt
@@ -0,0 +1,7 @@
+[Ljava/lang/Object;, null]
+[Ljava/lang/String;, null]
+[Ljava/lang/Math;, null]
+[Ljava/util/List;, null]
+[L$Proxy0;, null]
+[I, null]
+[[D, null]
diff --git a/test/912-classes/info.txt b/test/912-classes/info.txt
new file mode 100644
index 0000000..875a5f6
--- /dev/null
+++ b/test/912-classes/info.txt
@@ -0,0 +1 @@
+Tests basic functions in the jvmti plugin.
diff --git a/test/912-classes/run b/test/912-classes/run
new file mode 100755
index 0000000..64bbb98
--- /dev/null
+++ b/test/912-classes/run
@@ -0,0 +1,43 @@
+#!/bin/bash
+#
+# Copyright 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+plugin=libopenjdkjvmtid.so
+agent=libtiagentd.so
+lib=tiagentd
+if  [[ "$@" == *"-O"* ]]; then
+  agent=libtiagent.so
+  plugin=libopenjdkjvmti.so
+  lib=tiagent
+fi
+
+if [[ "$@" == *"--jvm"* ]]; then
+  arg="jvm"
+else
+  arg="art"
+fi
+
+if [[ "$@" != *"--debuggable"* ]]; then
+  other_args=" -Xcompiler-option --debuggable "
+else
+  other_args=""
+fi
+
+./default-run "$@" --experimental agents \
+                   --experimental runtime-plugins \
+                   --runtime-option -agentpath:${agent}=912-classes,${arg} \
+                   --android-runtime-option -Xplugin:${plugin} \
+                   ${other_args} \
+                   --args ${lib}
diff --git a/test/912-classes/src/Main.java b/test/912-classes/src/Main.java
new file mode 100644
index 0000000..025584e
--- /dev/null
+++ b/test/912-classes/src/Main.java
@@ -0,0 +1,61 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.reflect.Proxy;
+import java.util.Arrays;
+
+public class Main {
+  public static void main(String[] args) throws Exception {
+    System.loadLibrary(args[1]);
+
+    doTest();
+  }
+
+  public static void doTest() throws Exception {
+    testClass("java.lang.Object");
+    testClass("java.lang.String");
+    testClass("java.lang.Math");
+    testClass("java.util.List");
+
+    testClass(getProxyClass());
+
+    testClass(int.class);
+    testClass(double[].class);
+  }
+
+  private static Class<?> proxyClass = null;
+
+  private static Class<?> getProxyClass() throws Exception {
+    if (proxyClass != null) {
+      return proxyClass;
+    }
+
+    proxyClass = Proxy.getProxyClass(Main.class.getClassLoader(), new Class[] { Runnable.class });
+    return proxyClass;
+  }
+
+  private static void testClass(String className) throws Exception {
+    Class<?> base = Class.forName(className);
+    testClass(base);
+  }
+
+  private static void testClass(Class<?> base) throws Exception {
+    String[] result = getClassSignature(base);
+    System.out.println(Arrays.toString(result));
+  }
+
+  private static native String[] getClassSignature(Class<?> c);
+}
diff --git a/test/913-heaps/build b/test/913-heaps/build
new file mode 100755
index 0000000..898e2e5
--- /dev/null
+++ b/test/913-heaps/build
@@ -0,0 +1,17 @@
+#!/bin/bash
+#
+# Copyright 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+./default-build "$@" --experimental agents
diff --git a/test/913-heaps/expected.txt b/test/913-heaps/expected.txt
new file mode 100644
index 0000000..d1ddbae
--- /dev/null
+++ b/test/913-heaps/expected.txt
@@ -0,0 +1,92 @@
+---
+true true
+root@root --(stack-local)--> 1@1000 [size=16, length=-1]
+root@root --(stack-local)--> 3000@0 [size=132, length=-1]
+root@root --(thread)--> 3000@0 [size=132, length=-1]
+0@0 --(array-element@0)--> 1@1000 [size=16, length=-1]
+1001@0 --(superclass)--> 1000@0 [size=123, length=-1]
+1002@0 --(interface)--> 2001@0 [size=132, length=-1]
+1002@0 --(superclass)--> 1001@0 [size=123, length=-1]
+1@1000 --(class)--> 1000@0 [size=123, length=-1]
+1@1000 --(field@12)--> 3@1001 [size=24, length=-1]
+1@1000 --(field@8)--> 2@1000 [size=16, length=-1]
+2001@0 --(interface)--> 2000@0 [size=132, length=-1]
+2@1000 --(class)--> 1000@0 [size=123, length=-1]
+3@1001 --(class)--> 1001@0 [size=123, length=-1]
+3@1001 --(field@16)--> 4@1000 [size=16, length=-1]
+3@1001 --(field@20)--> 5@1002 [size=32, length=-1]
+4@1000 --(class)--> 1000@0 [size=123, length=-1]
+5@1002 --(class)--> 1002@0 [size=123, length=-1]
+5@1002 --(field@24)--> 6@1000 [size=16, length=-1]
+5@1002 --(field@28)--> 1@1000 [size=16, length=-1]
+6@1000 --(class)--> 1000@0 [size=123, length=-1]
+---
+root@root --(stack-local)--> 1@1000 [size=16, length=-1]
+root@root --(stack-local)--> 2@1000 [size=16, length=-1]
+root@root --(stack-local)--> 3000@0 [size=132, length=-1]
+root@root --(thread)--> 2@1000 [size=16, length=-1]
+root@root --(thread)--> 3000@0 [size=132, length=-1]
+0@0 --(array-element@0)--> 1@1000 [size=16, length=-1]
+1001@0 --(superclass)--> 1000@0 [size=123, length=-1]
+1002@0 --(interface)--> 2001@0 [size=132, length=-1]
+1002@0 --(superclass)--> 1001@0 [size=123, length=-1]
+1@1000 --(class)--> 1000@0 [size=123, length=-1]
+1@1000 --(field@12)--> 3@1001 [size=24, length=-1]
+1@1000 --(field@8)--> 2@1000 [size=16, length=-1]
+2001@0 --(interface)--> 2000@0 [size=132, length=-1]
+2@1000 --(class)--> 1000@0 [size=123, length=-1]
+3@1001 --(class)--> 1001@0 [size=123, length=-1]
+3@1001 --(field@16)--> 4@1000 [size=16, length=-1]
+3@1001 --(field@20)--> 5@1002 [size=32, length=-1]
+4@1000 --(class)--> 1000@0 [size=123, length=-1]
+5@1002 --(class)--> 1002@0 [size=123, length=-1]
+5@1002 --(field@24)--> 6@1000 [size=16, length=-1]
+5@1002 --(field@28)--> 1@1000 [size=16, length=-1]
+6@1000 --(class)--> 1000@0 [size=123, length=-1]
+---
+root@root --(jni-global)--> 1@1000 [size=16, length=-1]
+root@root --(jni-local)--> 1@1000 [size=16, length=-1]
+root@root --(stack-local)--> 1@1000 [size=16, length=-1]
+root@root --(thread)--> 1@1000 [size=16, length=-1]
+root@root --(thread)--> 3000@0 [size=132, length=-1]
+1001@0 --(superclass)--> 1000@0 [size=123, length=-1]
+1002@0 --(interface)--> 2001@0 [size=132, length=-1]
+1002@0 --(superclass)--> 1001@0 [size=123, length=-1]
+1@1000 --(class)--> 1000@0 [size=123, length=-1]
+1@1000 --(field@12)--> 3@1001 [size=24, length=-1]
+1@1000 --(field@8)--> 2@1000 [size=16, length=-1]
+2001@0 --(interface)--> 2000@0 [size=132, length=-1]
+2@1000 --(class)--> 1000@0 [size=123, length=-1]
+3@1001 --(class)--> 1001@0 [size=123, length=-1]
+3@1001 --(field@16)--> 4@1000 [size=16, length=-1]
+3@1001 --(field@20)--> 5@1002 [size=32, length=-1]
+4@1000 --(class)--> 1000@0 [size=123, length=-1]
+5@1002 --(class)--> 1002@0 [size=123, length=-1]
+5@1002 --(field@24)--> 6@1000 [size=16, length=-1]
+5@1002 --(field@28)--> 1@1000 [size=16, length=-1]
+6@1000 --(class)--> 1000@0 [size=123, length=-1]
+---
+root@root --(jni-global)--> 1@1000 [size=16, length=-1]
+root@root --(jni-local)--> 1@1000 [size=16, length=-1]
+root@root --(stack-local)--> 1@1000 [size=16, length=-1]
+root@root --(stack-local)--> 2@1000 [size=16, length=-1]
+root@root --(thread)--> 1@1000 [size=16, length=-1]
+root@root --(thread)--> 2@1000 [size=16, length=-1]
+root@root --(thread)--> 3000@0 [size=132, length=-1]
+1001@0 --(superclass)--> 1000@0 [size=123, length=-1]
+1002@0 --(interface)--> 2001@0 [size=132, length=-1]
+1002@0 --(superclass)--> 1001@0 [size=123, length=-1]
+1@1000 --(class)--> 1000@0 [size=123, length=-1]
+1@1000 --(field@12)--> 3@1001 [size=24, length=-1]
+1@1000 --(field@8)--> 2@1000 [size=16, length=-1]
+2001@0 --(interface)--> 2000@0 [size=132, length=-1]
+2@1000 --(class)--> 1000@0 [size=123, length=-1]
+3@1001 --(class)--> 1001@0 [size=123, length=-1]
+3@1001 --(field@16)--> 4@1000 [size=16, length=-1]
+3@1001 --(field@20)--> 5@1002 [size=32, length=-1]
+4@1000 --(class)--> 1000@0 [size=123, length=-1]
+5@1002 --(class)--> 1002@0 [size=123, length=-1]
+5@1002 --(field@24)--> 6@1000 [size=16, length=-1]
+5@1002 --(field@28)--> 1@1000 [size=16, length=-1]
+6@1000 --(class)--> 1000@0 [size=123, length=-1]
+---
diff --git a/test/913-heaps/heaps.cc b/test/913-heaps/heaps.cc
new file mode 100644
index 0000000..0c627d6
--- /dev/null
+++ b/test/913-heaps/heaps.cc
@@ -0,0 +1,310 @@
+/*
+ * Copyright (C) 2013 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "heaps.h"
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <vector>
+
+#include "base/logging.h"
+#include "base/macros.h"
+#include "base/stringprintf.h"
+#include "jit/jit.h"
+#include "jni.h"
+#include "native_stack_dump.h"
+#include "openjdkjvmti/jvmti.h"
+#include "runtime.h"
+#include "thread-inl.h"
+#include "thread_list.h"
+
+#include "ti-agent/common_helper.h"
+#include "ti-agent/common_load.h"
+
+namespace art {
+namespace Test913Heaps {
+
+extern "C" JNIEXPORT void JNICALL Java_Main_forceGarbageCollection(JNIEnv* env ATTRIBUTE_UNUSED,
+                                                                   jclass klass ATTRIBUTE_UNUSED) {
+  jvmtiError ret = jvmti_env->ForceGarbageCollection();
+  if (ret != JVMTI_ERROR_NONE) {
+    char* err;
+    jvmti_env->GetErrorName(ret, &err);
+    printf("Error forcing a garbage collection: %s\n", err);
+  }
+}
+
+class IterationConfig {
+ public:
+  IterationConfig() {}
+  virtual ~IterationConfig() {}
+
+  virtual jint Handle(jvmtiHeapReferenceKind reference_kind,
+                      const jvmtiHeapReferenceInfo* reference_info,
+                      jlong class_tag,
+                      jlong referrer_class_tag,
+                      jlong size,
+                      jlong* tag_ptr,
+                      jlong* referrer_tag_ptr,
+                      jint length,
+                      void* user_data) = 0;
+};
+
+static jint JNICALL HeapReferenceCallback(jvmtiHeapReferenceKind reference_kind,
+                                          const jvmtiHeapReferenceInfo* reference_info,
+                                          jlong class_tag,
+                                          jlong referrer_class_tag,
+                                          jlong size,
+                                          jlong* tag_ptr,
+                                          jlong* referrer_tag_ptr,
+                                          jint length,
+                                          void* user_data) {
+  IterationConfig* config = reinterpret_cast<IterationConfig*>(user_data);
+  return config->Handle(reference_kind,
+                        reference_info,
+                        class_tag,
+                        referrer_class_tag,
+                        size,
+                        tag_ptr,
+                        referrer_tag_ptr,
+                        length,
+                        user_data);
+}
+
+static bool Run(jint heap_filter,
+                jclass klass_filter,
+                jobject initial_object,
+                IterationConfig* config) {
+  jvmtiHeapCallbacks callbacks;
+  memset(&callbacks, 0, sizeof(jvmtiHeapCallbacks));
+  callbacks.heap_reference_callback = HeapReferenceCallback;
+
+  jvmtiError ret = jvmti_env->FollowReferences(heap_filter,
+                                               klass_filter,
+                                               initial_object,
+                                               &callbacks,
+                                               config);
+  if (ret != JVMTI_ERROR_NONE) {
+    char* err;
+    jvmti_env->GetErrorName(ret, &err);
+    printf("Failure running FollowReferences: %s\n", err);
+    return false;
+  }
+  return true;
+}
+
+extern "C" JNIEXPORT jobjectArray JNICALL Java_Main_followReferences(JNIEnv* env,
+                                                                     jclass klass ATTRIBUTE_UNUSED,
+                                                                     jint heap_filter,
+                                                                     jclass klass_filter,
+                                                                     jobject initial_object,
+                                                                     jint stop_after,
+                                                                     jint follow_set,
+                                                                     jobject jniRef) {
+  class PrintIterationConfig FINAL : public IterationConfig {
+   public:
+    PrintIterationConfig(jint _stop_after, jint _follow_set)
+        : counter_(0),
+          stop_after_(_stop_after),
+          follow_set_(_follow_set) {
+    }
+
+    jint Handle(jvmtiHeapReferenceKind reference_kind,
+                const jvmtiHeapReferenceInfo* reference_info,
+                jlong class_tag,
+                jlong referrer_class_tag,
+                jlong size,
+                jlong* tag_ptr,
+                jlong* referrer_tag_ptr,
+                jint length,
+                void* user_data ATTRIBUTE_UNUSED) OVERRIDE {
+      jlong tag = *tag_ptr;
+      // Only check tagged objects.
+      if (tag == 0) {
+        return JVMTI_VISIT_OBJECTS;
+      }
+
+      Print(reference_kind,
+            reference_info,
+            class_tag,
+            referrer_class_tag,
+            size,
+            tag_ptr,
+            referrer_tag_ptr,
+            length);
+
+      counter_++;
+      if (counter_ == stop_after_) {
+        return JVMTI_VISIT_ABORT;
+      }
+
+      if (tag > 0 && tag < 32) {
+        bool should_visit_references = (follow_set_ & (1 << static_cast<int32_t>(tag))) != 0;
+        return should_visit_references ? JVMTI_VISIT_OBJECTS : 0;
+      }
+
+      return JVMTI_VISIT_OBJECTS;
+    }
+
+    void Print(jvmtiHeapReferenceKind reference_kind,
+               const jvmtiHeapReferenceInfo* reference_info,
+               jlong class_tag,
+               jlong referrer_class_tag,
+               jlong size,
+               jlong* tag_ptr,
+               jlong* referrer_tag_ptr,
+               jint length) {
+      std::string referrer_str;
+      if (referrer_tag_ptr == nullptr) {
+        referrer_str = "root@root";
+      } else {
+        referrer_str = StringPrintf("%" PRId64 "@%" PRId64, *referrer_tag_ptr, referrer_class_tag);
+      }
+
+      jlong adapted_size = size;
+      if (*tag_ptr >= 1000) {
+        // This is a class or interface, the size of which will be dependent on the architecture.
+        // Do not print the size, but detect known values and "normalize" for the golden file.
+        if ((sizeof(void*) == 4 && size == 180) || (sizeof(void*) == 8 && size == 232)) {
+          adapted_size = 123;
+        }
+      }
+
+      lines_.push_back(
+          StringPrintf("%s --(%s)--> %" PRId64 "@%" PRId64 " [size=%" PRId64 ", length=%d]",
+                       referrer_str.c_str(),
+                       GetReferenceTypeStr(reference_kind, reference_info).c_str(),
+                       *tag_ptr,
+                       class_tag,
+                       adapted_size,
+                       length));
+
+      if (reference_kind == JVMTI_HEAP_REFERENCE_THREAD && *tag_ptr == 1000) {
+        DumpStacks();
+      }
+    }
+
+    static void DumpStacks() NO_THREAD_SAFETY_ANALYSIS {
+      auto dump_function = [](art::Thread* t, void* data ATTRIBUTE_UNUSED) {
+        std::string name;
+        t->GetThreadName(name);
+        LOG(ERROR) << name;
+        art::DumpNativeStack(LOG_STREAM(ERROR), t->GetTid());
+      };
+      art::Runtime::Current()->GetThreadList()->ForEach(dump_function, nullptr);
+    }
+
+    static std::string GetReferenceTypeStr(jvmtiHeapReferenceKind reference_kind,
+                                           const jvmtiHeapReferenceInfo* reference_info) {
+      switch (reference_kind) {
+        case JVMTI_HEAP_REFERENCE_CLASS:
+          return "class";
+        case JVMTI_HEAP_REFERENCE_FIELD:
+          return StringPrintf("field@%d", reference_info->field.index);
+        case JVMTI_HEAP_REFERENCE_ARRAY_ELEMENT:
+          return StringPrintf("array-element@%d", reference_info->array.index);
+        case JVMTI_HEAP_REFERENCE_CLASS_LOADER:
+          return "classloader";
+        case JVMTI_HEAP_REFERENCE_SIGNERS:
+          return "signers";
+        case JVMTI_HEAP_REFERENCE_PROTECTION_DOMAIN:
+          return "protection-domain";
+        case JVMTI_HEAP_REFERENCE_INTERFACE:
+          return "interface";
+        case JVMTI_HEAP_REFERENCE_STATIC_FIELD:
+          return StringPrintf("static-field@%d", reference_info->field.index);
+        case JVMTI_HEAP_REFERENCE_CONSTANT_POOL:
+          return "constant-pool";
+        case JVMTI_HEAP_REFERENCE_SUPERCLASS:
+          return "superclass";
+        case JVMTI_HEAP_REFERENCE_JNI_GLOBAL:
+          return "jni-global";
+        case JVMTI_HEAP_REFERENCE_SYSTEM_CLASS:
+          return "system-class";
+        case JVMTI_HEAP_REFERENCE_MONITOR:
+          return "monitor";
+        case JVMTI_HEAP_REFERENCE_STACK_LOCAL:
+          return "stack-local";
+        case JVMTI_HEAP_REFERENCE_JNI_LOCAL:
+          return "jni-local";
+        case JVMTI_HEAP_REFERENCE_THREAD:
+          return "thread";
+        case JVMTI_HEAP_REFERENCE_OTHER:
+          return "other";
+      }
+      return "unknown";
+    }
+
+    const std::vector<std::string>& GetLines() const {
+      return lines_;
+    }
+
+   private:
+    jint counter_;
+    const jint stop_after_;
+    const jint follow_set_;
+    std::vector<std::string> lines_;
+  };
+
+  // If jniRef isn't null, add a local and a global ref.
+  ScopedLocalRef<jobject> jni_local_ref(env, nullptr);
+  jobject jni_global_ref = nullptr;
+  if (jniRef != nullptr) {
+    jni_local_ref.reset(env->NewLocalRef(jniRef));
+    jni_global_ref = env->NewGlobalRef(jniRef);
+  }
+
+  PrintIterationConfig config(stop_after, follow_set);
+  Run(heap_filter, klass_filter, initial_object, &config);
+
+  const std::vector<std::string>& lines = config.GetLines();
+  jobjectArray ret = CreateObjectArray(env,
+                                       static_cast<jint>(lines.size()),
+                                       "java/lang/String",
+                                       [&](jint i) {
+                                         return env->NewStringUTF(lines[i].c_str());
+                                       });
+
+  if (jni_global_ref != nullptr) {
+    env->DeleteGlobalRef(jni_global_ref);
+  }
+
+  return ret;
+}
+
+// Don't do anything
+jint OnLoad(JavaVM* vm,
+            char* options ATTRIBUTE_UNUSED,
+            void* reserved ATTRIBUTE_UNUSED) {
+  if (vm->GetEnv(reinterpret_cast<void**>(&jvmti_env), JVMTI_VERSION_1_0)) {
+    printf("Unable to get jvmti env!\n");
+    return 1;
+  }
+  SetAllCapabilities(jvmti_env);
+  return 0;
+}
+
+extern "C" JNIEXPORT void JNICALL Java_Main_waitForJitCompilation(JNIEnv*, jclass) {
+  jit::Jit* jit = Runtime::Current()->GetJit();
+  if (jit != nullptr) {
+    jit->WaitForCompilationToFinish(Thread::Current());
+  }
+}
+
+}  // namespace Test913Heaps
+}  // namespace art
diff --git a/test/562-no-intermediate/src/Main.java b/test/913-heaps/heaps.h
similarity index 68%
rename from test/562-no-intermediate/src/Main.java
rename to test/913-heaps/heaps.h
index 3b74d6f..bd828ac 100644
--- a/test/562-no-intermediate/src/Main.java
+++ b/test/913-heaps/heaps.h
@@ -14,14 +14,17 @@
  * limitations under the License.
  */
 
-public class Main {
+#ifndef ART_TEST_913_HEAPS_HEAPS_H_
+#define ART_TEST_913_HEAPS_HEAPS_H_
 
-  /// CHECK-START-ARM64: int Main.main(String[]) register_allocator (after)
-  /// CHECK-NOT: IntermediateAddress
-  public static void main(String[] args) {
-    array[index] += Math.cos(42);
-  }
+#include <jni.h>
 
-  static int index = 0;
-  static double[] array = new double[2];
-}
+namespace art {
+namespace Test913Heaps {
+
+jint OnLoad(JavaVM* vm, char* options, void* reserved);
+
+}  // namespace Test913Heaps
+}  // namespace art
+
+#endif  // ART_TEST_913_HEAPS_HEAPS_H_
diff --git a/test/913-heaps/info.txt b/test/913-heaps/info.txt
new file mode 100644
index 0000000..875a5f6
--- /dev/null
+++ b/test/913-heaps/info.txt
@@ -0,0 +1 @@
+Tests basic functions in the jvmti plugin.
diff --git a/test/913-heaps/run b/test/913-heaps/run
new file mode 100755
index 0000000..7bd8cbd
--- /dev/null
+++ b/test/913-heaps/run
@@ -0,0 +1,43 @@
+#!/bin/bash
+#
+# Copyright 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+plugin=libopenjdkjvmtid.so
+agent=libtiagentd.so
+lib=tiagentd
+if  [[ "$@" == *"-O"* ]]; then
+  agent=libtiagent.so
+  plugin=libopenjdkjvmti.so
+  lib=tiagent
+fi
+
+if [[ "$@" == *"--jvm"* ]]; then
+  arg="jvm"
+else
+  arg="art"
+fi
+
+if [[ "$@" != *"--debuggable"* ]]; then
+  other_args=" -Xcompiler-option --debuggable "
+else
+  other_args=""
+fi
+
+./default-run "$@" --experimental agents \
+                   --experimental runtime-plugins \
+                   --runtime-option -agentpath:${agent}=913-heaps,${arg} \
+                   --android-runtime-option -Xplugin:${plugin} \
+                   ${other_args} \
+                   --args ${lib}
diff --git a/test/913-heaps/src/Main.java b/test/913-heaps/src/Main.java
new file mode 100644
index 0000000..fc00ada
--- /dev/null
+++ b/test/913-heaps/src/Main.java
@@ -0,0 +1,393 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+
+public class Main {
+  public static void main(String[] args) throws Exception {
+    System.loadLibrary(args[1]);
+
+    doTest();
+    doFollowReferencesTest();
+  }
+
+  public static void doTest() throws Exception {
+    setupGcCallback();
+
+    enableGcTracking(true);
+    run();
+    enableGcTracking(false);
+  }
+
+  private static void run() {
+    clearStats();
+    forceGarbageCollection();
+    printStats();
+  }
+
+  private static void clearStats() {
+    getGcStarts();
+    getGcFinishes();
+  }
+
+  private static void printStats() {
+    System.out.println("---");
+    int s = getGcStarts();
+    int f = getGcFinishes();
+    System.out.println((s > 0) + " " + (f > 0));
+  }
+
+  public static void doFollowReferencesTest() throws Exception {
+    // Force GCs to clean up dirt.
+    Runtime.getRuntime().gc();
+    Runtime.getRuntime().gc();
+
+    setTag(Thread.currentThread(), 3000);
+
+    {
+      ArrayList<Object> tmpStorage = new ArrayList<>();
+      doFollowReferencesTestNonRoot(tmpStorage);
+      tmpStorage = null;
+    }
+
+    // Force GCs to clean up dirt.
+    Runtime.getRuntime().gc();
+    Runtime.getRuntime().gc();
+
+    doFollowReferencesTestRoot();
+
+    // Force GCs to clean up dirt.
+    Runtime.getRuntime().gc();
+    Runtime.getRuntime().gc();
+  }
+
+  private static void doFollowReferencesTestNonRoot(ArrayList<Object> tmpStorage) {
+    Verifier v = new Verifier();
+    tagClasses(v);
+    A a = createTree(v);
+    tmpStorage.add(a);
+    v.add("0@0", "1@1000");  // tmpStorage[0] --(array-element)--> a.
+
+    doFollowReferencesTestImpl(null, Integer.MAX_VALUE, -1, null, v, null);
+    doFollowReferencesTestImpl(a.foo, Integer.MAX_VALUE, -1, null, v, "2@1000");
+
+    tmpStorage.clear();
+  }
+
+  private static void doFollowReferencesTestRoot() {
+    Verifier v = new Verifier();
+    tagClasses(v);
+    A a = createTree(v);
+
+    doFollowReferencesTestImpl(null, Integer.MAX_VALUE, -1, a, v, null);
+    doFollowReferencesTestImpl(a.foo, Integer.MAX_VALUE, -1, a, v, "2@1000");
+  }
+
+  private static void doFollowReferencesTestImpl(A root, int stopAfter, int followSet,
+      Object asRoot, Verifier v, String additionalEnabled) {
+    waitForJitCompilation();  // Wait to avoid JIT influence (e.g., JNI globals).
+
+    String[] lines =
+        followReferences(0, null, root, stopAfter, followSet, asRoot);
+
+    v.process(lines, additionalEnabled);
+
+    // TODO: Test filters.
+  }
+
+  private static void tagClasses(Verifier v) {
+    setTag(A.class, 1000);
+
+    setTag(B.class, 1001);
+    v.add("1001@0", "1000@0");  // B.class --(superclass)--> A.class.
+
+    setTag(C.class, 1002);
+    v.add("1002@0", "1001@0");  // C.class --(superclass)--> B.class.
+    v.add("1002@0", "2001@0");  // C.class --(interface)--> I2.class.
+
+    setTag(I1.class, 2000);
+
+    setTag(I2.class, 2001);
+    v.add("2001@0", "2000@0");  // I2.class --(interface)--> I1.class.
+  }
+
+  private static A createTree(Verifier v) {
+    A aInst = new A();
+    setTag(aInst, 1);
+    String aInstStr = "1@1000";
+    String aClassStr = "1000@0";
+    v.add(aInstStr, aClassStr);  // A -->(class) --> A.class.
+
+    A a2Inst = new A();
+    setTag(a2Inst, 2);
+    aInst.foo = a2Inst;
+    String a2InstStr = "2@1000";
+    v.add(a2InstStr, aClassStr);  // A2 -->(class) --> A.class.
+    v.add(aInstStr, a2InstStr);   // A -->(field) --> A2.
+
+    B bInst = new B();
+    setTag(bInst, 3);
+    aInst.foo2 = bInst;
+    String bInstStr = "3@1001";
+    String bClassStr = "1001@0";
+    v.add(bInstStr, bClassStr);  // B -->(class) --> B.class.
+    v.add(aInstStr, bInstStr);   // A -->(field) --> B.
+
+    A a3Inst = new A();
+    setTag(a3Inst, 4);
+    bInst.bar = a3Inst;
+    String a3InstStr = "4@1000";
+    v.add(a3InstStr, aClassStr);  // A3 -->(class) --> A.class.
+    v.add(bInstStr, a3InstStr);   // B -->(field) --> A3.
+
+    C cInst = new C();
+    setTag(cInst, 5);
+    bInst.bar2 = cInst;
+    String cInstStr = "5@1000";
+    String cClassStr = "1002@0";
+    v.add(cInstStr, cClassStr);  // C -->(class) --> C.class.
+    v.add(bInstStr, cInstStr);   // B -->(field) --> C.
+
+    A a4Inst = new A();
+    setTag(a4Inst, 6);
+    cInst.baz = a4Inst;
+    String a4InstStr = "6@1000";
+    v.add(a4InstStr, aClassStr);  // A4 -->(class) --> A.class.
+    v.add(cInstStr, a4InstStr);   // C -->(field) --> A4.
+
+    cInst.baz2 = aInst;
+    v.add(cInstStr, aInstStr);  // C -->(field) --> A.
+
+    return aInst;
+  }
+
+  public static class A {
+    public A foo;
+    public A foo2;
+
+    public A() {}
+    public A(A a, A b) {
+      foo = a;
+      foo2 = b;
+    }
+  }
+
+  public static class B extends A {
+    public A bar;
+    public A bar2;
+
+    public B() {}
+    public B(A a, A b) {
+      bar = a;
+      bar2 = b;
+    }
+  }
+
+  public static interface I1 {
+    public final static int i1Field = 1;
+  }
+
+  public static interface I2 extends I1 {
+    public final static int i2Field = 2;
+  }
+
+  public static class C extends B implements I2 {
+    public A baz;
+    public A baz2;
+
+    public C() {}
+    public C(A a, A b) {
+      baz = a;
+      baz2 = b;
+    }
+  }
+
+  public static class Verifier {
+    public static class Node {
+      public String referrer;
+
+      public HashSet<String> referrees = new HashSet<>();
+
+      public Node(String r) {
+        referrer = r;
+      }
+
+      public boolean isRoot() {
+        return referrer.startsWith("root@");
+      }
+    }
+
+    HashMap<String, Node> nodes = new HashMap<>();
+
+    public Verifier() {
+    }
+
+    public void add(String referrer, String referree) {
+      if (!nodes.containsKey(referrer)) {
+        nodes.put(referrer, new Node(referrer));
+      }
+      if (referree != null) {
+        nodes.get(referrer).referrees.add(referree);
+      }
+    }
+
+    public void process(String[] lines, String additionalEnabledReferrer) {
+      // This method isn't optimal. The loops could be merged. However, it's more readable if
+      // the different parts are separated.
+
+      ArrayList<String> rootLines = new ArrayList<>();
+      ArrayList<String> nonRootLines = new ArrayList<>();
+
+      // Check for consecutive chunks of referrers. Also ensure roots come first.
+      {
+        String currentHead = null;
+        boolean rootsDone = false;
+        HashSet<String> completedReferrers = new HashSet<>();
+        for (String l : lines) {
+          String referrer = getReferrer(l);
+
+          if (isRoot(referrer)) {
+            if (rootsDone) {
+              System.out.println("ERROR: Late root " + l);
+              print(lines);
+              return;
+            }
+            rootLines.add(l);
+            continue;
+          }
+
+          rootsDone = true;
+
+          if (currentHead == null) {
+            currentHead = referrer;
+          } else {
+            if (!currentHead.equals(referrer)) {
+              completedReferrers.add(currentHead);
+              currentHead = referrer;
+              if (completedReferrers.contains(referrer)) {
+                System.out.println("Non-contiguous referrer " + l);
+                print(lines);
+                return;
+              }
+            }
+          }
+          nonRootLines.add(l);
+        }
+      }
+
+      // Sort (root order is not specified) and print the roots.
+      // TODO: What about extra roots? JNI and the interpreter seem to introduce those (though it
+      //       isn't clear why a debuggable-AoT test doesn't have the same, at least for locals).
+      //       For now, swallow duplicates, and resolve once we have the metadata for the roots.
+      {
+        Collections.sort(rootLines);
+        String lastRoot = null;
+        for (String l : rootLines) {
+          if (lastRoot != null && lastRoot.equals(l)) {
+            continue;
+          }
+          lastRoot = l;
+          System.out.println(l);
+        }
+      }
+
+      // Iterate through the lines, keeping track of which referrers are visited, to ensure the
+      // order is acceptable.
+      HashSet<String> enabled = new HashSet<>();
+      if (additionalEnabledReferrer != null) {
+        enabled.add(additionalEnabledReferrer);
+      }
+      // Always add "0@0".
+      enabled.add("0@0");
+
+      for (String l : lines) {
+        String referrer = getReferrer(l);
+        String referree = getReferree(l);
+        if (isRoot(referrer)) {
+          // For a root src, just enable the referree.
+          enabled.add(referree);
+        } else {
+          // Check that the referrer is enabled (may be visited).
+          if (!enabled.contains(referrer)) {
+            System.out.println("Referrer " + referrer + " not enabled: " + l);
+            print(lines);
+            return;
+          }
+          enabled.add(referree);
+        }
+      }
+
+      // Now just sort the non-root lines and output them
+      Collections.sort(nonRootLines);
+      for (String l : nonRootLines) {
+        System.out.println(l);
+      }
+
+      System.out.println("---");
+    }
+
+    public static boolean isRoot(String ref) {
+      return ref.startsWith("root@");
+    }
+
+    private static String getReferrer(String line) {
+      int i = line.indexOf(" --");
+      if (i <= 0) {
+        throw new IllegalArgumentException(line);
+      }
+      int j = line.indexOf(' ');
+      if (i != j) {
+        throw new IllegalArgumentException(line);
+      }
+      return line.substring(0, i);
+    }
+
+    private static String getReferree(String line) {
+      int i = line.indexOf("--> ");
+      if (i <= 0) {
+        throw new IllegalArgumentException(line);
+      }
+      int j = line.indexOf(' ', i + 4);
+      if (j < 0) {
+        throw new IllegalArgumentException(line);
+      }
+      return line.substring(i + 4, j);
+    }
+
+    private static void print(String[] lines) {
+      for (String l : lines) {
+        System.out.println(l);
+      }
+    }
+  }
+
+  private static native void setupGcCallback();
+  private static native void enableGcTracking(boolean enable);
+  private static native int getGcStarts();
+  private static native int getGcFinishes();
+  private static native void forceGarbageCollection();
+
+  private static native void setTag(Object o, long tag);
+  private static native long getTag(Object o);
+
+  private static native String[] followReferences(int heapFilter, Class<?> klassFilter,
+      Object initialObject, int stopAfter, int followSet, Object jniRef);
+
+  private static native void waitForJitCompilation();
+}
diff --git a/test/955-methodhandles-smali/expected.txt b/test/955-methodhandles-smali/expected.txt
index 07d2422..5de1274 100644
--- a/test/955-methodhandles-smali/expected.txt
+++ b/test/955-methodhandles-smali/expected.txt
@@ -1,2 +1,9 @@
 [String1]+[String2]
 [String1]
+[String1]+[String2]
+42
+40
+43
+44
+0
+-1
diff --git a/test/955-methodhandles-smali/smali/Main.smali b/test/955-methodhandles-smali/smali/Main.smali
index 2fc92f8..52460a8 100644
--- a/test/955-methodhandles-smali/smali/Main.smali
+++ b/test/955-methodhandles-smali/smali/Main.smali
@@ -66,8 +66,31 @@
     return-object v0
 .end method
 
-# Returns a method handle to static String java.lang.String.valueOf(String);
-.method public static getStringValueOfHandle()Ljava/lang/invoke/MethodHandle;
+# Returns a method handle to boolean java.lang.Long.compareTo(java.lang.Long other).
+.method public static getLongCompareToHandle()Ljava/lang/invoke/MethodHandle;
+.registers 4
+    new-instance v0, Ljava/lang/Long;
+    const-wide v1, 0
+    invoke-direct {v0, v1, v2}, Ljava/lang/Long;-><init>(J)V
+    invoke-virtual {v0}, Ljava/lang/Object;->getClass()Ljava/lang/Class;
+    move-result-object v0
+
+    # set v0 to Integer.TYPE aka. int.class
+    sget-object v1, Ljava/lang/Integer;->TYPE:Ljava/lang/Class;
+
+    # Call MethodType.methodType(rtype=int.class, ptype[0] = Long.class)
+    invoke-static {v1, v0}, Ljava/lang/invoke/MethodType;->methodType(Ljava/lang/Class;Ljava/lang/Class;)Ljava/lang/invoke/MethodType;
+    move-result-object v2
+
+    const-string v3, "compareTo"
+    # Call Main.getHandleForVirtual(Long.class, "compareTo", methodType);
+    invoke-static {v0, v3, v2}, LMain;->getHandleForVirtual(Ljava/lang/Class;Ljava/lang/String;Ljava/lang/invoke/MethodType;)Ljava/lang/invoke/MethodHandle;
+    move-result-object v0
+    return-object v0
+.end method
+
+# Returns a method handle to static String java.lang.String.valueOf(Object);
+.method public static getStringValueOfObjectHandle()Ljava/lang/invoke/MethodHandle;
 .registers 4
     # set v0 to java.lang.Object.class
     new-instance v0, Ljava/lang/Object;
@@ -90,6 +113,26 @@
     return-object v0
 .end method
 
+# Returns a method handle to static String java.lang.String.valueOf(String);
+.method public static getStringValueOfLongHandle()Ljava/lang/invoke/MethodHandle;
+.registers 4
+    # set v0 to Long.TYPE aka. long.class
+    sget-object v0, Ljava/lang/Long;->TYPE:Ljava/lang/Class;
+
+    # set v1 to the name of the method ("valueOf") and v2 to java.lang.String.class;
+    const-string v1, "valueOf"
+    invoke-virtual {v1}, Ljava/lang/Object;->getClass()Ljava/lang/Class;
+    move-result-object v2
+
+    # Call MethodType.methodType(rtype=String.class, ptype[0]=Long.class)
+    invoke-static {v2, v0}, Ljava/lang/invoke/MethodType;->methodType(Ljava/lang/Class;Ljava/lang/Class;)Ljava/lang/invoke/MethodType;
+    move-result-object v3
+
+    # Call Main.getHandleForStatic(String.class, "valueOf", methodType);
+    invoke-static {v2, v1, v3}, LMain;->getHandleForStatic(Ljava/lang/Class;Ljava/lang/String;Ljava/lang/invoke/MethodType;)Ljava/lang/invoke/MethodHandle;
+    move-result-object v0
+    return-object v0
+.end method
 
 .method public static main([Ljava/lang/String;)V
 .registers 5
@@ -105,7 +148,7 @@
     invoke-virtual {v4, v3}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
 
     # Test case 2: Exercise String.valueOf(Object);
-    invoke-static {}, LMain;->getStringValueOfHandle()Ljava/lang/invoke/MethodHandle;
+    invoke-static {}, LMain;->getStringValueOfObjectHandle()Ljava/lang/invoke/MethodHandle;
     move-result-object v0
     const-string v1, "[String1]"
     invoke-polymorphic {v0, v1}, Ljava/lang/invoke/MethodHandle;->invokeExact([Ljava/lang/Object;)Ljava/lang/Object;, (Ljava/lang/Object;)Ljava/lang/String;
@@ -113,5 +156,86 @@
     sget-object v4, Ljava/lang/System;->out:Ljava/io/PrintStream;
     invoke-virtual {v4, v3}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
 
+    # Test case 3: Exercise String.concat(String, String) with an inexact invoke.
+    # Note that the callsite type here is String type(Object, Object); so the runtime
+    # will generate dynamic type checks for the input arguments.
+    invoke-static {}, LMain;->getStringConcatHandle()Ljava/lang/invoke/MethodHandle;
+    move-result-object v0
+    const-string v1, "[String1]"
+    const-string v2, "+[String2]"
+    invoke-polymorphic {v0, v1, v2}, Ljava/lang/invoke/MethodHandle;->invoke([Ljava/lang/Object;)Ljava/lang/Object;, (Ljava/lang/Object;Ljava/lang/Object;)Ljava/lang/String;
+    move-result-object v3
+    sget-object v4, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    invoke-virtual {v4, v3}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
+
+    # Test case 4: Exercise String.valueOf(long);
+    #
+    # We exercise it with various types of unboxing / widening conversions
+    invoke-static {}, LMain;->getStringValueOfLongHandle()Ljava/lang/invoke/MethodHandle;
+    move-result-object v0
+
+    # First use a long, this is an invokeExact because the callsite type matches
+    # the function type precisely.
+    const-wide v1, 42
+    invoke-polymorphic {v0, v1, v2}, Ljava/lang/invoke/MethodHandle;->invokeExact([Ljava/lang/Object;)Ljava/lang/Object;, (J)Ljava/lang/String;
+    move-result-object v3
+    sget-object v4, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    invoke-virtual {v4, v3}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
+
+    # Then use an int, should perform a widening conversion.
+    const v1, 40
+    invoke-polymorphic {v0, v1}, Ljava/lang/invoke/MethodHandle;->invoke([Ljava/lang/Object;)Ljava/lang/Object;, (I)Ljava/lang/String;
+    move-result-object v3
+    sget-object v4, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    invoke-virtual {v4, v3}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
+
+    # Then use a java/lang/Long; - should perform an unboxing conversion.
+    new-instance v1, Ljava/lang/Long;
+    const-wide v2, 43
+    invoke-direct {v1, v2, v3}, Ljava/lang/Long;-><init>(J)V
+    invoke-polymorphic {v0, v1}, Ljava/lang/invoke/MethodHandle;->invoke([Ljava/lang/Object;)Ljava/lang/Object;, (Ljava/lang/Long;)Ljava/lang/String;
+    move-result-object v3
+    sget-object v4, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    invoke-virtual {v4, v3}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
+
+    # Then use a java/lang/Integer; - should perform an unboxing in addition to a widening conversion.
+    new-instance v1, Ljava/lang/Integer;
+    const v2, 44
+    invoke-direct {v1, v2}, Ljava/lang/Integer;-><init>(I)V
+    invoke-polymorphic {v0, v1}, Ljava/lang/invoke/MethodHandle;->invoke([Ljava/lang/Object;)Ljava/lang/Object;, (Ljava/lang/Integer;)Ljava/lang/String;
+    move-result-object v3
+    sget-object v4, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    invoke-virtual {v4, v3}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
+
+    # Test case 5: Exercise int Long.compareTo(Long)
+    invoke-static {}, LMain;->getLongCompareToHandle()Ljava/lang/invoke/MethodHandle;
+    move-result-object v0
+    new-instance v1, Ljava/lang/Long;
+    const-wide v2, 43
+    invoke-direct {v1, v2, v3}, Ljava/lang/Long;-><init>(J)V
+
+    # At this point, v0 is our MethodHandle and v1 is the instance we're going to call compareTo on.
+
+    # Call compareTo(Long) - this is invokeExact semantics.
+    invoke-polymorphic {v0, v1, v1}, Ljava/lang/invoke/MethodHandle;->invoke([Ljava/lang/Object;)Ljava/lang/Object;, (Ljava/lang/Long;Ljava/lang/Long;)I
+    move-result v3
+    sget-object v4, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    invoke-virtual {v4, v3}, Ljava/io/PrintStream;->println(I)V
+
+    # Call compareTo(long) - this is an implicit box.
+    const-wide v2, 44
+    invoke-polymorphic {v0, v1, v2, v3}, Ljava/lang/invoke/MethodHandle;->invoke([Ljava/lang/Object;)Ljava/lang/Object;, (Ljava/lang/Long;J)I
+    move-result v3
+    sget-object v4, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    invoke-virtual {v4, v3}, Ljava/io/PrintStream;->println(I)V
+
+    # Call compareTo(int) - this is an implicit box.
+# This throws WrongMethodTypeException as it's a two step conversion int->long->Long or int->Integer->Long.
+#    const v2, 40
+#    invoke-polymorphic {v0, v1, v2}, Ljava/lang/invoke/MethodHandle;->invoke([Ljava/lang/Object;)Ljava/lang/Object;, (Ljava/lang/Long;I)I
+#    move-result v3
+#    sget-object v4, Ljava/lang/System;->out:Ljava/io/PrintStream;
+#    invoke-virtual {v4, v3}, Ljava/io/PrintStream;->print(I)V
+
     return-void
 .end method
diff --git a/test/956-methodhandles/build b/test/956-methodhandles/build
new file mode 100755
index 0000000..a423ca6
--- /dev/null
+++ b/test/956-methodhandles/build
@@ -0,0 +1,25 @@
+#!/bin/bash
+#
+# Copyright 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# make us exit on a failure
+set -e
+
+if [[ $@ != *"--jvm"* ]]; then
+  # Don't do anything with jvm.
+  export USE_JACK=true
+fi
+
+./default-build "$@" --experimental method-handles
diff --git a/test/956-methodhandles/expected.txt b/test/956-methodhandles/expected.txt
new file mode 100644
index 0000000..0a5caa1
--- /dev/null
+++ b/test/956-methodhandles/expected.txt
@@ -0,0 +1,9 @@
+foo_A
+foo_A
+foo_A
+foo_B
+privateRyan_D
+Received exception: Expected (java.lang.String, java.lang.String)java.lang.String but was (java.lang.String, java.lang.Object)void
+String constructors done.
+testReferenceReturnValueConversions done.
+testPrimitiveReturnValueConversions done.
diff --git a/test/956-methodhandles/info.txt b/test/956-methodhandles/info.txt
new file mode 100644
index 0000000..f1dbb61
--- /dev/null
+++ b/test/956-methodhandles/info.txt
@@ -0,0 +1,3 @@
+Tests for method handle invocations.
+
+NOTE: needs to run under ART or a Java 8 Language runtime and compiler.
diff --git a/test/956-methodhandles/run b/test/956-methodhandles/run
new file mode 100755
index 0000000..a9f1822
--- /dev/null
+++ b/test/956-methodhandles/run
@@ -0,0 +1,20 @@
+#!/bin/bash
+#
+# Copyright 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# make us exit on a failure
+set -e
+
+./default-run "$@" --experimental method-handles
diff --git a/test/956-methodhandles/src/Main.java b/test/956-methodhandles/src/Main.java
new file mode 100644
index 0000000..8713caa
--- /dev/null
+++ b/test/956-methodhandles/src/Main.java
@@ -0,0 +1,963 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.invoke.MethodHandle;
+import java.lang.invoke.MethodHandles;
+import java.lang.invoke.MethodHandles.Lookup;
+import java.lang.invoke.MethodType;
+import java.lang.invoke.WrongMethodTypeException;
+import java.nio.charset.Charset;
+import java.nio.charset.StandardCharsets;
+
+import java.lang.reflect.Constructor;
+import java.lang.reflect.Field;
+import java.lang.reflect.Method;
+import java.util.Arrays;
+
+public class Main {
+
+  public static class A {
+    public A() {}
+
+    public void foo() {
+      System.out.println("foo_A");
+    }
+
+    public static final Lookup lookup = MethodHandles.lookup();
+  }
+
+  public static class B extends A {
+    public void foo() {
+      System.out.println("foo_B");
+    }
+
+    public static final Lookup lookup = MethodHandles.lookup();
+  }
+
+  public static class C extends B {
+    public static final Lookup lookup = MethodHandles.lookup();
+  }
+
+  public static class D {
+    private final void privateRyan() {
+      System.out.println("privateRyan_D");
+    }
+
+    public static final Lookup lookup = MethodHandles.lookup();
+  }
+
+  public static class E extends D {
+    public static final Lookup lookup = MethodHandles.lookup();
+  }
+
+  public static void main(String[] args) throws Throwable {
+    testfindSpecial_invokeSuperBehaviour();
+    testfindSpecial_invokeDirectBehaviour();
+    testExceptionDetailMessages();
+    testfindVirtual();
+    testfindStatic();
+    testUnreflects();
+    testAsType();
+    testConstructors();
+    testStringConstructors();
+    testReturnValueConversions();
+  }
+
+  public static void testfindSpecial_invokeSuperBehaviour() throws Throwable {
+    // This is equivalent to an invoke-super instruction where the referrer
+    // is B.class.
+    MethodHandle mh1 = B.lookup.findSpecial(A.class /* refC */, "foo",
+        MethodType.methodType(void.class), B.class /* specialCaller */);
+
+    A aInstance = new A();
+    B bInstance = new B();
+    C cInstance = new C();
+
+    // This should be as if an invoke-super was called from one of B's methods.
+    mh1.invokeExact(bInstance);
+    mh1.invoke(bInstance);
+
+    // This should not work. The receiver type in the handle will be suitably
+    // restricted to B and subclasses.
+    try {
+      mh1.invoke(aInstance);
+      System.out.println("mh1.invoke(aInstance) should not succeeed");
+    } catch (ClassCastException expected) {
+    }
+
+    try {
+      mh1.invokeExact(aInstance);
+      System.out.println("mh1.invoke(aInstance) should not succeeed");
+    } catch (WrongMethodTypeException expected) {
+    }
+
+    // This should *still* be as if an invoke-super was called from one of C's
+    // methods, despite the fact that we're operating on a C.
+    mh1.invoke(cInstance);
+
+    // Now that C is the special caller, the next invoke will call B.foo.
+    MethodHandle mh2 = C.lookup.findSpecial(A.class /* refC */, "foo",
+        MethodType.methodType(void.class), C.class /* specialCaller */);
+    mh2.invokeExact(cInstance);
+
+    // Shouldn't allow invoke-super semantics from an unrelated special caller.
+    try {
+      C.lookup.findSpecial(A.class, "foo",
+        MethodType.methodType(void.class), D.class /* specialCaller */);
+      System.out.println("findSpecial(A.class, foo, .. D.class) unexpectedly succeeded.");
+    } catch (IllegalAccessException expected) {
+    }
+
+    // Check return type matches for find.
+    try {
+      B.lookup.findSpecial(A.class /* refC */, "foo",
+                           MethodType.methodType(int.class), B.class /* specialCaller */);
+      fail();
+    } catch (NoSuchMethodException e) {}
+    // Check constructors
+    try {
+      B.lookup.findSpecial(A.class /* refC */, "<init>",
+                           MethodType.methodType(void.class), B.class /* specialCaller */);
+      fail();
+    } catch (NoSuchMethodException e) {}
+  }
+
+  public static void testfindSpecial_invokeDirectBehaviour() throws Throwable {
+    D dInstance = new D();
+
+    MethodHandle mh3 = D.lookup.findSpecial(D.class, "privateRyan",
+        MethodType.methodType(void.class), D.class /* specialCaller */);
+    mh3.invoke(dInstance);
+
+    // The private method shouldn't be accessible from any special caller except
+    // itself...
+    try {
+      D.lookup.findSpecial(D.class, "privateRyan", MethodType.methodType(void.class), C.class);
+      System.out.println("findSpecial(privateRyan, C.class) unexpectedly succeeded");
+    } catch (IllegalAccessException expected) {
+    }
+
+    // ... or from any lookup context except its own.
+    try {
+      E.lookup.findSpecial(D.class, "privateRyan", MethodType.methodType(void.class), E.class);
+      System.out.println("findSpecial(privateRyan, E.class) unexpectedly succeeded");
+    } catch (IllegalAccessException expected) {
+    }
+  }
+
+  public static void testExceptionDetailMessages() throws Throwable {
+    MethodHandle handle = MethodHandles.lookup().findVirtual(String.class, "concat",
+        MethodType.methodType(String.class, String.class));
+
+    try {
+      handle.invokeExact("a", new Object());
+      System.out.println("invokeExact(\"a\", new Object()) unexpectedly succeeded.");
+    } catch (WrongMethodTypeException ex) {
+      System.out.println("Received exception: " + ex.getMessage());
+    }
+  }
+
+  public interface Foo {
+    public String foo();
+  }
+
+  public interface Bar extends Foo {
+    public String bar();
+  }
+
+  public static class BarSuper {
+    public String superPublicMethod() {
+      return "superPublicMethod";
+    }
+
+    public String superProtectedMethod() {
+      return "superProtectedMethod";
+    }
+
+    String superPackageMethod() {
+      return "superPackageMethod";
+    }
+  }
+
+  public static class BarImpl extends BarSuper implements Bar {
+    public BarImpl() {
+    }
+
+    @Override
+    public String foo() {
+      return "foo";
+    }
+
+    @Override
+    public String bar() {
+      return "bar";
+    }
+
+    public String add(int x, int y) {
+      return Arrays.toString(new int[] { x, y });
+    }
+
+    private String privateMethod() { return "privateMethod"; }
+
+    public static String staticMethod() { return staticString; }
+
+    private static String staticString;
+
+    {
+      // Static constructor
+      staticString = Long.toString(System.currentTimeMillis());
+    }
+
+    static final MethodHandles.Lookup lookup = MethodHandles.lookup();
+  }
+
+  public static void testfindVirtual() throws Throwable {
+    // Virtual lookups on static methods should not succeed.
+    try {
+        MethodHandles.lookup().findVirtual(
+            BarImpl.class,  "staticMethod", MethodType.methodType(String.class));
+        System.out.println("findVirtual(staticMethod) unexpectedly succeeded");
+    } catch (IllegalAccessException expected) {
+    }
+
+    // Virtual lookups on private methods should not succeed, unless the Lookup
+    // context had sufficient privileges.
+    try {
+        MethodHandles.lookup().findVirtual(
+            BarImpl.class,  "privateMethod", MethodType.methodType(String.class));
+        System.out.println("findVirtual(privateMethod) unexpectedly succeeded");
+    } catch (IllegalAccessException expected) {
+    }
+
+    // Virtual lookup on a private method with a context that *does* have sufficient
+    // privileges.
+    MethodHandle mh = BarImpl.lookup.findVirtual(
+            BarImpl.class,  "privateMethod", MethodType.methodType(String.class));
+    String str = (String) mh.invoke(new BarImpl());
+    if (!"privateMethod".equals(str)) {
+      System.out.println("Unexpected return value for BarImpl#privateMethod: " + str);
+    }
+
+    // Find virtual must find interface methods defined by interfaces implemented
+    // by the class.
+    mh = MethodHandles.lookup().findVirtual(BarImpl.class, "foo",
+        MethodType.methodType(String.class));
+    str = (String) mh.invoke(new BarImpl());
+    if (!"foo".equals(str)) {
+      System.out.println("Unexpected return value for BarImpl#foo: " + str);
+    }
+
+    // Find virtual should check rtype.
+    try {
+      mh = MethodHandles.lookup().findVirtual(BarImpl.class, "foo",
+                                              MethodType.methodType(void.class));
+      fail();
+    } catch (NoSuchMethodException e) {}
+
+    // And ptypes
+    mh = MethodHandles.lookup().findVirtual(
+        BarImpl.class, "add", MethodType.methodType(String.class, int.class, int.class));
+    try {
+      mh = MethodHandles.lookup().findVirtual(
+          BarImpl.class, "add", MethodType.methodType(String.class, Integer.class, int.class));
+    } catch (NoSuchMethodException e) {}
+
+    // .. and their super-interfaces.
+    mh = MethodHandles.lookup().findVirtual(BarImpl.class, "bar",
+        MethodType.methodType(String.class));
+    str = (String) mh.invoke(new BarImpl());
+    if (!"bar".equals(str)) {
+      System.out.println("Unexpected return value for BarImpl#bar: " + str);
+    }
+
+    // TODO(narayan): Fix this case, we're using the wrong ArtMethod for the
+    // invoke resulting in a failing check in the interpreter.
+    //
+    // mh = MethodHandles.lookup().findVirtual(Bar.class, "bar",
+    //    MethodType.methodType(String.class));
+    // str = (String) mh.invoke(new BarImpl());
+    // if (!"bar".equals(str)) {
+    //   System.out.println("Unexpected return value for BarImpl#bar: " + str);
+    // }
+
+    // We should also be able to lookup public / protected / package methods in
+    // the super class, given sufficient access privileges.
+    mh = MethodHandles.lookup().findVirtual(BarImpl.class, "superPublicMethod",
+        MethodType.methodType(String.class));
+    str = (String) mh.invoke(new BarImpl());
+    if (!"superPublicMethod".equals(str)) {
+      System.out.println("Unexpected return value for BarImpl#superPublicMethod: " + str);
+    }
+
+    mh = MethodHandles.lookup().findVirtual(BarImpl.class, "superProtectedMethod",
+        MethodType.methodType(String.class));
+    str = (String) mh.invoke(new BarImpl());
+    if (!"superProtectedMethod".equals(str)) {
+      System.out.println("Unexpected return value for BarImpl#superProtectedMethod: " + str);
+    }
+
+    mh = MethodHandles.lookup().findVirtual(BarImpl.class, "superPackageMethod",
+        MethodType.methodType(String.class));
+    str = (String) mh.invoke(new BarImpl());
+    if (!"superPackageMethod".equals(str)) {
+      System.out.println("Unexpected return value for BarImpl#superPackageMethod: " + str);
+    }
+
+    try {
+      MethodHandles.lookup().findVirtual(BarImpl.class, "<init>",
+                                        MethodType.methodType(void.class));
+      fail();
+    } catch (NoSuchMethodException e) {}
+  }
+
+  public static void testfindStatic() throws Throwable {
+    MethodHandles.lookup().findStatic(BarImpl.class, "staticMethod",
+                                      MethodType.methodType(String.class));
+    try {
+      MethodHandles.lookup().findStatic(BarImpl.class, "staticMethod",
+                                        MethodType.methodType(void.class));
+      fail();
+    } catch (NoSuchMethodException e) {}
+    try {
+      MethodHandles.lookup().findStatic(BarImpl.class, "staticMethod",
+                                        MethodType.methodType(String.class, int.class));
+      fail();
+    } catch (NoSuchMethodException e) {}
+    try {
+      MethodHandles.lookup().findStatic(BarImpl.class, "<clinit>",
+                                        MethodType.methodType(void.class));
+      fail();
+    } catch (NoSuchMethodException e) {}
+    try {
+      MethodHandles.lookup().findStatic(BarImpl.class, "<init>",
+                                        MethodType.methodType(void.class));
+      fail();
+    } catch (NoSuchMethodException e) {}
+  }
+
+  static class UnreflectTester {
+    public String publicField;
+    private String privateField;
+
+    public static String publicStaticField = "publicStaticValue";
+    private static String privateStaticField = "privateStaticValue";
+
+    private UnreflectTester(String val) {
+      publicField = val;
+      privateField = val;
+    }
+
+    // NOTE: The boolean constructor argument only exists to give this a
+    // different signature.
+    public UnreflectTester(String val, boolean unused) {
+      this(val);
+    }
+
+    private static String privateStaticMethod() {
+      return "privateStaticMethod";
+    }
+
+    private String privateMethod() {
+      return "privateMethod";
+    }
+
+    public static String publicStaticMethod() {
+      return "publicStaticMethod";
+    }
+
+    public String publicMethod() {
+      return "publicMethod";
+    }
+  }
+
+  public static void testUnreflects() throws Throwable {
+    UnreflectTester instance = new UnreflectTester("unused");
+    Method publicMethod = UnreflectTester.class.getMethod("publicMethod");
+
+    MethodHandle mh = MethodHandles.lookup().unreflect(publicMethod);
+    assertEquals("publicMethod", (String) mh.invoke(instance));
+    assertEquals("publicMethod", (String) mh.invokeExact(instance));
+
+    Method publicStaticMethod = UnreflectTester.class.getMethod("publicStaticMethod");
+    mh = MethodHandles.lookup().unreflect(publicStaticMethod);
+    assertEquals("publicStaticMethod", (String) mh.invoke());
+    assertEquals("publicStaticMethod", (String) mh.invokeExact());
+
+    Method privateMethod = UnreflectTester.class.getDeclaredMethod("privateMethod");
+    try {
+      mh = MethodHandles.lookup().unreflect(privateMethod);
+      fail();
+    } catch (IllegalAccessException expected) {}
+
+    privateMethod.setAccessible(true);
+    mh = MethodHandles.lookup().unreflect(privateMethod);
+    assertEquals("privateMethod", (String) mh.invoke(instance));
+    assertEquals("privateMethod", (String) mh.invokeExact(instance));
+
+    Method privateStaticMethod = UnreflectTester.class.getDeclaredMethod("privateStaticMethod");
+    try {
+      mh = MethodHandles.lookup().unreflect(privateStaticMethod);
+      fail();
+    } catch (IllegalAccessException expected) {}
+
+    privateStaticMethod.setAccessible(true);
+    mh = MethodHandles.lookup().unreflect(privateStaticMethod);
+    assertEquals("privateStaticMethod", (String) mh.invoke());
+    assertEquals("privateStaticMethod", (String) mh.invokeExact());
+
+    Constructor privateConstructor = UnreflectTester.class.getDeclaredConstructor(String.class);
+    try {
+      mh = MethodHandles.lookup().unreflectConstructor(privateConstructor);
+      fail();
+    } catch (IllegalAccessException expected) {}
+
+    privateConstructor.setAccessible(true);
+    mh = MethodHandles.lookup().unreflectConstructor(privateConstructor);
+    instance = (UnreflectTester) mh.invokeExact("abc");
+    assertEquals("abc", instance.publicField);
+    instance = (UnreflectTester) mh.invoke("def");
+    assertEquals("def", instance.publicField);
+    Constructor publicConstructor = UnreflectTester.class.getConstructor(String.class,
+        boolean.class);
+    mh = MethodHandles.lookup().unreflectConstructor(publicConstructor);
+    instance = (UnreflectTester) mh.invokeExact("abc", false);
+    assertEquals("abc", instance.publicField);
+    instance = (UnreflectTester) mh.invoke("def", true);
+    assertEquals("def", instance.publicField);
+
+    // TODO(narayan): Non exact invokes for field sets/gets are not implemented yet.
+    //
+    // assertEquals("instanceValue", (String) mh.invoke(new UnreflectTester("instanceValue")));
+    Field publicField = UnreflectTester.class.getField("publicField");
+    mh = MethodHandles.lookup().unreflectGetter(publicField);
+    instance = new UnreflectTester("instanceValue");
+    assertEquals("instanceValue", (String) mh.invokeExact(instance));
+
+    mh = MethodHandles.lookup().unreflectSetter(publicField);
+    instance = new UnreflectTester("instanceValue");
+    mh.invokeExact(instance, "updatedInstanceValue");
+    assertEquals("updatedInstanceValue", instance.publicField);
+
+    Field publicStaticField = UnreflectTester.class.getField("publicStaticField");
+    mh = MethodHandles.lookup().unreflectGetter(publicStaticField);
+    UnreflectTester.publicStaticField = "updatedStaticValue";
+    assertEquals("updatedStaticValue", (String) mh.invokeExact());
+
+    mh = MethodHandles.lookup().unreflectSetter(publicStaticField);
+    UnreflectTester.publicStaticField = "updatedStaticValue";
+    mh.invokeExact("updatedStaticValue2");
+    assertEquals("updatedStaticValue2", UnreflectTester.publicStaticField);
+
+    Field privateField = UnreflectTester.class.getDeclaredField("privateField");
+    try {
+      mh = MethodHandles.lookup().unreflectGetter(privateField);
+      fail();
+    } catch (IllegalAccessException expected) {
+    }
+    try {
+      mh = MethodHandles.lookup().unreflectSetter(privateField);
+      fail();
+    } catch (IllegalAccessException expected) {
+    }
+
+    privateField.setAccessible(true);
+
+    mh = MethodHandles.lookup().unreflectGetter(privateField);
+    instance = new UnreflectTester("instanceValue");
+    assertEquals("instanceValue", (String) mh.invokeExact(instance));
+
+    mh = MethodHandles.lookup().unreflectSetter(privateField);
+    instance = new UnreflectTester("instanceValue");
+    mh.invokeExact(instance, "updatedInstanceValue");
+    assertEquals("updatedInstanceValue", instance.privateField);
+
+    Field privateStaticField = UnreflectTester.class.getDeclaredField("privateStaticField");
+    try {
+      mh = MethodHandles.lookup().unreflectGetter(privateStaticField);
+      fail();
+    } catch (IllegalAccessException expected) {
+    }
+    try {
+      mh = MethodHandles.lookup().unreflectSetter(privateStaticField);
+      fail();
+    } catch (IllegalAccessException expected) {
+    }
+
+    privateStaticField.setAccessible(true);
+    mh = MethodHandles.lookup().unreflectGetter(privateStaticField);
+    privateStaticField.set(null, "updatedStaticValue");
+    assertEquals("updatedStaticValue", (String) mh.invokeExact());
+
+    mh = MethodHandles.lookup().unreflectSetter(privateStaticField);
+    privateStaticField.set(null, "updatedStaticValue");
+    mh.invokeExact("updatedStaticValue2");
+    assertEquals("updatedStaticValue2", (String) privateStaticField.get(null));
+  }
+
+  // This method only exists to fool Jack's handling of types. See b/32536744.
+  public static CharSequence getSequence() {
+    return "foo";
+  }
+
+  public static void testAsType() throws Throwable {
+    // The type of this handle is (String, String)String.
+    MethodHandle mh = MethodHandles.lookup().findVirtual(String.class,
+        "concat", MethodType.methodType(String.class, String.class));
+
+    // Change it to (CharSequence, String)Object.
+    MethodHandle asType = mh.asType(
+        MethodType.methodType(Object.class, CharSequence.class, String.class));
+
+    Object obj = asType.invokeExact((CharSequence) getSequence(), "bar");
+    assertEquals("foobar", (String) obj);
+
+    // Should fail due to a wrong return type.
+    try {
+      String str = (String) asType.invokeExact((CharSequence) getSequence(), "bar");
+      fail();
+    } catch (WrongMethodTypeException expected) {
+    }
+
+    // Should fail due to a wrong argument type (String instead of Charsequence).
+    try {
+      String str = (String) asType.invokeExact("baz", "bar");
+      fail();
+    } catch (WrongMethodTypeException expected) {
+    }
+
+    // Calls to asType should fail if the types are not convertible.
+    //
+    // Bad return type conversion.
+    try {
+      mh.asType(MethodType.methodType(int.class, String.class, String.class));
+      fail();
+    } catch (WrongMethodTypeException expected) {
+    }
+
+    // Bad argument conversion.
+    try {
+      mh.asType(MethodType.methodType(String.class, int.class, String.class));
+      fail();
+    } catch (WrongMethodTypeException expected) {
+    }
+  }
+
+  public static void assertEquals(String s1, String s2) {
+    if (s1 == s2) {
+      return;
+    }
+
+    if (s1 != null && s2 != null && s1.equals(s2)) {
+      return;
+    }
+
+    throw new AssertionError("assertEquals s1: " + s1 + ", s2: " + s2);
+  }
+
+  public static void fail() {
+    System.out.println("fail");
+    Thread.dumpStack();
+  }
+
+  public static void fail(String message) {
+    System.out.println("fail: " + message);
+    Thread.dumpStack();
+  }
+
+  public static void testConstructors() throws Throwable {
+    MethodHandle mh =
+        MethodHandles.lookup().findConstructor(Float.class,
+                                               MethodType.methodType(void.class,
+                                                                     float.class));
+    Float value = (Float) mh.invokeExact(0.33f);
+    if (value.floatValue() != 0.33f) {
+      fail("Unexpected float value from invokeExact " + value.floatValue());
+    }
+
+    value = (Float) mh.invoke(3.34f);
+    if (value.floatValue() != 3.34f) {
+      fail("Unexpected float value from invoke " + value.floatValue());
+    }
+
+    mh = MethodHandles.lookup().findConstructor(Double.class,
+                                                MethodType.methodType(void.class, String.class));
+    Double d = (Double) mh.invoke("8.45e3");
+    if (d.doubleValue() != 8.45e3) {
+      fail("Unexpected double value from Double(String) " + value.doubleValue());
+    }
+
+    mh = MethodHandles.lookup().findConstructor(Double.class,
+                                                MethodType.methodType(void.class, double.class));
+    d = (Double) mh.invoke(8.45e3);
+    if (d.doubleValue() != 8.45e3) {
+      fail("Unexpected double value from Double(double) " + value.doubleValue());
+    }
+
+    // Primitive type
+    try {
+      mh = MethodHandles.lookup().findConstructor(int.class, MethodType.methodType(void.class));
+      fail("Unexpected lookup success for primitive constructor");
+    } catch (NoSuchMethodException e) {}
+
+    // Interface
+    try {
+      mh = MethodHandles.lookup().findConstructor(Readable.class,
+                                                  MethodType.methodType(void.class));
+      fail("Unexpected lookup success for interface constructor");
+    } catch (NoSuchMethodException e) {}
+
+    // Abstract
+    mh = MethodHandles.lookup().findConstructor(Process.class, MethodType.methodType(void.class));
+    try {
+      mh.invoke();
+      fail("Unexpected ability to instantiate an abstract class");
+    } catch (InstantiationException e) {}
+
+    // Non-existent
+    try {
+        MethodHandle bad = MethodHandles.lookup().findConstructor(
+            String.class, MethodType.methodType(String.class, Float.class));
+        fail("Unexpected success for non-existent constructor");
+    } catch (NoSuchMethodException e) {}
+
+    // Non-void constructor search. (I)I instead of (I)V.
+    try {
+        MethodHandle foo = MethodHandles.lookup().findConstructor(
+            Integer.class, MethodType.methodType(Integer.class, Integer.class));
+        fail("Unexpected success for non-void type for findConstructor");
+    } catch (NoSuchMethodException e) {}
+  }
+
+  public static void testStringConstructors() throws Throwable {
+    final String testPattern = "The system as we know it is broken";
+
+    // String()
+    MethodHandle mh = MethodHandles.lookup().findConstructor(
+        String.class, MethodType.methodType(void.class));
+    String s = (String) mh.invokeExact();
+    if (!s.equals("")) {
+      fail("Unexpected empty string constructor result: '" + s + "'");
+    }
+
+    // String(String)
+    mh = MethodHandles.lookup().findConstructor(
+        String.class, MethodType.methodType(void.class, String.class));
+    s = (String) mh.invokeExact(testPattern);
+    if (!s.equals(testPattern)) {
+      fail("Unexpected string constructor result: '" + s + "'");
+    }
+
+    // String(char[])
+    mh = MethodHandles.lookup().findConstructor(
+        String.class, MethodType.methodType(void.class, char[].class));
+    s = (String) mh.invokeExact(testPattern.toCharArray());
+    if (!s.equals(testPattern)) {
+      fail("Unexpected string constructor result: '" + s + "'");
+    }
+
+    // String(char[], int, int)
+    mh = MethodHandles.lookup().findConstructor(
+        String.class, MethodType.methodType(void.class, char[].class, int.class, int.class));
+    s = (String) mh.invokeExact(new char [] { 'a', 'b', 'c', 'd', 'e'}, 2, 3);
+    if (!s.equals("cde")) {
+      fail("Unexpected string constructor result: '" + s + "'");
+    }
+
+    // String(int[] codePoints, int offset, int count)
+    StringBuffer sb = new StringBuffer(testPattern);
+    int[] codePoints = new int[sb.codePointCount(0, sb.length())];
+    for (int i = 0; i < sb.length(); ++i) {
+      codePoints[i] = sb.codePointAt(i);
+    }
+    mh = MethodHandles.lookup().findConstructor(
+        String.class, MethodType.methodType(void.class, int[].class, int.class, int.class));
+    s = (String) mh.invokeExact(codePoints, 0, codePoints.length);
+    if (!s.equals(testPattern)) {
+      fail("Unexpected string constructor result: '" + s + "'");
+    }
+
+    // String(byte ascii[], int hibyte, int offset, int count)
+    byte [] ascii = testPattern.getBytes(StandardCharsets.US_ASCII);
+    mh = MethodHandles.lookup().findConstructor(
+        String.class, MethodType.methodType(void.class, byte[].class, int.class, int.class));
+    s = (String) mh.invokeExact(ascii, 0, ascii.length);
+    if (!s.equals(testPattern)) {
+      fail("Unexpected string constructor result: '" + s + "'");
+    }
+
+    // String(byte bytes[], int offset, int length, String charsetName)
+    mh = MethodHandles.lookup().findConstructor(
+        String.class,
+        MethodType.methodType(void.class, byte[].class, int.class, int.class, String.class));
+    s = (String) mh.invokeExact(ascii, 0, 5, StandardCharsets.US_ASCII.name());
+    if (!s.equals(testPattern.substring(0, 5))) {
+      fail("Unexpected string constructor result: '" + s + "'");
+    }
+
+    // String(byte bytes[], int offset, int length, Charset charset)
+    mh = MethodHandles.lookup().findConstructor(
+        String.class,
+        MethodType.methodType(void.class, byte[].class, int.class, int.class, Charset.class));
+    s = (String) mh.invokeExact(ascii, 0, 5, StandardCharsets.US_ASCII);
+    if (!s.equals(testPattern.substring(0, 5))) {
+      fail("Unexpected string constructor result: '" + s + "'");
+    }
+
+    // String(byte bytes[], String charsetName)
+    mh = MethodHandles.lookup().findConstructor(
+        String.class,
+        MethodType.methodType(void.class, byte[].class, String.class));
+    s = (String) mh.invokeExact(ascii, StandardCharsets.US_ASCII.name());
+    if (!s.equals(testPattern)) {
+      fail("Unexpected string constructor result: '" + s + "'");
+    }
+
+    // String(byte bytes[], Charset charset)
+    mh = MethodHandles.lookup().findConstructor(
+        String.class, MethodType.methodType(void.class, byte[].class, Charset.class));
+    s = (String) mh.invokeExact(ascii, StandardCharsets.US_ASCII);
+    if (!s.equals(testPattern)) {
+      fail("Unexpected string constructor result: '" + s + "'");
+    }
+
+    // String(byte bytes[], int offset, int length)
+    mh = MethodHandles.lookup().findConstructor(
+        String.class, MethodType.methodType(void.class, byte[].class, int.class, int.class));
+    s = (String) mh.invokeExact(ascii, 1, ascii.length - 2);
+    s = testPattern.charAt(0) + s + testPattern.charAt(testPattern.length() - 1);
+    if (!s.equals(testPattern)) {
+      fail("Unexpected string constructor result: '" + s + "'");
+    }
+
+    // String(byte bytes[])
+    mh = MethodHandles.lookup().findConstructor(
+        String.class, MethodType.methodType(void.class, byte[].class));
+    s = (String) mh.invokeExact(ascii);
+    if (!s.equals(testPattern)) {
+      fail("Unexpected string constructor result: '" + s + "'");
+    }
+
+    // String(StringBuffer buffer)
+    mh = MethodHandles.lookup().findConstructor(
+        String.class, MethodType.methodType(void.class, StringBuffer.class));
+    s = (String) mh.invokeExact(sb);
+    if (!s.equals(testPattern)) {
+      fail("Unexpected string constructor result: '" + s + "'");
+    }
+
+    System.out.println("String constructors done.");
+  }
+
+  private static void testReferenceReturnValueConversions() throws Throwable {
+    MethodHandle mh = MethodHandles.lookup().findStatic(
+        Float.class, "valueOf", MethodType.methodType(Float.class, String.class));
+
+    // No conversion
+    Float f = (Float) mh.invokeExact("1.375");
+    if (f.floatValue() != 1.375) {
+      fail();
+    }
+    f = (Float) mh.invoke("1.875");
+    if (f.floatValue() != 1.875) {
+      fail();
+    }
+
+    // Bad conversion
+    try {
+      int i = (int) mh.invokeExact("7.77");
+      fail();
+    } catch (WrongMethodTypeException e) {}
+
+    try {
+      int i = (int) mh.invoke("7.77");
+      fail();
+    } catch (WrongMethodTypeException e) {}
+
+    // Assignment to super-class.
+    Number n = (Number) mh.invoke("1.11");
+    try {
+      Number o = (Number) mh.invokeExact("1.11");
+      fail();
+    } catch (WrongMethodTypeException e) {}
+
+    // Assignment to widened boxed primitive class.
+    try {
+      Double u = (Double) mh.invoke("1.11");
+      fail();
+    } catch (ClassCastException e) {}
+
+    try {
+      Double v = (Double) mh.invokeExact("1.11");
+      fail();
+    } catch (WrongMethodTypeException e) {}
+
+    // Unboxed
+    float p = (float) mh.invoke("1.11");
+    if (p != 1.11f) {
+      fail();
+    }
+
+    // Unboxed and widened
+    double d = (double) mh.invoke("2.5");
+    if (d != 2.5) {
+      fail();
+    }
+
+    // Interface
+    Comparable<Float> c = (Comparable<Float>) mh.invoke("2.125");
+    if (c.compareTo(new Float(2.125f)) != 0) {
+      fail();
+    }
+
+    System.out.println("testReferenceReturnValueConversions done.");
+  }
+
+  private static void testPrimitiveReturnValueConversions() throws Throwable {
+    MethodHandle mh = MethodHandles.lookup().findStatic(
+        Math.class, "min", MethodType.methodType(int.class, int.class, int.class));
+
+    final int SMALL = -8972;
+    final int LARGE = 7932529;
+
+    // No conversion
+    if ((int) mh.invokeExact(LARGE, SMALL) != SMALL) {
+      fail();
+    } else if ((int) mh.invoke(LARGE, SMALL) != SMALL) {
+      fail();
+    } else if ((int) mh.invokeExact(SMALL, LARGE) != SMALL) {
+      fail();
+    } else if ((int) mh.invoke(SMALL, LARGE) != SMALL) {
+      fail();
+    }
+
+    // int -> long
+    try {
+      if ((long) mh.invokeExact(LARGE, SMALL) != (long) SMALL) {}
+        fail();
+    } catch (WrongMethodTypeException e) {}
+
+    if ((long) mh.invoke(LARGE, SMALL) != (long) SMALL) {
+      fail();
+    }
+
+    // int -> short
+    try {
+      if ((short) mh.invokeExact(LARGE, SMALL) != (short) SMALL) {}
+      fail();
+    } catch (WrongMethodTypeException e) {}
+
+    try {
+      if ((short) mh.invoke(LARGE, SMALL) != (short) SMALL) {
+        fail();
+      }
+    } catch (WrongMethodTypeException e) {}
+
+    // int -> Integer
+    try {
+      if (!((Integer) mh.invokeExact(LARGE, SMALL)).equals(new Integer(SMALL))) {}
+      fail();
+    } catch (WrongMethodTypeException e) {}
+
+    if (!((Integer) mh.invoke(LARGE, SMALL)).equals(new Integer(SMALL))) {
+      fail();
+    }
+
+    // int -> Long
+    try {
+      Long l = (Long) mh.invokeExact(LARGE, SMALL);
+      fail();
+    } catch (WrongMethodTypeException e) {}
+
+    try {
+      Long l = (Long) mh.invoke(LARGE, SMALL);
+      fail();
+    } catch (WrongMethodTypeException e) {}
+
+    // int -> Short
+    try {
+      Short s = (Short) mh.invokeExact(LARGE, SMALL);
+      fail();
+    } catch (WrongMethodTypeException e) {}
+
+    try {
+      Short s = (Short) mh.invoke(LARGE, SMALL);
+      fail();
+    } catch (WrongMethodTypeException e) {}
+
+    // int -> Process
+    try {
+      Process p = (Process) mh.invokeExact(LARGE, SMALL);
+      fail();
+    } catch (WrongMethodTypeException e) {}
+
+    try {
+      Process p = (Process) mh.invoke(LARGE, SMALL);
+      fail();
+    } catch (WrongMethodTypeException e) {}
+
+    // void -> Object
+    mh = MethodHandles.lookup().findStatic(System.class, "gc", MethodType.methodType(void.class));
+    Object o = (Object) mh.invoke();
+    if (o != null) fail();
+
+    // void -> long
+    long l = (long) mh.invoke();
+    if (l != 0) fail();
+
+    // boolean -> Boolean
+    mh = MethodHandles.lookup().findStatic(Boolean.class, "parseBoolean",
+                                           MethodType.methodType(boolean.class, String.class));
+    Boolean z = (Boolean) mh.invoke("True");
+    if (!z.booleanValue()) fail();
+
+    // boolean -> int
+    try {
+        int dummy = (int) mh.invoke("True");
+        fail();
+    } catch (WrongMethodTypeException e) {}
+
+    // boolean -> Integer
+    try {
+        Integer dummy = (Integer) mh.invoke("True");
+        fail();
+    } catch (WrongMethodTypeException e) {}
+
+    // Boolean -> boolean
+    mh = MethodHandles.lookup().findStatic(Boolean.class, "valueOf",
+                                           MethodType.methodType(Boolean.class, boolean.class));
+    boolean w = (boolean) mh.invoke(false);
+    if (w) fail();
+
+    // Boolean -> int
+    try {
+        int dummy = (int) mh.invoke(false);
+        fail();
+    } catch (WrongMethodTypeException e) {}
+
+    // Boolean -> Integer
+    try {
+        Integer dummy = (Integer) mh.invoke("True");
+        fail();
+    } catch (WrongMethodTypeException e) {}
+
+    System.out.println("testPrimitiveReturnValueConversions done.");
+  }
+
+  public static void testReturnValueConversions() throws Throwable {
+    testReferenceReturnValueConversions();
+    testPrimitiveReturnValueConversions();
+  }
+}
diff --git a/test/957-methodhandle-transforms/build b/test/957-methodhandle-transforms/build
new file mode 100755
index 0000000..a423ca6
--- /dev/null
+++ b/test/957-methodhandle-transforms/build
@@ -0,0 +1,25 @@
+#!/bin/bash
+#
+# Copyright 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# make us exit on a failure
+set -e
+
+if [[ $@ != *"--jvm"* ]]; then
+  # Don't do anything with jvm.
+  export USE_JACK=true
+fi
+
+./default-build "$@" --experimental method-handles
diff --git a/test/957-methodhandle-transforms/expected.txt b/test/957-methodhandle-transforms/expected.txt
new file mode 100644
index 0000000..7540ef7
--- /dev/null
+++ b/test/957-methodhandle-transforms/expected.txt
@@ -0,0 +1,18 @@
+Message: foo, Message2: 42
+Message: foo, Message2: 42
+Message: foo, Message2: 42
+Message: foo, Message2: 42
+Message: foo, Message2: 42
+Message: foo, Message2: 42
+Message: foo, Message2: 42
+Target: Arg1: foo, Arg2: 42
+Target: Arg1: foo, Arg2: 42
+Handler: java.lang.IllegalArgumentException: exceptionMessage, Arg1: foo, Arg2: 42, ExMsg: exceptionMessage
+Handler: java.lang.IllegalArgumentException: exceptionMessage, Arg1: foo, Arg2: 42, ExMsg: exceptionMessage
+Handler: java.lang.IllegalArgumentException: exceptionMessage, Arg1: foo
+Handler: java.lang.IllegalArgumentException: exceptionMessage, Arg1: foo
+target: target, 42, 56
+target: target, 42, 56
+fallback: fallback, 42, 56
+target: target, 42, 56
+target: target, 42, 56
diff --git a/test/957-methodhandle-transforms/info.txt b/test/957-methodhandle-transforms/info.txt
new file mode 100644
index 0000000..bc50e85
--- /dev/null
+++ b/test/957-methodhandle-transforms/info.txt
@@ -0,0 +1,3 @@
+Tests for method handle transformations.
+
+NOTE: needs to run under ART or a Java 8 Language runtime and compiler.
diff --git a/test/957-methodhandle-transforms/run b/test/957-methodhandle-transforms/run
new file mode 100755
index 0000000..a9f1822
--- /dev/null
+++ b/test/957-methodhandle-transforms/run
@@ -0,0 +1,20 @@
+#!/bin/bash
+#
+# Copyright 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# make us exit on a failure
+set -e
+
+./default-run "$@" --experimental method-handles
diff --git a/test/957-methodhandle-transforms/src/Main.java b/test/957-methodhandle-transforms/src/Main.java
new file mode 100644
index 0000000..5806509
--- /dev/null
+++ b/test/957-methodhandle-transforms/src/Main.java
@@ -0,0 +1,907 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.invoke.MethodHandle;
+import java.lang.invoke.MethodHandles;
+import java.lang.invoke.MethodHandles.Lookup;
+import java.lang.invoke.MethodType;
+import java.lang.invoke.WrongMethodTypeException;
+
+public class Main {
+  public static void main(String[] args) throws Throwable {
+    testThrowException();
+    testDropArguments();
+    testCatchException();
+    testGuardWithTest();
+    testArrayElementGetter();
+    testArrayElementSetter();
+    testIdentity();
+    testConstant();
+    testBindTo();
+    testFilterReturnValue();
+    testPermuteArguments();
+  }
+
+  public static void testThrowException() throws Throwable {
+    MethodHandle handle = MethodHandles.throwException(String.class,
+        IllegalArgumentException.class);
+
+    if (handle.type().returnType() != String.class) {
+      System.out.println("Unexpected return type for handle: " + handle +
+          " [ " + handle.type() + "]");
+    }
+
+    final IllegalArgumentException iae = new IllegalArgumentException("boo!");
+    try {
+      handle.invoke(iae);
+      System.out.println("Expected an exception of type: java.lang.IllegalArgumentException");
+    } catch (IllegalArgumentException expected) {
+      if (expected != iae) {
+        System.out.println("Wrong exception: expected " + iae + " but was " + expected);
+      }
+    }
+  }
+
+  public static void dropArguments_delegate(String message, long message2) {
+    System.out.println("Message: " + message + ", Message2: " + message2);
+  }
+
+  public static void testDropArguments() throws Throwable {
+    MethodHandle delegate = MethodHandles.lookup().findStatic(Main.class,
+        "dropArguments_delegate",
+        MethodType.methodType(void.class, new Class<?>[] { String.class, long.class }));
+
+    MethodHandle transform = MethodHandles.dropArguments(delegate, 0, int.class, Object.class);
+
+    // The transformer will accept two additional arguments at position zero.
+    try {
+      transform.invokeExact("foo", 42l);
+      fail();
+    } catch (WrongMethodTypeException expected) {
+    }
+
+    transform.invokeExact(45, new Object(), "foo", 42l);
+    transform.invoke(45, new Object(), "foo", 42l);
+
+    // Additional arguments at position 1.
+    transform = MethodHandles.dropArguments(delegate, 1, int.class, Object.class);
+    transform.invokeExact("foo", 45, new Object(), 42l);
+    transform.invoke("foo", 45, new Object(), 42l);
+
+    // Additional arguments at position 2.
+    transform = MethodHandles.dropArguments(delegate, 2, int.class, Object.class);
+    transform.invokeExact("foo", 42l, 45, new Object());
+    transform.invoke("foo", 42l, 45, new Object());
+
+    // Note that we still perform argument conversions even for the arguments that
+    // are subsequently dropped.
+    try {
+      transform.invoke("foo", 42l, 45l, new Object());
+      fail();
+    } catch (WrongMethodTypeException expected) {
+    } catch (IllegalArgumentException expected) {
+      // TODO(narayan): We currently throw the wrong type of exception here,
+      // it's IAE and should be WMTE instead.
+    }
+
+    // Check that asType works as expected.
+    transform = MethodHandles.dropArguments(delegate, 0, int.class, Object.class);
+    transform = transform.asType(MethodType.methodType(void.class,
+          new Class<?>[] { short.class, Object.class, String.class, long.class }));
+    transform.invokeExact((short) 45, new Object(), "foo", 42l);
+
+    // Invalid argument location, should not be allowed.
+    try {
+      MethodHandles.dropArguments(delegate, -1, int.class, Object.class);
+      fail();
+    } catch (IllegalArgumentException expected) {
+    }
+
+    // Invalid argument location, should not be allowed.
+    try {
+      MethodHandles.dropArguments(delegate, 3, int.class, Object.class);
+      fail();
+    } catch (IllegalArgumentException expected) {
+    }
+
+    try {
+      MethodHandles.dropArguments(delegate, 1, void.class);
+      fail();
+    } catch (IllegalArgumentException expected) {
+    }
+  }
+
+  public static String testCatchException_target(String arg1, long arg2, String exceptionMessage)
+      throws Throwable {
+    if (exceptionMessage != null) {
+      throw new IllegalArgumentException(exceptionMessage);
+    }
+
+    System.out.println("Target: Arg1: " + arg1 + ", Arg2: " + arg2);
+    return "target";
+  }
+
+  public static String testCatchException_handler(IllegalArgumentException iae, String arg1, long arg2,
+      String exMsg) {
+    System.out.println("Handler: " + iae + ", Arg1: " + arg1 + ", Arg2: " + arg2 + ", ExMsg: " + exMsg);
+    return "handler1";
+  }
+
+  public static String testCatchException_handler2(IllegalArgumentException iae, String arg1) {
+    System.out.println("Handler: " + iae + ", Arg1: " + arg1);
+    return "handler2";
+  }
+
+  public static void testCatchException() throws Throwable {
+    MethodHandle target = MethodHandles.lookup().findStatic(Main.class,
+        "testCatchException_target",
+        MethodType.methodType(String.class, new Class<?>[] { String.class, long.class, String.class }));
+
+    MethodHandle handler = MethodHandles.lookup().findStatic(Main.class,
+        "testCatchException_handler",
+        MethodType.methodType(String.class, new Class<?>[] { IllegalArgumentException.class,
+            String.class, long.class, String.class }));
+
+    MethodHandle adapter = MethodHandles.catchException(target, IllegalArgumentException.class,
+        handler);
+
+    String returnVal = null;
+
+    // These two should end up calling the target always. We're passing a null exception
+    // message here, which means the target will not throw.
+    returnVal = (String) adapter.invoke("foo", 42, null);
+    assertEquals("target", returnVal);
+    returnVal = (String) adapter.invokeExact("foo", 42l, (String) null);
+    assertEquals("target", returnVal);
+
+    // We're passing a non-null exception message here, which means the target will throw,
+    // which in turn means that the handler must be called for the next two invokes.
+    returnVal = (String) adapter.invoke("foo", 42, "exceptionMessage");
+    assertEquals("handler1", returnVal);
+    returnVal = (String) adapter.invokeExact("foo", 42l, "exceptionMessage");
+    assertEquals("handler1", returnVal);
+
+    handler = MethodHandles.lookup().findStatic(Main.class,
+        "testCatchException_handler2",
+        MethodType.methodType(String.class, new Class<?>[] { IllegalArgumentException.class,
+            String.class }));
+    adapter = MethodHandles.catchException(target, IllegalArgumentException.class, handler);
+
+    returnVal = (String) adapter.invoke("foo", 42, "exceptionMessage");
+    assertEquals("handler2", returnVal);
+    returnVal = (String) adapter.invokeExact("foo", 42l, "exceptionMessage");
+    assertEquals("handler2", returnVal);
+
+    // Test that the type of the invoke doesn't matter. Here we call
+    // IllegalArgumentException.toString() on the exception that was thrown by
+    // the target.
+    handler = MethodHandles.lookup().findVirtual(IllegalArgumentException.class,
+        "toString", MethodType.methodType(String.class));
+    adapter = MethodHandles.catchException(target, IllegalArgumentException.class, handler);
+
+    returnVal = (String) adapter.invoke("foo", 42, "exceptionMessage");
+    assertEquals("java.lang.IllegalArgumentException: exceptionMessage", returnVal);
+    returnVal = (String) adapter.invokeExact("foo", 42l, "exceptionMessage2");
+    assertEquals("java.lang.IllegalArgumentException: exceptionMessage2", returnVal);
+
+    // Check that asType works as expected.
+    adapter = MethodHandles.catchException(target, IllegalArgumentException.class,
+        handler);
+    adapter = adapter.asType(MethodType.methodType(String.class,
+          new Class<?>[] { String.class, int.class, String.class }));
+    returnVal = (String) adapter.invokeExact("foo", 42, "exceptionMessage");
+    assertEquals("java.lang.IllegalArgumentException: exceptionMessage", returnVal);
+  }
+
+  public static boolean testGuardWithTest_test(String arg1, long arg2) {
+    return "target".equals(arg1) && 42 == arg2;
+  }
+
+  public static String testGuardWithTest_target(String arg1, long arg2, int arg3) {
+    System.out.println("target: " + arg1 + ", " + arg2  + ", " + arg3);
+    return "target";
+  }
+
+  public static String testGuardWithTest_fallback(String arg1, long arg2, int arg3) {
+    System.out.println("fallback: " + arg1 + ", " + arg2  + ", " + arg3);
+    return "fallback";
+  }
+
+  public static void testGuardWithTest() throws Throwable {
+    MethodHandle test = MethodHandles.lookup().findStatic(Main.class,
+        "testGuardWithTest_test",
+        MethodType.methodType(boolean.class, new Class<?>[] { String.class, long.class }));
+
+    final MethodType type = MethodType.methodType(String.class,
+        new Class<?>[] { String.class, long.class, int.class });
+
+    final MethodHandle target = MethodHandles.lookup().findStatic(Main.class,
+        "testGuardWithTest_target", type);
+    final MethodHandle fallback = MethodHandles.lookup().findStatic(Main.class,
+        "testGuardWithTest_fallback", type);
+
+    MethodHandle adapter = MethodHandles.guardWithTest(test, target, fallback);
+
+    String returnVal = null;
+
+    returnVal = (String) adapter.invoke("target", 42, 56);
+    assertEquals("target", returnVal);
+    returnVal = (String) adapter.invokeExact("target", 42l, 56);
+    assertEquals("target", returnVal);
+
+    returnVal = (String) adapter.invoke("fallback", 42l, 56);
+    assertEquals("fallback", returnVal);
+    returnVal = (String) adapter.invokeExact("target", 42l, 56);
+    assertEquals("target", returnVal);
+
+    // Check that asType works as expected.
+    adapter = adapter.asType(MethodType.methodType(String.class,
+          new Class<?>[] { String.class, int.class, int.class }));
+    returnVal = (String) adapter.invokeExact("target", 42, 56);
+    assertEquals("target", returnVal);
+  }
+
+  public static void testArrayElementGetter() throws Throwable {
+    MethodHandle getter = MethodHandles.arrayElementGetter(int[].class);
+
+    {
+      int[] array = new int[1];
+      array[0] = 42;
+      int value = (int) getter.invoke(array, 0);
+      if (value != 42) {
+        System.out.println("Unexpected value: " + value);
+      }
+
+      try {
+        value = (int) getter.invoke(array, -1);
+        fail();
+      } catch (ArrayIndexOutOfBoundsException expected) {
+      }
+
+      try {
+        value = (int) getter.invoke(null, -1);
+        fail();
+      } catch (NullPointerException expected) {
+      }
+    }
+
+    {
+      getter = MethodHandles.arrayElementGetter(long[].class);
+      long[] array = new long[1];
+      array[0] = 42;
+      long value = (long) getter.invoke(array, 0);
+      if (value != 42l) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    {
+      getter = MethodHandles.arrayElementGetter(short[].class);
+      short[] array = new short[1];
+      array[0] = 42;
+      short value = (short) getter.invoke(array, 0);
+      if (value != 42l) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    {
+      getter = MethodHandles.arrayElementGetter(char[].class);
+      char[] array = new char[1];
+      array[0] = 42;
+      char value = (char) getter.invoke(array, 0);
+      if (value != 42l) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    {
+      getter = MethodHandles.arrayElementGetter(byte[].class);
+      byte[] array = new byte[1];
+      array[0] = (byte) 0x8;
+      byte value = (byte) getter.invoke(array, 0);
+      if (value != (byte) 0x8) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    {
+      getter = MethodHandles.arrayElementGetter(boolean[].class);
+      boolean[] array = new boolean[1];
+      array[0] = true;
+      boolean value = (boolean) getter.invoke(array, 0);
+      if (!value) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    {
+      getter = MethodHandles.arrayElementGetter(float[].class);
+      float[] array = new float[1];
+      array[0] = 42.0f;
+      float value = (float) getter.invoke(array, 0);
+      if (value != 42.0f) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    {
+      getter = MethodHandles.arrayElementGetter(double[].class);
+      double[] array = new double[1];
+      array[0] = 42.0;
+      double value = (double) getter.invoke(array, 0);
+      if (value != 42.0) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    {
+      getter = MethodHandles.arrayElementGetter(String[].class);
+      String[] array = new String[3];
+      array[0] = "42";
+      array[1] = "48";
+      array[2] = "54";
+      String value = (String) getter.invoke(array, 0);
+      assertEquals("42", value);
+      value = (String) getter.invoke(array, 1);
+      assertEquals("48", value);
+      value = (String) getter.invoke(array, 2);
+      assertEquals("54", value);
+    }
+  }
+
+  public static void testArrayElementSetter() throws Throwable {
+    MethodHandle setter = MethodHandles.arrayElementSetter(int[].class);
+
+    {
+      int[] array = new int[2];
+      setter.invoke(array, 0, 42);
+      setter.invoke(array, 1, 43);
+
+      if (array[0] != 42) {
+        System.out.println("Unexpected value: " + array[0]);
+      }
+      if (array[1] != 43) {
+        System.out.println("Unexpected value: " + array[1]);
+      }
+
+      try {
+        setter.invoke(array, -1, 42);
+        fail();
+      } catch (ArrayIndexOutOfBoundsException expected) {
+      }
+
+      try {
+        setter.invoke(null, 0, 42);
+        fail();
+      } catch (NullPointerException expected) {
+      }
+    }
+
+    {
+      setter = MethodHandles.arrayElementSetter(long[].class);
+      long[] array = new long[1];
+      setter.invoke(array, 0, 42l);
+      if (array[0] != 42l) {
+        System.out.println("Unexpected value: " + array[0]);
+      }
+    }
+
+    {
+      setter = MethodHandles.arrayElementSetter(short[].class);
+      short[] array = new short[1];
+      setter.invoke(array, 0, (short) 42);
+      if (array[0] != 42l) {
+        System.out.println("Unexpected value: " + array[0]);
+      }
+    }
+
+    {
+      setter = MethodHandles.arrayElementSetter(char[].class);
+      char[] array = new char[1];
+      setter.invoke(array, 0, (char) 42);
+      if (array[0] != 42) {
+        System.out.println("Unexpected value: " + array[0]);
+      }
+    }
+
+    {
+      setter = MethodHandles.arrayElementSetter(byte[].class);
+      byte[] array = new byte[1];
+      setter.invoke(array, 0, (byte) 0x8);
+      if (array[0] != (byte) 0x8) {
+        System.out.println("Unexpected value: " + array[0]);
+      }
+    }
+
+    {
+      setter = MethodHandles.arrayElementSetter(boolean[].class);
+      boolean[] array = new boolean[1];
+      setter.invoke(array, 0, true);
+      if (!array[0]) {
+        System.out.println("Unexpected value: " + array[0]);
+      }
+    }
+
+    {
+      setter = MethodHandles.arrayElementSetter(float[].class);
+      float[] array = new float[1];
+      setter.invoke(array, 0, 42.0f);
+      if (array[0] != 42.0f) {
+        System.out.println("Unexpected value: " + array[0]);
+      }
+    }
+
+    {
+      setter = MethodHandles.arrayElementSetter(double[].class);
+      double[] array = new double[1];
+      setter.invoke(array, 0, 42.0);
+      if (array[0] != 42.0) {
+        System.out.println("Unexpected value: " + array[0]);
+      }
+    }
+
+    {
+      setter = MethodHandles.arrayElementSetter(String[].class);
+      String[] array = new String[3];
+      setter.invoke(array, 0, "42");
+      setter.invoke(array, 1, "48");
+      setter.invoke(array, 2, "54");
+      assertEquals("42", array[0]);
+      assertEquals("48", array[1]);
+      assertEquals("54", array[2]);
+    }
+  }
+
+  public static void testIdentity() throws Throwable {
+    {
+      MethodHandle identity = MethodHandles.identity(boolean.class);
+      boolean value = (boolean) identity.invoke(false);
+      if (value) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    {
+      MethodHandle identity = MethodHandles.identity(byte.class);
+      byte value = (byte) identity.invoke((byte) 0x8);
+      if (value != (byte) 0x8) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    {
+      MethodHandle identity = MethodHandles.identity(char.class);
+      char value = (char) identity.invoke((char) -56);
+      if (value != (char) -56) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    {
+      MethodHandle identity = MethodHandles.identity(short.class);
+      short value = (short) identity.invoke((short) -59);
+      if (value != (short) -59) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    {
+      MethodHandle identity = MethodHandles.identity(int.class);
+      int value = (int) identity.invoke(52);
+      if (value != 52) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    {
+      MethodHandle identity = MethodHandles.identity(long.class);
+      long value = (long) identity.invoke(-76l);
+      if (value != (long) -76) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    {
+      MethodHandle identity = MethodHandles.identity(float.class);
+      float value = (float) identity.invoke(56.0f);
+      if (value != (float) 56.0f) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    {
+      MethodHandle identity = MethodHandles.identity(double.class);
+      double value = (double) identity.invoke((double) 72.0);
+      if (value != (double) 72.0) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    {
+      MethodHandle identity = MethodHandles.identity(String.class);
+      String value = (String) identity.invoke("bazman");
+      assertEquals("bazman", value);
+    }
+  }
+
+  public static void testConstant() throws Throwable {
+    // int constants.
+    {
+      MethodHandle constant = MethodHandles.constant(int.class, 56);
+      int value = (int) constant.invoke();
+      if (value != 56) {
+        System.out.println("Unexpected value: " + value);
+      }
+
+      // short constant values are converted to int.
+      constant = MethodHandles.constant(int.class, (short) 52);
+      value = (int) constant.invoke();
+      if (value != 52) {
+        System.out.println("Unexpected value: " + value);
+      }
+
+      // char constant values are converted to int.
+      constant = MethodHandles.constant(int.class, (char) 'b');
+      value = (int) constant.invoke();
+      if (value != (int) 'b') {
+        System.out.println("Unexpected value: " + value);
+      }
+
+      // int constant values are converted to int.
+      constant = MethodHandles.constant(int.class, (byte) 0x1);
+      value = (int) constant.invoke();
+      if (value != 1) {
+        System.out.println("Unexpected value: " + value);
+      }
+
+      // boolean, float, double and long primitive constants are not convertible
+      // to int, so the handle creation must fail with a CCE.
+      try {
+        MethodHandles.constant(int.class, false);
+        fail();
+      } catch (ClassCastException expected) {
+      }
+
+      try {
+        MethodHandles.constant(int.class, 0.1f);
+        fail();
+      } catch (ClassCastException expected) {
+      }
+
+      try {
+        MethodHandles.constant(int.class, 0.2);
+        fail();
+      } catch (ClassCastException expected) {
+      }
+
+      try {
+        MethodHandles.constant(int.class, 73l);
+        fail();
+      } catch (ClassCastException expected) {
+      }
+    }
+
+    // long constants.
+    {
+      MethodHandle constant = MethodHandles.constant(long.class, 56l);
+      long value = (long) constant.invoke();
+      if (value != 56l) {
+        System.out.println("Unexpected value: " + value);
+      }
+
+      constant = MethodHandles.constant(long.class, (int) 56);
+      value = (long) constant.invoke();
+      if (value != 56l) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    // byte constants.
+    {
+      MethodHandle constant = MethodHandles.constant(byte.class, (byte) 0x12);
+      byte value = (byte) constant.invoke();
+      if (value != (byte) 0x12) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    // boolean constants.
+    {
+      MethodHandle constant = MethodHandles.constant(boolean.class, true);
+      boolean value = (boolean) constant.invoke();
+      if (!value) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    // char constants.
+    {
+      MethodHandle constant = MethodHandles.constant(char.class, 'f');
+      char value = (char) constant.invoke();
+      if (value != 'f') {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    // short constants.
+    {
+      MethodHandle constant = MethodHandles.constant(short.class, (short) 123);
+      short value = (short) constant.invoke();
+      if (value != (short) 123) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    // float constants.
+    {
+      MethodHandle constant = MethodHandles.constant(float.class, 56.0f);
+      float value = (float) constant.invoke();
+      if (value != 56.0f) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    // double constants.
+    {
+      MethodHandle constant = MethodHandles.constant(double.class, 256.0);
+      double value = (double) constant.invoke();
+      if (value != 256.0) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    // reference constants.
+    {
+      MethodHandle constant = MethodHandles.constant(String.class, "256.0");
+      String value = (String) constant.invoke();
+      assertEquals("256.0", value);
+    }
+  }
+
+  public static void testBindTo() throws Throwable {
+    MethodHandle stringCharAt = MethodHandles.lookup().findVirtual(
+        String.class, "charAt", MethodType.methodType(char.class, int.class));
+
+    char value = (char) stringCharAt.invoke("foo", 0);
+    if (value != 'f') {
+      System.out.println("Unexpected value: " + value);
+    }
+
+    MethodHandle bound = stringCharAt.bindTo("foo");
+    value = (char) bound.invoke(0);
+    if (value != 'f') {
+      System.out.println("Unexpected value: " + value);
+    }
+
+    try {
+      stringCharAt.bindTo(new Object());
+      fail();
+    } catch (ClassCastException expected) {
+    }
+
+    bound = stringCharAt.bindTo(null);
+    try {
+      bound.invoke(0);
+      fail();
+    } catch (NullPointerException expected) {
+    }
+
+    MethodHandle integerParseInt = MethodHandles.lookup().findStatic(
+        Integer.class, "parseInt", MethodType.methodType(int.class, String.class));
+
+    bound = integerParseInt.bindTo("78452");
+    int intValue = (int) bound.invoke();
+    if (intValue != 78452) {
+      System.out.println("Unexpected value: " + intValue);
+    }
+  }
+
+  public static String filterReturnValue_target(int a) {
+    return "ReturnValue" + a;
+  }
+
+  public static boolean filterReturnValue_filter(String value) {
+    return value.indexOf("42") != -1;
+  }
+
+  public static int filterReturnValue_intTarget(String a) {
+    return Integer.parseInt(a);
+  }
+
+  public static int filterReturnValue_intFilter(int b) {
+    return b + 1;
+  }
+
+  public static void filterReturnValue_voidTarget() {
+  }
+
+  public static int filterReturnValue_voidFilter() {
+    return 42;
+  }
+
+  public static void testFilterReturnValue() throws Throwable {
+    // A target that returns a reference.
+    {
+      final MethodHandle target = MethodHandles.lookup().findStatic(Main.class,
+          "filterReturnValue_target", MethodType.methodType(String.class, int.class));
+      final MethodHandle filter = MethodHandles.lookup().findStatic(Main.class,
+          "filterReturnValue_filter", MethodType.methodType(boolean.class, String.class));
+
+      MethodHandle adapter = MethodHandles.filterReturnValue(target, filter);
+
+      boolean value = (boolean) adapter.invoke((int) 42);
+      if (!value) {
+        System.out.println("Unexpected value: " + value);
+      }
+      value = (boolean) adapter.invoke((int) 43);
+      if (value) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    // A target that returns a primitive.
+    {
+      final MethodHandle target = MethodHandles.lookup().findStatic(Main.class,
+          "filterReturnValue_intTarget", MethodType.methodType(int.class, String.class));
+      final MethodHandle filter = MethodHandles.lookup().findStatic(Main.class,
+          "filterReturnValue_intFilter", MethodType.methodType(int.class, int.class));
+
+      MethodHandle adapter = MethodHandles.filterReturnValue(target, filter);
+
+      int value = (int) adapter.invoke("56");
+      if (value != 57) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+
+    // A target that returns void.
+    {
+      final MethodHandle target = MethodHandles.lookup().findStatic(Main.class,
+          "filterReturnValue_voidTarget", MethodType.methodType(void.class));
+      final MethodHandle filter = MethodHandles.lookup().findStatic(Main.class,
+          "filterReturnValue_voidFilter", MethodType.methodType(int.class));
+
+      MethodHandle adapter = MethodHandles.filterReturnValue(target, filter);
+
+      int value = (int) adapter.invoke();
+      if (value != 42) {
+        System.out.println("Unexpected value: " + value);
+      }
+    }
+  }
+
+  public static void permuteArguments_callee(boolean a, byte b, char c,
+      short d, int e, long f, float g, double h) {
+    if (a == true && b == (byte) 'b' && c == 'c' && d == (short) 56 &&
+        e == 78 && f == (long) 97 && g == 98.0f && f == 97.0) {
+      return;
+    }
+
+    System.out.println("Unexpected arguments: " + a + ", " + b + ", " + c
+        + ", " + d + ", " + e + ", " + f + ", " + g + ", " + h);
+  }
+
+  public static void permuteArguments_boxingCallee(boolean a, Integer b) {
+    if (a && b.intValue() == 42) {
+      return;
+    }
+
+    System.out.println("Unexpected arguments: " + a + ", " + b);
+  }
+
+  public static void testPermuteArguments() throws Throwable {
+    {
+      final MethodHandle target = MethodHandles.lookup().findStatic(
+          Main.class, "permuteArguments_callee",
+          MethodType.methodType(void.class, new Class<?>[] {
+            boolean.class, byte.class, char.class, short.class, int.class,
+            long.class, float.class, double.class }));
+
+      final MethodType newType = MethodType.methodType(void.class, new Class<?>[] {
+        double.class, float.class, long.class, int.class, short.class, char.class,
+        byte.class, boolean.class });
+
+      final MethodHandle permutation = MethodHandles.permuteArguments(
+          target, newType, new int[] { 7, 6, 5, 4, 3, 2, 1, 0 });
+
+      permutation.invoke((double) 97.0, (float) 98.0f, (long) 97, 78,
+          (short) 56, 'c', (byte) 'b', (boolean) true);
+
+      // The permutation array was not of the right length.
+      try {
+        MethodHandles.permuteArguments(target, newType,
+            new int[] { 7 });
+        fail();
+      } catch (IllegalArgumentException expected) {
+      }
+
+      // The permutation array has an element that's out of bounds
+      // (there's no argument with idx == 8).
+      try {
+        MethodHandles.permuteArguments(target, newType,
+            new int[] { 8, 6, 5, 4, 3, 2, 1, 0 });
+        fail();
+      } catch (IllegalArgumentException expected) {
+      }
+
+      // The permutation array maps to an incorrect type.
+      try {
+        MethodHandles.permuteArguments(target, newType,
+            new int[] { 7, 7, 5, 4, 3, 2, 1, 0 });
+        fail();
+      } catch (IllegalArgumentException expected) {
+      }
+    }
+
+    // Tests for reference arguments as well as permutations that
+    // repeat arguments.
+    {
+      final MethodHandle target = MethodHandles.lookup().findVirtual(
+          String.class, "concat", MethodType.methodType(String.class, String.class));
+
+      final MethodType newType = MethodType.methodType(String.class, String.class,
+          String.class);
+
+      assertEquals("foobar", (String) target.invoke("foo", "bar"));
+
+      MethodHandle permutation = MethodHandles.permuteArguments(target,
+          newType, new int[] { 1, 0 });
+      assertEquals("barfoo", (String) permutation.invoke("foo", "bar"));
+
+      permutation = MethodHandles.permuteArguments(target, newType, new int[] { 0, 0 });
+      assertEquals("foofoo", (String) permutation.invoke("foo", "bar"));
+
+      permutation = MethodHandles.permuteArguments(target, newType, new int[] { 1, 1 });
+      assertEquals("barbar", (String) permutation.invoke("foo", "bar"));
+    }
+
+    // Tests for boxing and unboxing.
+    {
+      final MethodHandle target = MethodHandles.lookup().findStatic(
+          Main.class, "permuteArguments_boxingCallee",
+          MethodType.methodType(void.class, new Class<?>[] { boolean.class, Integer.class }));
+
+      final MethodType newType = MethodType.methodType(void.class,
+          new Class<?>[] { Integer.class, boolean.class });
+
+      MethodHandle permutation = MethodHandles.permuteArguments(target,
+          newType, new int[] { 1, 0 });
+
+      permutation.invoke(42, true);
+      permutation.invoke(42, Boolean.TRUE);
+      permutation.invoke(Integer.valueOf(42), true);
+      permutation.invoke(Integer.valueOf(42), Boolean.TRUE);
+    }
+  }
+
+  public static void fail() {
+    System.out.println("FAIL");
+    Thread.dumpStack();
+  }
+
+  public static void assertEquals(String s1, String s2) {
+    if (s1 == s2) {
+      return;
+    }
+
+    if (s1 != null && s2 != null && s1.equals(s2)) {
+      return;
+    }
+
+    throw new AssertionError("assertEquals s1: " + s1 + ", s2: " + s2);
+  }
+}
diff --git a/test/958-methodhandle-emulated-stackframe/build b/test/958-methodhandle-emulated-stackframe/build
new file mode 100755
index 0000000..a423ca6
--- /dev/null
+++ b/test/958-methodhandle-emulated-stackframe/build
@@ -0,0 +1,25 @@
+#!/bin/bash
+#
+# Copyright 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# make us exit on a failure
+set -e
+
+if [[ $@ != *"--jvm"* ]]; then
+  # Don't do anything with jvm.
+  export USE_JACK=true
+fi
+
+./default-build "$@" --experimental method-handles
diff --git a/test/958-methodhandle-emulated-stackframe/expected.txt b/test/958-methodhandle-emulated-stackframe/expected.txt
new file mode 100644
index 0000000..5f38259
--- /dev/null
+++ b/test/958-methodhandle-emulated-stackframe/expected.txt
@@ -0,0 +1,32 @@
+boolean: false
+char: h
+short: 56
+int: 72
+long: 2147483689
+float: 0.56
+double: 100.0
+String: hello
+Object: goodbye
+boolean: false
+char: h
+short: 56
+int: 72
+long: 73
+float: 0.56
+double: 100.0
+String: hello
+Object: goodbye
+true
+true
+a
+a
+42
+42
+43
+43
+43.0
+43.0
+43.0
+43.0
+plank
+plank
diff --git a/test/958-methodhandle-emulated-stackframe/info.txt b/test/958-methodhandle-emulated-stackframe/info.txt
new file mode 100644
index 0000000..bec2324
--- /dev/null
+++ b/test/958-methodhandle-emulated-stackframe/info.txt
@@ -0,0 +1,5 @@
+Tests for dalvik.system.EmulatedStackFrame, which is used to implement
+MethodHandle transformations. This is a separate test because it tests
+an implementation detail and hence cannot be used with --mode=jvm.
+
+NOTE: needs to run under ART or a Java 8 Language runtime and compiler.
diff --git a/test/958-methodhandle-emulated-stackframe/run b/test/958-methodhandle-emulated-stackframe/run
new file mode 100755
index 0000000..a9f1822
--- /dev/null
+++ b/test/958-methodhandle-emulated-stackframe/run
@@ -0,0 +1,20 @@
+#!/bin/bash
+#
+# Copyright 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# make us exit on a failure
+set -e
+
+./default-run "$@" --experimental method-handles
diff --git a/test/958-methodhandle-emulated-stackframe/src/Main.java b/test/958-methodhandle-emulated-stackframe/src/Main.java
new file mode 100644
index 0000000..f739d47
--- /dev/null
+++ b/test/958-methodhandle-emulated-stackframe/src/Main.java
@@ -0,0 +1,175 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.lang.invoke.MethodHandle;
+import java.lang.invoke.MethodHandles;
+import java.lang.invoke.MethodHandles.Lookup;
+import java.lang.invoke.MethodType;
+import java.lang.invoke.WrongMethodTypeException;
+import java.lang.invoke.Transformers.Transformer;
+
+import dalvik.system.EmulatedStackFrame;
+
+public class Main {
+
+  public static void testDelegate_allTypes(boolean z, char a, short b, int c, long d,
+                                           float e, double f, String g, Object h) {
+    System.out.println("boolean: " + z);
+    System.out.println("char: " + a);
+    System.out.println("short: " + b);
+    System.out.println("int: " + c);
+    System.out.println("long: " + d);
+    System.out.println("float: " + e);
+    System.out.println("double: " + f);
+    System.out.println("String: " + g);
+    System.out.println("Object: " + h);
+  }
+
+  public static boolean testDelegate_returnBoolean() {
+    return true;
+  }
+
+  public static char testDelegate_returnChar() {
+    return 'a';
+  }
+
+  public static int testDelegate_returnInt() {
+    return 42;
+  }
+
+  public static long testDelegate_returnLong() {
+    return 43;
+  }
+
+  public static float testDelegate_returnFloat() {
+    return 43.0f;
+  }
+
+  public static double testDelegate_returnDouble() {
+    return 43.0;
+  }
+
+  public static String testDelegate_returnString() {
+    return "plank";
+  }
+
+  public static class DelegatingTransformer extends Transformer {
+    private final MethodHandle delegate;
+
+    public DelegatingTransformer(MethodHandle delegate) {
+      super(delegate.type());
+      this.delegate = delegate;
+    }
+
+    @Override
+    public void transform(EmulatedStackFrame stackFrame) throws Throwable {
+      delegate.invoke(stackFrame);
+    }
+  }
+
+  public static void main(String[] args) throws Throwable {
+    MethodHandle specialFunctionHandle = MethodHandles.lookup().findStatic(
+        Main.class, "testDelegate_allTypes", MethodType.methodType(void.class,
+          new Class<?>[] { boolean.class, char.class, short.class, int.class, long.class,
+            float.class, double.class, String.class, Object.class }));
+
+    DelegatingTransformer delegate = new DelegatingTransformer(specialFunctionHandle);
+
+    // Test an exact invoke.
+    //
+    // Note that the shorter form below doesn't work and must be
+    // investigated on the jack side :  b/32536744
+    //
+    // delegate.invokeExact(false, 'h', (short) 56, 72, Integer.MAX_VALUE + 42l,
+    //    0.56f, 100.0d, "hello", (Object) "goodbye");
+
+    Object obj = "goodbye";
+    delegate.invokeExact(false, 'h', (short) 56, 72, Integer.MAX_VALUE + 42l,
+        0.56f, 100.0d, "hello", obj);
+
+    // Test a non exact invoke with one int -> long conversion and a float -> double
+    // conversion.
+    delegate.invoke(false, 'h', (short) 56, 72, 73,
+        0.56f, 100.0f, "hello", "goodbye");
+
+    // Should throw a WrongMethodTypeException if the types don't align.
+    try {
+      delegate.invoke(false);
+      throw new AssertionError("Call to invoke unexpectedly succeeded");
+    } catch (WrongMethodTypeException expected) {
+    }
+
+    // Test return values.
+
+    // boolean.
+    MethodHandle returner = MethodHandles.lookup().findStatic(
+        Main.class, "testDelegate_returnBoolean", MethodType.methodType(boolean.class));
+    delegate = new DelegatingTransformer(returner);
+
+    System.out.println((boolean) delegate.invoke());
+    System.out.println((boolean) delegate.invokeExact());
+
+    // char.
+    returner = MethodHandles.lookup().findStatic(
+        Main.class, "testDelegate_returnChar", MethodType.methodType(char.class));
+    delegate = new DelegatingTransformer(returner);
+
+    System.out.println((char) delegate.invoke());
+    System.out.println((char) delegate.invokeExact());
+
+    // int.
+    returner = MethodHandles.lookup().findStatic(
+        Main.class, "testDelegate_returnInt", MethodType.methodType(int.class));
+    delegate = new DelegatingTransformer(returner);
+
+    System.out.println((int) delegate.invoke());
+    System.out.println((int) delegate.invokeExact());
+
+    // long.
+    returner = MethodHandles.lookup().findStatic(
+        Main.class, "testDelegate_returnLong", MethodType.methodType(long.class));
+    delegate = new DelegatingTransformer(returner);
+
+    System.out.println((long) delegate.invoke());
+    System.out.println((long) delegate.invokeExact());
+
+    // float.
+    returner = MethodHandles.lookup().findStatic(
+        Main.class, "testDelegate_returnFloat", MethodType.methodType(float.class));
+    delegate = new DelegatingTransformer(returner);
+
+    System.out.println((float) delegate.invoke());
+    System.out.println((float) delegate.invokeExact());
+
+    // double.
+    returner = MethodHandles.lookup().findStatic(
+        Main.class, "testDelegate_returnDouble", MethodType.methodType(double.class));
+    delegate = new DelegatingTransformer(returner);
+
+    System.out.println((double) delegate.invoke());
+    System.out.println((double) delegate.invokeExact());
+
+    // references.
+    returner = MethodHandles.lookup().findStatic(
+        Main.class, "testDelegate_returnString", MethodType.methodType(String.class));
+    delegate = new DelegatingTransformer(returner);
+
+    System.out.println((String) delegate.invoke());
+    System.out.println((String) delegate.invokeExact());
+  }
+}
+
+
diff --git a/test/959-invoke-polymorphic-accessors/build b/test/959-invoke-polymorphic-accessors/build
new file mode 100644
index 0000000..a423ca6
--- /dev/null
+++ b/test/959-invoke-polymorphic-accessors/build
@@ -0,0 +1,25 @@
+#!/bin/bash
+#
+# Copyright 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# make us exit on a failure
+set -e
+
+if [[ $@ != *"--jvm"* ]]; then
+  # Don't do anything with jvm.
+  export USE_JACK=true
+fi
+
+./default-build "$@" --experimental method-handles
diff --git a/test/959-invoke-polymorphic-accessors/expected.txt b/test/959-invoke-polymorphic-accessors/expected.txt
new file mode 100644
index 0000000..de2916b
--- /dev/null
+++ b/test/959-invoke-polymorphic-accessors/expected.txt
@@ -0,0 +1,4 @@
+1515870810
+Passed MethodHandles.Lookup tests for accessors.
+Passed MethodHandle.invokeExact() tests for accessors.
+Passed MethodHandle.invoke() tests for accessors.
diff --git a/test/959-invoke-polymorphic-accessors/info.txt b/test/959-invoke-polymorphic-accessors/info.txt
new file mode 100644
index 0000000..b2f55f0
--- /dev/null
+++ b/test/959-invoke-polymorphic-accessors/info.txt
@@ -0,0 +1 @@
+This test requires Jack with invoke-polymorphic support.
diff --git a/test/959-invoke-polymorphic-accessors/run b/test/959-invoke-polymorphic-accessors/run
new file mode 100644
index 0000000..a9f1822
--- /dev/null
+++ b/test/959-invoke-polymorphic-accessors/run
@@ -0,0 +1,20 @@
+#!/bin/bash
+#
+# Copyright 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# make us exit on a failure
+set -e
+
+./default-run "$@" --experimental method-handles
diff --git a/test/959-invoke-polymorphic-accessors/src/Main.java b/test/959-invoke-polymorphic-accessors/src/Main.java
new file mode 100644
index 0000000..b7ecf8e
--- /dev/null
+++ b/test/959-invoke-polymorphic-accessors/src/Main.java
@@ -0,0 +1,919 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.lang.invoke.MethodHandle;
+import java.lang.invoke.MethodHandles;
+import java.lang.invoke.WrongMethodTypeException;
+
+public class Main {
+
+    public static class ValueHolder {
+        public boolean m_z = false;
+        public byte m_b = 0;
+        public char m_c = 'a';
+        public short m_s = 0;
+        public int m_i = 0;
+        public float m_f = 0.0f;
+        public double m_d = 0.0;
+        public long m_j = 0;
+        public String m_l = "a";
+
+        public static boolean s_z;
+        public static byte s_b;
+        public static char s_c;
+        public static short s_s;
+        public static int s_i;
+        public static float s_f;
+        public static double s_d;
+        public static long s_j;
+        public static String s_l;
+
+        public final int m_fi = 0xa5a5a5a5;
+        public static final int s_fi = 0x5a5a5a5a;
+    }
+
+    public static class Tester {
+        public static void assertActualAndExpectedMatch(boolean actual, boolean expected)
+                throws AssertionError {
+            if (actual != expected) {
+                throw new AssertionError("Actual != Expected (" + actual + " != " + expected + ")");
+            }
+        }
+
+        public static void assertTrue(boolean value) throws AssertionError {
+            if (!value) {
+                throw new AssertionError("Value is not true");
+            }
+        }
+
+        public static void unreachable() throws Throwable{
+            throw new Error("unreachable");
+        }
+    }
+
+    public static class InvokeExactTester extends Tester {
+        private enum PrimitiveType {
+            Boolean,
+            Byte,
+            Char,
+            Short,
+            Int,
+            Long,
+            Float,
+            Double,
+            String,
+        }
+
+        private enum AccessorType {
+            IPUT,
+            SPUT,
+            IGET,
+            SGET,
+        }
+
+        static void setByte(MethodHandle m, ValueHolder v, byte value, boolean expectFailure)
+                throws Throwable {
+            boolean exceptionThrown = false;
+            try {
+                if (v == null) {
+                    m.invokeExact(value);
+                }
+                else {
+                    m.invokeExact(v, value);
+                }
+            }
+            catch (WrongMethodTypeException e) {
+                exceptionThrown = true;
+            }
+            assertActualAndExpectedMatch(exceptionThrown, expectFailure);
+        }
+
+        static void setByte(MethodHandle m, byte value, boolean expectFailure) throws Throwable {
+            setByte(m, null, value, expectFailure);
+        }
+
+        static void getByte(MethodHandle m, ValueHolder v, byte value, boolean expectFailure)
+                throws Throwable {
+            boolean exceptionThrown = false;
+            try {
+                final byte got;
+                if (v == null) {
+                    got = (byte)m.invokeExact();
+                } else {
+                    got = (byte)m.invokeExact(v);
+                }
+                assertTrue(got == value);
+            }
+            catch (WrongMethodTypeException e) {
+                exceptionThrown = true;
+            }
+            assertActualAndExpectedMatch(exceptionThrown, expectFailure);
+        }
+
+        static void getByte(MethodHandle m, byte value, boolean expectFailure) throws Throwable {
+            getByte(m, null, value, expectFailure);
+        }
+
+        static void setChar(MethodHandle m, ValueHolder v, char value, boolean expectFailure)
+                throws Throwable {
+            boolean exceptionThrown = false;
+            try {
+                if (v == null) {
+                    m.invokeExact(value);
+                }
+                else {
+                    m.invokeExact(v, value);
+                }
+            }
+            catch (WrongMethodTypeException e) {
+                exceptionThrown = true;
+            }
+            assertActualAndExpectedMatch(exceptionThrown, expectFailure);
+        }
+
+        static void setChar(MethodHandle m, char value, boolean expectFailure) throws Throwable {
+            setChar(m, null, value, expectFailure);
+        }
+
+        static void getChar(MethodHandle m, ValueHolder v, char value, boolean expectFailure)
+                throws Throwable {
+            boolean exceptionThrown = false;
+            try {
+                final char got;
+                if (v == null) {
+                    got = (char)m.invokeExact();
+                } else {
+                    got = (char)m.invokeExact(v);
+                }
+                assertTrue(got == value);
+            }
+            catch (WrongMethodTypeException e) {
+                exceptionThrown = true;
+            }
+            assertActualAndExpectedMatch(exceptionThrown, expectFailure);
+        }
+
+        static void getChar(MethodHandle m, char value, boolean expectFailure) throws Throwable {
+            getChar(m, null, value, expectFailure);
+        }
+
+        static void setShort(MethodHandle m, ValueHolder v, short value, boolean expectFailure)
+                throws Throwable {
+            boolean exceptionThrown = false;
+            try {
+                if (v == null) {
+                    m.invokeExact(value);
+                }
+                else {
+                    m.invokeExact(v, value);
+                }
+            }
+            catch (WrongMethodTypeException e) {
+                exceptionThrown = true;
+            }
+            assertActualAndExpectedMatch(exceptionThrown, expectFailure);
+        }
+
+        static void setShort(MethodHandle m, short value, boolean expectFailure) throws Throwable {
+            setShort(m, null, value, expectFailure);
+        }
+
+        static void getShort(MethodHandle m, ValueHolder v, short value, boolean expectFailure)
+                throws Throwable {
+            boolean exceptionThrown = false;
+            try {
+                final short got = (v == null) ? (short)m.invokeExact() : (short)m.invokeExact(v);
+                assertTrue(got == value);
+            }
+            catch (WrongMethodTypeException e) {
+                exceptionThrown = true;
+            }
+            assertActualAndExpectedMatch(exceptionThrown, expectFailure);
+        }
+
+        static void getShort(MethodHandle m, short value, boolean expectFailure) throws Throwable {
+            getShort(m, null, value, expectFailure);
+        }
+
+        static void setInt(MethodHandle m, ValueHolder v, int value, boolean expectFailure)
+                throws Throwable {
+            boolean exceptionThrown = false;
+            try {
+                if (v == null) {
+                    m.invokeExact(value);
+                }
+                else {
+                    m.invokeExact(v, value);
+                }
+            }
+            catch (WrongMethodTypeException e) {
+                exceptionThrown = true;
+            }
+            assertActualAndExpectedMatch(exceptionThrown, expectFailure);
+        }
+
+        static void setInt(MethodHandle m, int value, boolean expectFailure) throws Throwable {
+            setInt(m, null, value, expectFailure);
+        }
+
+        static void getInt(MethodHandle m, ValueHolder v, int value, boolean expectFailure)
+                throws Throwable {
+            boolean exceptionThrown = false;
+            try {
+                final int got = (v == null) ? (int)m.invokeExact() : (int)m.invokeExact(v);
+                assertTrue(got == value);
+            }
+            catch (WrongMethodTypeException e) {
+                exceptionThrown = true;
+            }
+            assertActualAndExpectedMatch(exceptionThrown, expectFailure);
+        }
+
+        static void getInt(MethodHandle m, int value, boolean expectFailure) throws Throwable {
+            getInt(m, null, value, expectFailure);
+        }
+
+        static void setLong(MethodHandle m, ValueHolder v, long value, boolean expectFailure)
+                throws Throwable {
+            boolean exceptionThrown = false;
+            try {
+                if (v == null) {
+                    m.invokeExact(value);
+                }
+                else {
+                    m.invokeExact(v, value);
+                }
+            }
+            catch (WrongMethodTypeException e) {
+                exceptionThrown = true;
+            }
+            assertActualAndExpectedMatch(exceptionThrown, expectFailure);
+        }
+
+        static void setLong(MethodHandle m, long value, boolean expectFailure) throws Throwable {
+            setLong(m, null, value, expectFailure);
+        }
+
+        static void getLong(MethodHandle m, ValueHolder v, long value, boolean expectFailure)
+                throws Throwable {
+            boolean exceptionThrown = false;
+            try {
+                final long got = (v == null) ? (long)m.invokeExact() : (long)m.invokeExact(v);
+                assertTrue(got == value);
+            }
+            catch (WrongMethodTypeException e) {
+                exceptionThrown = true;
+            }
+            assertActualAndExpectedMatch(exceptionThrown, expectFailure);
+        }
+
+        static void getLong(MethodHandle m, long value, boolean expectFailure) throws Throwable {
+            getLong(m, null, value, expectFailure);
+        }
+
+        static void setFloat(MethodHandle m, ValueHolder v, float value, boolean expectFailure)
+                throws Throwable {
+            boolean exceptionThrown = false;
+            try {
+                if (v == null) {
+                    m.invokeExact(value);
+                }
+                else {
+                    m.invokeExact(v, value);
+                }
+            }
+            catch (WrongMethodTypeException e) {
+                exceptionThrown = true;
+            }
+            assertActualAndExpectedMatch(exceptionThrown, expectFailure);
+        }
+
+        static void setFloat(MethodHandle m, float value, boolean expectFailure) throws Throwable {
+            setFloat(m, null, value, expectFailure);
+        }
+
+        static void getFloat(MethodHandle m, ValueHolder v, float value, boolean expectFailure)
+                throws Throwable {
+            boolean exceptionThrown = false;
+            try {
+                final float got = (v == null) ? (float)m.invokeExact() : (float)m.invokeExact(v);
+                assertTrue(got == value);
+            }
+            catch (WrongMethodTypeException e) {
+                exceptionThrown = true;
+            }
+            assertActualAndExpectedMatch(exceptionThrown, expectFailure);
+        }
+
+        static void getFloat(MethodHandle m, float value, boolean expectFailure) throws Throwable {
+            getFloat(m, null, value, expectFailure);
+        }
+
+        static void setDouble(MethodHandle m, ValueHolder v, double value, boolean expectFailure)
+                throws Throwable {
+            boolean exceptionThrown = false;
+            try {
+                if (v == null) {
+                    m.invokeExact(value);
+                }
+                else {
+                    m.invokeExact(v, value);
+                }
+            }
+            catch (WrongMethodTypeException e) {
+                exceptionThrown = true;
+            }
+            assertActualAndExpectedMatch(exceptionThrown, expectFailure);
+        }
+
+        static void setDouble(MethodHandle m, double value, boolean expectFailure)
+                throws Throwable {
+            setDouble(m, null, value, expectFailure);
+        }
+
+        static void getDouble(MethodHandle m, ValueHolder v, double value, boolean expectFailure)
+                throws Throwable {
+            boolean exceptionThrown = false;
+            try {
+                final double got = (v == null) ? (double)m.invokeExact() : (double)m.invokeExact(v);
+                assertTrue(got == value);
+            }
+            catch (WrongMethodTypeException e) {
+                exceptionThrown = true;
+            }
+            assertActualAndExpectedMatch(exceptionThrown, expectFailure);
+        }
+
+        static void getDouble(MethodHandle m, double value, boolean expectFailure)
+                throws Throwable {
+            getDouble(m, null, value, expectFailure);
+        }
+
+        static void setString(MethodHandle m, ValueHolder v, String value, boolean expectFailure)
+                throws Throwable {
+            boolean exceptionThrown = false;
+            try {
+                if (v == null) {
+                    m.invokeExact(value);
+                }
+                else {
+                    m.invokeExact(v, value);
+                }
+            }
+            catch (WrongMethodTypeException e) {
+                exceptionThrown = true;
+            }
+            assertActualAndExpectedMatch(exceptionThrown, expectFailure);
+        }
+
+        static void setString(MethodHandle m, String value, boolean expectFailure)
+                throws Throwable {
+            setString(m, null, value, expectFailure);
+        }
+
+        static void getString(MethodHandle m, ValueHolder v, String value, boolean expectFailure)
+                throws Throwable {
+            boolean exceptionThrown = false;
+            try {
+                final String got = (v == null) ? (String)m.invokeExact() : (String)m.invokeExact(v);
+                assertTrue(got.equals(value));
+            }
+            catch (WrongMethodTypeException e) {
+                exceptionThrown = true;
+            }
+            assertActualAndExpectedMatch(exceptionThrown, expectFailure);
+        }
+
+        static void getString(MethodHandle m, String value, boolean expectFailure)
+                throws Throwable {
+            getString(m, null, value, expectFailure);
+        }
+
+        static void setBoolean(MethodHandle m, ValueHolder v, boolean value, boolean expectFailure)
+                throws Throwable {
+            boolean exceptionThrown = false;
+            try {
+                if (v == null) {
+                    m.invokeExact(value);
+                }
+                else {
+                    m.invokeExact(v, value);
+                }
+            }
+            catch (WrongMethodTypeException e) {
+                exceptionThrown = true;
+            }
+            assertActualAndExpectedMatch(exceptionThrown, expectFailure);
+        }
+
+        static void setBoolean(MethodHandle m, boolean value, boolean expectFailure)
+                throws Throwable {
+            setBoolean(m, null, value, expectFailure);
+        }
+
+        static void getBoolean(MethodHandle m, ValueHolder v, boolean value, boolean expectFailure)
+                throws Throwable {
+            boolean exceptionThrown = false;
+            try {
+                final boolean got =
+                        (v == null) ? (boolean)m.invokeExact() : (boolean)m.invokeExact(v);
+                assertTrue(got == value);
+            }
+            catch (WrongMethodTypeException e) {
+                exceptionThrown = true;
+            }
+            assertActualAndExpectedMatch(exceptionThrown, expectFailure);
+        }
+
+        static void getBoolean(MethodHandle m, boolean value, boolean expectFailure)
+                throws Throwable {
+            getBoolean(m, null, value, expectFailure);
+        }
+
+        static boolean resultFor(PrimitiveType actualType, PrimitiveType expectedType,
+                                 AccessorType actualAccessor,
+                                 AccessorType expectedAccessor) {
+            return (actualType != expectedType) || (actualAccessor != expectedAccessor);
+        }
+
+        static void tryAccessor(MethodHandle methodHandle,
+                                ValueHolder valueHolder,
+                                PrimitiveType primitive,
+                                Object value,
+                                AccessorType accessor) throws Throwable {
+            boolean booleanValue =
+                    value instanceof Boolean ? ((Boolean)value).booleanValue() : false;
+            setBoolean(methodHandle, valueHolder, booleanValue,
+                       resultFor(primitive, PrimitiveType.Boolean, accessor, AccessorType.IPUT));
+            setBoolean(methodHandle, booleanValue,
+                       resultFor(primitive, PrimitiveType.Boolean, accessor, AccessorType.SPUT));
+            getBoolean(methodHandle, valueHolder, booleanValue,
+                       resultFor(primitive, PrimitiveType.Boolean, accessor, AccessorType.IGET));
+            getBoolean(methodHandle, booleanValue,
+                       resultFor(primitive, PrimitiveType.Boolean, accessor, AccessorType.SGET));
+
+            byte byteValue = value instanceof Byte ? ((Byte)value).byteValue() : (byte)0;
+            setByte(methodHandle, valueHolder, byteValue,
+                    resultFor(primitive, PrimitiveType.Byte, accessor, AccessorType.IPUT));
+            setByte(methodHandle, byteValue,
+                    resultFor(primitive, PrimitiveType.Byte, accessor, AccessorType.SPUT));
+            getByte(methodHandle, valueHolder, byteValue,
+                    resultFor(primitive, PrimitiveType.Byte, accessor, AccessorType.IGET));
+            getByte(methodHandle, byteValue,
+                    resultFor(primitive, PrimitiveType.Byte, accessor, AccessorType.SGET));
+
+            char charValue = value instanceof Character ? ((Character)value).charValue() : 'z';
+            setChar(methodHandle, valueHolder, charValue,
+                    resultFor(primitive, PrimitiveType.Char, accessor, AccessorType.IPUT));
+            setChar(methodHandle, charValue,
+                    resultFor(primitive, PrimitiveType.Char, accessor, AccessorType.SPUT));
+            getChar(methodHandle, valueHolder, charValue,
+                    resultFor(primitive, PrimitiveType.Char, accessor, AccessorType.IGET));
+            getChar(methodHandle, charValue,
+                    resultFor(primitive, PrimitiveType.Char, accessor, AccessorType.SGET));
+
+            short shortValue = value instanceof Short ? ((Short)value).shortValue() : (short)0;
+            setShort(methodHandle, valueHolder, shortValue,
+                     resultFor(primitive, PrimitiveType.Short, accessor, AccessorType.IPUT));
+            setShort(methodHandle, shortValue,
+                    resultFor(primitive, PrimitiveType.Short, accessor, AccessorType.SPUT));
+            getShort(methodHandle, valueHolder, shortValue,
+                     resultFor(primitive, PrimitiveType.Short, accessor, AccessorType.IGET));
+            getShort(methodHandle, shortValue,
+                    resultFor(primitive, PrimitiveType.Short, accessor, AccessorType.SGET));
+
+            int intValue = value instanceof Integer ? ((Integer)value).intValue() : -1;
+            setInt(methodHandle, valueHolder, intValue,
+                   resultFor(primitive, PrimitiveType.Int, accessor, AccessorType.IPUT));
+            setInt(methodHandle, intValue,
+                   resultFor(primitive, PrimitiveType.Int, accessor, AccessorType.SPUT));
+            getInt(methodHandle, valueHolder, intValue,
+                   resultFor(primitive, PrimitiveType.Int, accessor, AccessorType.IGET));
+            getInt(methodHandle, intValue,
+                   resultFor(primitive, PrimitiveType.Int, accessor, AccessorType.SGET));
+
+            long longValue = value instanceof Long ? ((Long)value).longValue() : (long)-1;
+            setLong(methodHandle, valueHolder, longValue,
+                    resultFor(primitive, PrimitiveType.Long, accessor, AccessorType.IPUT));
+            setLong(methodHandle, longValue,
+                    resultFor(primitive, PrimitiveType.Long, accessor, AccessorType.SPUT));
+            getLong(methodHandle, valueHolder, longValue,
+                    resultFor(primitive, PrimitiveType.Long, accessor, AccessorType.IGET));
+            getLong(methodHandle, longValue,
+                    resultFor(primitive, PrimitiveType.Long, accessor, AccessorType.SGET));
+
+            float floatValue = value instanceof Float ? ((Float)value).floatValue() : -1.0f;
+            setFloat(methodHandle, valueHolder, floatValue,
+                    resultFor(primitive, PrimitiveType.Float, accessor, AccessorType.IPUT));
+            setFloat(methodHandle, floatValue,
+                    resultFor(primitive, PrimitiveType.Float, accessor, AccessorType.SPUT));
+            getFloat(methodHandle, valueHolder, floatValue,
+                    resultFor(primitive, PrimitiveType.Float, accessor, AccessorType.IGET));
+            getFloat(methodHandle, floatValue,
+                     resultFor(primitive, PrimitiveType.Float, accessor, AccessorType.SGET));
+
+            double doubleValue = value instanceof Double ? ((Double)value).doubleValue() : -1.0;
+            setDouble(methodHandle, valueHolder, doubleValue,
+                      resultFor(primitive, PrimitiveType.Double, accessor, AccessorType.IPUT));
+            setDouble(methodHandle, doubleValue,
+                      resultFor(primitive, PrimitiveType.Double, accessor, AccessorType.SPUT));
+            getDouble(methodHandle, valueHolder, doubleValue,
+                      resultFor(primitive, PrimitiveType.Double, accessor, AccessorType.IGET));
+            getDouble(methodHandle, doubleValue,
+                      resultFor(primitive, PrimitiveType.Double, accessor, AccessorType.SGET));
+
+            String stringValue = value instanceof String ? ((String) value) : "No Spock, no";
+            setString(methodHandle, valueHolder, stringValue,
+                      resultFor(primitive, PrimitiveType.String, accessor, AccessorType.IPUT));
+            setString(methodHandle, stringValue,
+                      resultFor(primitive, PrimitiveType.String, accessor, AccessorType.SPUT));
+            getString(methodHandle, valueHolder, stringValue,
+                      resultFor(primitive, PrimitiveType.String, accessor, AccessorType.IGET));
+            getString(methodHandle, stringValue,
+                      resultFor(primitive, PrimitiveType.String, accessor, AccessorType.SGET));
+        }
+
+        public static void main() throws Throwable {
+            ValueHolder valueHolder = new ValueHolder();
+            MethodHandles.Lookup lookup = MethodHandles.lookup();
+
+            boolean [] booleans = { false, true, false };
+            for (boolean b : booleans) {
+                Boolean boxed = new Boolean(b);
+                tryAccessor(lookup.findSetter(ValueHolder.class, "m_z", boolean.class),
+                            valueHolder, PrimitiveType.Boolean, boxed, AccessorType.IPUT);
+                tryAccessor(lookup.findGetter(ValueHolder.class, "m_z", boolean.class),
+                            valueHolder, PrimitiveType.Boolean, boxed, AccessorType.IGET);
+                assertTrue(valueHolder.m_z == b);
+                tryAccessor(lookup.findStaticSetter(ValueHolder.class, "s_z", boolean.class),
+                            valueHolder, PrimitiveType.Boolean, boxed, AccessorType.SPUT);
+                tryAccessor(lookup.findStaticGetter(ValueHolder.class, "s_z", boolean.class),
+                            valueHolder, PrimitiveType.Boolean, boxed, AccessorType.SGET);
+                assertTrue(ValueHolder.s_z == b);
+            }
+
+            byte [] bytes = { (byte)0x73, (byte)0xfe };
+            for (byte b : bytes) {
+                Byte boxed = new Byte(b);
+                tryAccessor(lookup.findSetter(ValueHolder.class, "m_b", byte.class),
+                            valueHolder, PrimitiveType.Byte, boxed, AccessorType.IPUT);
+                tryAccessor(lookup.findGetter(ValueHolder.class, "m_b", byte.class),
+                            valueHolder, PrimitiveType.Byte, boxed, AccessorType.IGET);
+                assertTrue(valueHolder.m_b == b);
+                tryAccessor(lookup.findStaticSetter(ValueHolder.class, "s_b", byte.class),
+                            valueHolder, PrimitiveType.Byte, boxed, AccessorType.SPUT);
+                tryAccessor(lookup.findStaticGetter(ValueHolder.class, "s_b", byte.class),
+                            valueHolder, PrimitiveType.Byte, boxed, AccessorType.SGET);
+                assertTrue(ValueHolder.s_b == b);
+            }
+
+            char [] chars = { 'a', 'b', 'c' };
+            for (char c : chars) {
+                Character boxed = new Character(c);
+                tryAccessor(lookup.findSetter(ValueHolder.class, "m_c", char.class),
+                            valueHolder, PrimitiveType.Char, boxed, AccessorType.IPUT);
+                tryAccessor(lookup.findGetter(ValueHolder.class, "m_c", char.class),
+                            valueHolder, PrimitiveType.Char, boxed, AccessorType.IGET);
+                assertTrue(valueHolder.m_c == c);
+                tryAccessor(lookup.findStaticSetter(ValueHolder.class, "s_c", char.class),
+                            valueHolder, PrimitiveType.Char, boxed, AccessorType.SPUT);
+                tryAccessor(lookup.findStaticGetter(ValueHolder.class, "s_c", char.class),
+                            valueHolder, PrimitiveType.Char, boxed, AccessorType.SGET);
+                assertTrue(ValueHolder.s_c == c);
+            }
+
+            short [] shorts = { (short)0x1234, (short)0x4321 };
+            for (short s : shorts) {
+                Short boxed = new Short(s);
+                tryAccessor(lookup.findSetter(ValueHolder.class, "m_s", short.class),
+                            valueHolder, PrimitiveType.Short, boxed, AccessorType.IPUT);
+                tryAccessor(lookup.findGetter(ValueHolder.class, "m_s", short.class),
+                            valueHolder, PrimitiveType.Short, boxed, AccessorType.IGET);
+                assertTrue(valueHolder.m_s == s);
+                tryAccessor(lookup.findStaticSetter(ValueHolder.class, "s_s", short.class),
+                            valueHolder, PrimitiveType.Short, boxed, AccessorType.SPUT);
+                tryAccessor(lookup.findStaticGetter(ValueHolder.class, "s_s", short.class),
+                            valueHolder, PrimitiveType.Short, boxed, AccessorType.SGET);
+                assertTrue(ValueHolder.s_s == s);
+            }
+
+            int [] ints = { -100000000, 10000000 };
+            for (int i : ints) {
+                Integer boxed = new Integer(i);
+                tryAccessor(lookup.findSetter(ValueHolder.class, "m_i", int.class),
+                            valueHolder, PrimitiveType.Int, boxed, AccessorType.IPUT);
+                tryAccessor(lookup.findGetter(ValueHolder.class, "m_i", int.class),
+                            valueHolder, PrimitiveType.Int, boxed, AccessorType.IGET);
+                assertTrue(valueHolder.m_i == i);
+                tryAccessor(lookup.findStaticSetter(ValueHolder.class, "s_i", int.class),
+                            valueHolder, PrimitiveType.Int, boxed, AccessorType.SPUT);
+                tryAccessor(lookup.findStaticGetter(ValueHolder.class, "s_i", int.class),
+                            valueHolder, PrimitiveType.Int, boxed, AccessorType.SGET);
+                assertTrue(ValueHolder.s_i == i);
+            }
+
+            float [] floats = { 0.99f, -1.23e-17f };
+            for (float f : floats) {
+                Float boxed = new Float(f);
+                tryAccessor(lookup.findSetter(ValueHolder.class, "m_f", float.class),
+                            valueHolder, PrimitiveType.Float, boxed, AccessorType.IPUT);
+                tryAccessor(lookup.findGetter(ValueHolder.class, "m_f", float.class),
+                            valueHolder, PrimitiveType.Float, boxed, AccessorType.IGET);
+                assertTrue(valueHolder.m_f == f);
+                tryAccessor(lookup.findStaticSetter(ValueHolder.class, "s_f", float.class),
+                            valueHolder, PrimitiveType.Float, boxed, AccessorType.SPUT);
+                tryAccessor(lookup.findStaticGetter(ValueHolder.class, "s_f", float.class),
+                            valueHolder, PrimitiveType.Float, boxed, AccessorType.SGET);
+                assertTrue(ValueHolder.s_f == f);
+            }
+
+            double [] doubles = { 0.44444444444e37, -0.555555555e-37 };
+            for (double d : doubles) {
+                Double boxed = new Double(d);
+                tryAccessor(lookup.findSetter(ValueHolder.class, "m_d", double.class),
+                            valueHolder, PrimitiveType.Double, boxed, AccessorType.IPUT);
+                tryAccessor(lookup.findGetter(ValueHolder.class, "m_d", double.class),
+                            valueHolder, PrimitiveType.Double, boxed, AccessorType.IGET);
+                assertTrue(valueHolder.m_d == d);
+                tryAccessor(lookup.findStaticSetter(ValueHolder.class, "s_d", double.class),
+                            valueHolder, PrimitiveType.Double, boxed, AccessorType.SPUT);
+                tryAccessor(lookup.findStaticGetter(ValueHolder.class, "s_d", double.class),
+                            valueHolder, PrimitiveType.Double, boxed, AccessorType.SGET);
+                assertTrue(ValueHolder.s_d == d);
+            }
+
+            long [] longs = { 0x0123456789abcdefl, 0xfedcba9876543210l };
+            for (long j : longs) {
+                Long boxed = new Long(j);
+                tryAccessor(lookup.findSetter(ValueHolder.class, "m_j", long.class),
+                            valueHolder, PrimitiveType.Long, boxed, AccessorType.IPUT);
+                tryAccessor(lookup.findGetter(ValueHolder.class, "m_j", long.class),
+                            valueHolder, PrimitiveType.Long, boxed, AccessorType.IGET);
+                assertTrue(valueHolder.m_j == j);
+                tryAccessor(lookup.findStaticSetter(ValueHolder.class, "s_j", long.class),
+                            valueHolder, PrimitiveType.Long, boxed, AccessorType.SPUT);
+                tryAccessor(lookup.findStaticGetter(ValueHolder.class, "s_j", long.class),
+                            valueHolder, PrimitiveType.Long, boxed, AccessorType.SGET);
+                assertTrue(ValueHolder.s_j == j);
+            }
+
+            String [] strings = { "octopus", "crab" };
+            for (String s : strings) {
+                tryAccessor(lookup.findSetter(ValueHolder.class, "m_l", String.class),
+                            valueHolder, PrimitiveType.String, s, AccessorType.IPUT);
+                tryAccessor(lookup.findGetter(ValueHolder.class, "m_l", String.class),
+                            valueHolder, PrimitiveType.String, s, AccessorType.IGET);
+                assertTrue(s.equals(valueHolder.m_l));
+                tryAccessor(lookup.findStaticSetter(ValueHolder.class, "s_l", String.class),
+                            valueHolder, PrimitiveType.String, s, AccessorType.SPUT);
+                tryAccessor(lookup.findStaticGetter(ValueHolder.class, "s_l", String.class),
+                            valueHolder, PrimitiveType.String, s, AccessorType.SGET);
+                assertTrue(s.equals(ValueHolder.s_l));
+            }
+
+            System.out.println("Passed MethodHandle.invokeExact() tests for accessors.");
+        }
+    }
+
+    public static class FindAccessorTester extends Tester {
+        public static void main() throws Throwable {
+            // NB having a static field test here is essential for
+            // this test. MethodHandles need to ensure the class
+            // (ValueHolder) is initialized. This happens in the
+            // invoke-polymorphic dispatch.
+            MethodHandles.Lookup lookup = MethodHandles.lookup();
+            try {
+                MethodHandle mh = lookup.findStaticGetter(ValueHolder.class, "s_fi", int.class);
+                int initialValue = (int)mh.invokeExact();
+                System.out.println(initialValue);
+            } catch (NoSuchFieldException e) { unreachable(); }
+            try {
+                MethodHandle mh = lookup.findStaticSetter(ValueHolder.class, "s_i", int.class);
+                mh.invokeExact(0);
+            } catch (NoSuchFieldException e) { unreachable(); }
+            try {
+                lookup.findStaticGetter(ValueHolder.class, "s_fi", byte.class);
+                unreachable();
+            } catch (NoSuchFieldException e) {}
+            try {
+                lookup.findGetter(ValueHolder.class, "s_fi", byte.class);
+                unreachable();
+            } catch (NoSuchFieldException e) {}
+            try {
+                lookup.findStaticSetter(ValueHolder.class, "s_fi", int.class);
+                unreachable();
+            } catch (IllegalAccessException e) {}
+
+            lookup.findGetter(ValueHolder.class, "m_fi", int.class);
+            try {
+                lookup.findGetter(ValueHolder.class, "m_fi", byte.class);
+                unreachable();
+            } catch (NoSuchFieldException e) {}
+            try {
+                lookup.findStaticGetter(ValueHolder.class, "m_fi", byte.class);
+                unreachable();
+            } catch (NoSuchFieldException e) {}
+            try {
+                lookup.findSetter(ValueHolder.class, "m_fi", int.class);
+                unreachable();
+            } catch (IllegalAccessException e) {}
+
+            System.out.println("Passed MethodHandles.Lookup tests for accessors.");
+        }
+    }
+
+    public static class InvokeTester extends Tester {
+        private static void testStaticGetter() throws Throwable {
+            MethodHandles.Lookup lookup = MethodHandles.lookup();
+            MethodHandle h0 = lookup.findStaticGetter(ValueHolder.class, "s_fi", int.class);
+            h0.invoke();
+            Number t = (Number)h0.invoke();
+            int u = (int)h0.invoke();
+            Integer v = (Integer)h0.invoke();
+            long w = (long)h0.invoke();
+            try {
+                byte x = (byte)h0.invoke();
+                unreachable();
+            } catch (WrongMethodTypeException e) {}
+            try {
+                String y = (String)h0.invoke();
+                unreachable();
+            } catch (WrongMethodTypeException e) {}
+            try {
+                Long z = (Long)h0.invoke();
+                unreachable();
+            } catch (WrongMethodTypeException e) {}
+        }
+
+        private static void testMemberGetter() throws Throwable {
+            ValueHolder valueHolder = new ValueHolder();
+            MethodHandles.Lookup lookup = MethodHandles.lookup();
+            MethodHandle h0 = lookup.findGetter(ValueHolder.class, "m_fi", int.class);
+            h0.invoke(valueHolder);
+            Number t = (Number)h0.invoke(valueHolder);
+            int u = (int)h0.invoke(valueHolder);
+            Integer v = (Integer)h0.invoke(valueHolder);
+            long w = (long)h0.invoke(valueHolder);
+            try {
+                byte x = (byte)h0.invoke(valueHolder);
+                unreachable();
+            } catch (WrongMethodTypeException e) {}
+            try {
+                String y = (String)h0.invoke(valueHolder);
+                unreachable();
+            } catch (WrongMethodTypeException e) {}
+            try {
+                Long z = (Long)h0.invoke(valueHolder);
+                unreachable();
+            } catch (WrongMethodTypeException e) {}
+        }
+
+        /*package*/ static Number getDoubleAsNumber() {
+            return new Double(1.4e77);
+        }
+        /*package*/ static Number getFloatAsNumber() {
+            return new Float(7.77);
+        }
+        /*package*/ static Object getFloatAsObject() {
+            return new Float(-7.77);
+        }
+
+        private static void testMemberSetter() throws Throwable {
+            ValueHolder valueHolder = new ValueHolder();
+            MethodHandles.Lookup lookup = MethodHandles.lookup();
+            MethodHandle h0 = lookup.findSetter(ValueHolder.class, "m_f", float.class);
+            h0.invoke(valueHolder, 0.22f);
+            h0.invoke(valueHolder, new Float(1.11f));
+            Number floatNumber = getFloatAsNumber();
+            h0.invoke(valueHolder, floatNumber);
+            assertTrue(valueHolder.m_f == floatNumber.floatValue());
+            Object objNumber = getFloatAsObject();
+            h0.invoke(valueHolder, objNumber);
+            assertTrue(valueHolder.m_f == ((Float) objNumber).floatValue());
+            try {
+              h0.invoke(valueHolder, (Float)null);
+              unreachable();
+            } catch (NullPointerException e) {}
+
+            h0.invoke(valueHolder, (byte)1);
+            h0.invoke(valueHolder, (short)2);
+            h0.invoke(valueHolder, 3);
+            h0.invoke(valueHolder, 4l);
+
+            assertTrue(null == (Object) h0.invoke(valueHolder, 33));
+            assertTrue(0.0f == (float) h0.invoke(valueHolder, 33));
+            assertTrue(0l == (long) h0.invoke(valueHolder, 33));
+
+            try {
+                h0.invoke(valueHolder, 0.33);
+                unreachable();
+            } catch (WrongMethodTypeException e) {}
+            try {
+                Number doubleNumber = getDoubleAsNumber();
+                h0.invoke(valueHolder, doubleNumber);
+                unreachable();
+            } catch (ClassCastException e) {}
+            try {
+                Number doubleNumber = null;
+                h0.invoke(valueHolder, doubleNumber);
+                unreachable();
+            } catch (NullPointerException e) {}
+            try {
+                // Mismatched return type - float != void
+                float tmp = (float)h0.invoke(valueHolder, 0.45f);
+                assertTrue(tmp == 0.0);
+            } catch (Exception e) { unreachable(); }
+            try {
+                h0.invoke(valueHolder, "bam");
+                unreachable();
+            } catch (WrongMethodTypeException e) {}
+            try {
+                String s = null;
+                h0.invoke(valueHolder, s);
+                unreachable();
+            } catch (WrongMethodTypeException e) {}
+        }
+
+        private static void testStaticSetter() throws Throwable {
+            MethodHandles.Lookup lookup = MethodHandles.lookup();
+            MethodHandle h0 = lookup.findStaticSetter(ValueHolder.class, "s_f", float.class);
+            h0.invoke(0.22f);
+            h0.invoke(new Float(1.11f));
+            Number floatNumber = new Float(0.88f);
+            h0.invoke(floatNumber);
+            assertTrue(ValueHolder.s_f == floatNumber.floatValue());
+
+            try {
+              h0.invoke((Float)null);
+              unreachable();
+            } catch (NullPointerException e) {}
+
+            h0.invoke((byte)1);
+            h0.invoke((short)2);
+            h0.invoke(3);
+            h0.invoke(4l);
+
+            assertTrue(null == (Object) h0.invoke(33));
+            assertTrue(0.0f == (float) h0.invoke(33));
+            assertTrue(0l == (long) h0.invoke(33));
+
+            try {
+                h0.invoke(0.33);
+                unreachable();
+            } catch (WrongMethodTypeException e) {}
+            try {
+                Number doubleNumber = getDoubleAsNumber();
+                h0.invoke(doubleNumber);
+                unreachable();
+            } catch (ClassCastException e) {}
+            try {
+                Number doubleNumber = new Double(1.01);
+                doubleNumber = (doubleNumber.doubleValue() != 0.1) ? null : doubleNumber;
+                h0.invoke(doubleNumber);
+                unreachable();
+            } catch (NullPointerException e) {}
+            try {
+                // Mismatched return type - float != void
+                float tmp = (float)h0.invoke(0.45f);
+                assertTrue(tmp == 0.0);
+            } catch (Exception e) { unreachable(); }
+            try {
+                h0.invoke("bam");
+                unreachable();
+            } catch (WrongMethodTypeException e) {}
+            try {
+                String s = null;
+                h0.invoke(s);
+                unreachable();
+            } catch (WrongMethodTypeException e) {}
+        }
+
+        public static void main() throws Throwable{
+            testStaticGetter();
+            testMemberGetter();
+            testStaticSetter();
+            testMemberSetter();
+            System.out.println("Passed MethodHandle.invoke() tests for accessors.");
+        }
+    }
+
+    public static void main(String[] args) throws Throwable {
+        // FindAccessor test should be the first test class in this
+        // file to ensure class initialization test is run.
+        FindAccessorTester.main();
+        InvokeExactTester.main();
+        InvokeTester.main();
+    }
+}
diff --git a/test/978-virtual-interface/build b/test/978-virtual-interface/build
new file mode 100755
index 0000000..14230c2
--- /dev/null
+++ b/test/978-virtual-interface/build
@@ -0,0 +1,20 @@
+#!/bin/bash
+#
+# Copyright 2015 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# make us exit on a failure
+set -e
+
+./default-build "$@" --experimental default-methods
diff --git a/test/978-virtual-interface/expected.txt b/test/978-virtual-interface/expected.txt
new file mode 100644
index 0000000..99071b1
--- /dev/null
+++ b/test/978-virtual-interface/expected.txt
@@ -0,0 +1 @@
+Recieved expected ICCE error!
diff --git a/test/978-virtual-interface/info.txt b/test/978-virtual-interface/info.txt
new file mode 100644
index 0000000..0b8a39f
--- /dev/null
+++ b/test/978-virtual-interface/info.txt
@@ -0,0 +1,7 @@
+Smali-based regression test for b/32201623
+
+This test cannot be run with --jvm.
+
+This test checks that we correctly detect when one attempts to invoke an
+interface method via the invoke-virtual opcode and that correct exceptions are
+sent.
diff --git a/test/978-virtual-interface/smali/Iface.smali b/test/978-virtual-interface/smali/Iface.smali
new file mode 100644
index 0000000..9c3ef7a
--- /dev/null
+++ b/test/978-virtual-interface/smali/Iface.smali
@@ -0,0 +1,110 @@
+# /*
+#  * Copyright (C) 2015 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+#
+# // Methods are sorted in alphabetical order in dex file. We need 10 padding
+# // methods to ensure the 11'th target lines up to the same vtable slot as the
+# // first Subtype virtual method (the other 10 are the java/lang/Object;
+# // methods).
+# interface Iface {
+#   public default void fakeMethod_A() {}
+#   public default void fakeMethod_B() {}
+#   public default void fakeMethod_C() {}
+#   public default void fakeMethod_D() {}
+#   public default void fakeMethod_E() {}
+#   public default void fakeMethod_F() {}
+#   public default void fakeMethod_G() {}
+#   public default void fakeMethod_H() {}
+#   public default void fakeMethod_I() {}
+#   public default void fakeMethod_J() {}
+#   public default void fakeMethod_K() {}
+#   public default void fakeMethod_Target() {}
+# }
+
+.class public abstract interface LIface;
+
+.super Ljava/lang/Object;
+
+# // 1
+.method public fakeMethod_A()V
+  .locals 0
+  return-void
+.end method
+
+# // 2
+.method public fakeMethod_B()V
+  .locals 0
+  return-void
+.end method
+
+# // 3
+.method public fakeMethod_C()V
+  .locals 0
+  return-void
+.end method
+
+# // 4
+.method public fakeMethod_D()V
+  .locals 0
+  return-void
+.end method
+
+# // 5
+.method public fakeMethod_E()V
+  .locals 0
+  return-void
+.end method
+
+# // 5
+.method public fakeMethod_F()V
+  .locals 0
+  return-void
+.end method
+
+# // 6
+.method public fakeMethod_G()V
+  .locals 0
+  return-void
+.end method
+
+# // 7
+.method public fakeMethod_H()V
+  .locals 0
+  return-void
+.end method
+
+# // 8
+.method public fakeMethod_I()V
+  .locals 0
+  return-void
+.end method
+
+# // 9
+.method public fakeMethod_J()V
+  .locals 0
+  return-void
+.end method
+
+# // 10
+.method public fakeMethod_K()V
+  .locals 0
+  return-void
+.end method
+
+# // 11
+.method public fakeMethod_Target()V
+  .locals 0
+  return-void
+.end method
diff --git a/test/978-virtual-interface/smali/Main.smali b/test/978-virtual-interface/smali/Main.smali
new file mode 100644
index 0000000..61b82f3
--- /dev/null
+++ b/test/978-virtual-interface/smali/Main.smali
@@ -0,0 +1,50 @@
+# /*
+#  * Copyright (C) 2015 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+#
+# public class Main {
+#   public static void main(String[] s) {
+#     Subtype s = new Subtype();
+#     try {
+#       s.callPackage();
+#       System.out.println("No error thrown!");
+#     } catch (IncompatibleClassChangeError e) {
+#       System.out.println("Recieved expected ICCE error!");
+#     }
+#   }
+# }
+
+.class public LMain;
+
+.super Ljava/lang/Object;
+
+.method public static main([Ljava/lang/String;)V
+    .locals 3
+
+    new-instance v0, LSubtype;
+    invoke-direct {v0}, LSubtype;-><init>()V
+    sget-object v2, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    :try_start
+        invoke-virtual {v0}, LSubtype;->callPackage()V
+        const-string v1, "No error thrown!"
+        invoke-virtual {v2, v1}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
+        return-void
+    :try_end
+    .catch Ljava/lang/IncompatibleClassChangeError; {:try_start .. :try_end} :error_start
+    :error_start
+        const-string v1, "Recieved expected ICCE error!"
+        invoke-virtual {v2, v1}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
+        return-void
+.end method
diff --git a/test/978-virtual-interface/smali/Subtype.smali b/test/978-virtual-interface/smali/Subtype.smali
new file mode 100644
index 0000000..f876cf9
--- /dev/null
+++ b/test/978-virtual-interface/smali/Subtype.smali
@@ -0,0 +1,40 @@
+# /*
+#  * Copyright (C) 2015 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+#
+#  public class Subtype extends pkg.Target implements Iface{
+#    public void callPackage() {
+#      // Fake into a virtual call.
+#      // ((Iface)this).fakeMethod_Target();
+#    }
+#  }
+
+.class public LSubtype;
+
+.super Lpkg/Target;
+
+.implements LIface;
+
+.method public constructor <init>()V
+    .locals 0
+    invoke-direct {p0}, Lpkg/Target;-><init>()V
+    return-void
+.end method
+
+.method public callPackage()V
+    .locals 0
+    invoke-virtual {p0}, LIface;->fakeMethod_Target()V
+    return-void
+.end method
diff --git a/test/978-virtual-interface/smali/Target.smali b/test/978-virtual-interface/smali/Target.smali
new file mode 100644
index 0000000..70108fb
--- /dev/null
+++ b/test/978-virtual-interface/smali/Target.smali
@@ -0,0 +1,40 @@
+# /*
+#  * Copyright (C) 2015 The Android Open Source Project
+#  *
+#  * Licensed under the Apache License, Version 2.0 (the "License");
+#  * you may not use this file except in compliance with the License.
+#  * You may obtain a copy of the License at
+#  *
+#  *      http://www.apache.org/licenses/LICENSE-2.0
+#  *
+#  * Unless required by applicable law or agreed to in writing, software
+#  * distributed under the License is distributed on an "AS IS" BASIS,
+#  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  * See the License for the specific language governing permissions and
+#  * limitations under the License.
+#  */
+#
+#  package pkg;
+#  public class Target {
+#    public void packageMethod() {
+#      System.out.println("Package method called!");
+#    }
+#  }
+
+.class public Lpkg/Target;
+
+.super Ljava/lang/Object;
+
+.method public constructor <init>()V
+    .locals 0
+    invoke-direct {p0}, Ljava/lang/Object;-><init>()V
+    return-void
+.end method
+
+.method packageMethod()V
+    .locals 2
+    const-string v1, "Package method called!"
+    sget-object v0, Ljava/lang/System;->out:Ljava/io/PrintStream;
+    invoke-virtual {v0, v1}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V
+    return-void
+.end method
diff --git a/test/Android.arm_vixl.mk b/test/Android.arm_vixl.mk
new file mode 100644
index 0000000..21b31b4
--- /dev/null
+++ b/test/Android.arm_vixl.mk
@@ -0,0 +1,51 @@
+#
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Known broken tests for the ARM VIXL backend.
+TEST_ART_BROKEN_OPTIMIZING_ARM_VIXL_RUN_TESTS := \
+  003-omnibus-opcodes \
+  020-string \
+  021-string2 \
+  042-new-instance \
+  044-proxy \
+  080-oom-throw \
+  082-inline-execute \
+  096-array-copy-concurrent-gc \
+  099-vmdebug \
+  100-reflect2 \
+  103-string-append \
+  114-ParallelGC \
+  122-npe \
+  129-ThreadGetId \
+  137-cfi \
+  144-static-field-sigquit \
+  412-new-array \
+  439-npe \
+  450-checker-types \
+  488-checker-inline-recursive-calls \
+  515-dce-dominator \
+  520-equivalent-phi \
+  525-checker-arrays-fields1 \
+  525-checker-arrays-fields2 \
+  527-checker-array-access-split \
+  538-checker-embed-constants \
+  552-checker-sharpening \
+  562-checker-no-intermediate \
+  570-checker-osr \
+  602-deoptimizeable \
+  700-LoadArgRegs \
+  800-smali \
+
diff --git a/test/Android.bp b/test/Android.bp
index 628f377..bdb7f80 100644
--- a/test/Android.bp
+++ b/test/Android.bp
@@ -178,6 +178,7 @@
     shared_libs: [
         "libartd",
         "libartd-compiler",
+        "libbase",
     ],
     target: {
         android: {
@@ -238,17 +239,35 @@
     shared_libs: ["libartd"],
 }
 
-art_cc_test_library {
-    name: "libtiagent",
+art_cc_defaults {
+    name: "libtiagent-defaults",
     defaults: ["libartagent-defaults"],
     srcs: [
         "ti-agent/common_load.cc",
         "901-hello-ti-agent/basics.cc",
         "902-hello-transformation/transform.cc",
+        "903-hello-tagging/tagging.cc",
+        "904-object-allocation/tracking.cc",
+        "905-object-free/tracking_free.cc",
+        "906-iterate-heap/iterate_heap.cc",
+        "907-get-loaded-classes/get_loaded_classes.cc",
+        "908-gc-start-finish/gc_callbacks.cc",
+        "909-attach-agent/attach.cc",
+        "910-methods/methods.cc",
+        "911-get-stack-trace/stack_trace.cc",
+        "912-classes/classes.cc",
+        "913-heaps/heaps.cc",
     ],
     shared_libs: [
-        "libart",
         "libbase",
+    ],
+}
+
+art_cc_test_library {
+    name: "libtiagent",
+    defaults: ["libtiagent-defaults"],
+    shared_libs: [
+        "libart",
         "libopenjdkjvmti",
     ],
 }
@@ -256,14 +275,9 @@
 art_cc_test_library {
     name: "libtiagentd",
     defaults: [
-        "libartagent-defaults",
+        "libtiagent-defaults",
         "art_debug_defaults",
     ],
-    srcs: [
-        "ti-agent/common_load.cc",
-        "901-hello-ti-agent/basics.cc",
-        "902-hello-transformation/transform.cc",
-    ],
     shared_libs: [
         "libartd",
         "libopenjdkjvmtid",
diff --git a/test/Android.run-test.mk b/test/Android.run-test.mk
index 064fb25..60318a4 100644
--- a/test/Android.run-test.mk
+++ b/test/Android.run-test.mk
@@ -233,11 +233,9 @@
 
 # Disable 149-suspend-all-stress, its output is flaky (b/28988206).
 # Disable 577-profile-foreign-dex (b/27454772).
-# Disable 955-methodhandles-smali until the accompanying smali change has been landed.
 TEST_ART_BROKEN_ALL_TARGET_TESTS := \
   149-suspend-all-stress \
   577-profile-foreign-dex \
-  955-methodhandles-smali \
 
 ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
     $(COMPILER_TYPES), $(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
@@ -263,12 +261,25 @@
 # 147-stripped-dex-fallback isn't supported on device because --strip-dex
 # requires the zip command.
 # 569-checker-pattern-replacement tests behaviour present only on host.
-# 902-hello-transformation isn't supported in current form due to linker
-# restrictions. See b/31681198
 TEST_ART_BROKEN_TARGET_TESTS := \
   147-stripped-dex-fallback \
-  569-checker-pattern-replacement \
-  902-hello-transformation
+  569-checker-pattern-replacement
+
+# These 9** tests are not supported in current form due to linker
+# restrictions. See b/31681198
+TEST_ART_BROKEN_TARGET_TESTS += \
+  902-hello-transformation \
+  903-hello-tagging \
+  904-object-allocation \
+  905-object-free \
+  906-iterate-heap \
+  907-get-loaded-classes \
+  908-gc-start-finish \
+  909-attach-agent \
+  910-methods \
+  911-get-stack-trace \
+  912-classes \
+  913-heaps \
 
 ifneq (,$(filter target,$(TARGET_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,target,$(RUN_TYPES),$(PREBUILD_TYPES), \
@@ -363,11 +374,15 @@
 # Tests that are broken with GC stress.
 # * 137-cfi needs to unwind a second forked process. We're using a primitive sleep to wait till we
 #   hope the second process got into the expected state. The slowness of gcstress makes this bad.
+# * 908-gc-start-finish expects GCs only to be run at clear points. The reduced heap size makes
+#   this non-deterministic. Same for 913.
 # * 961-default-iface-resolution-gen and 964-default-iface-init-genare very long tests that often
 #   will take more than the timeout to run when gcstress is enabled. This is because gcstress
 #   slows down allocations significantly which these tests do a lot.
 TEST_ART_BROKEN_GCSTRESS_RUN_TESTS := \
   137-cfi \
+  908-gc-start-finish \
+  913-heaps \
   961-default-iface-resolution-gen \
   964-default-iface-init-gen
 
@@ -454,8 +469,10 @@
 # 802 and 570-checker-osr:
 # This test dynamically enables tracing to force a deoptimization. This makes the test meaningless
 # when already tracing, and writes an error message that we do not want to check for.
+# 130 occasional timeout b/32383962.
 TEST_ART_BROKEN_TRACING_RUN_TESTS := \
   087-gc-after-link \
+  130-hprof \
   137-cfi \
   141-class-unload \
   570-checker-osr \
@@ -484,8 +501,14 @@
 # Known broken tests for the JIT.
 # CFI unwinding expects managed frames, and the test does not iterate enough to even compile. JIT
 # also uses Generic JNI instead of the JNI compiler.
+# Test 906 iterates the heap filtering with different options. No instances should be created
+# between those runs to be able to have precise checks.
+# Test 902 hits races with the JIT compiler. b/32821077
 TEST_ART_BROKEN_JIT_RUN_TESTS := \
-  137-cfi
+  137-cfi \
+  902-hello-transformation \
+  904-object-allocation \
+  906-iterate-heap \
 
 ifneq (,$(filter jit,$(COMPILER_TYPES)))
   ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,$(TARGET_TYPES),$(RUN_TYPES),$(PREBUILD_TYPES), \
@@ -509,6 +532,26 @@
       $(TEST_ART_BROKEN_OPTIMIZING_GRAPH_COLOR),$(ALL_ADDRESS_SIZES))
 endif
 
+# Known broken tests for the ARM VIXL backend.
+# Android.arm_vixl.mk defines TEST_ART_BROKEN_OPTIMIZING_ARM_VIXL_RUN_TESTS.
+include $(LOCAL_PATH)/Android.arm_vixl.mk
+
+ifdef ART_USE_VIXL_ARM_BACKEND
+  ifeq (arm,$(filter arm,$(TARGET_ARCH) $(TARGET_2ND_ARCH)))
+    ifneq (,$(filter $(OPTIMIZING_COMPILER_TYPES),$(COMPILER_TYPES)))
+      ART_TEST_KNOWN_BROKEN += $(call all-run-test-names,target,$(RUN_TYPES),$(PREBUILD_TYPES), \
+          $(OPTIMIZING_COMPILER_TYPES),$(RELOCATE_TYPES),$(TRACE_TYPES),$(GC_TYPES),$(JNI_TYPES), \
+          $(IMAGE_TYPES),$(PICTEST_TYPES),$(DEBUGGABLE_TYPES), \
+          $(TEST_ART_BROKEN_OPTIMIZING_ARM_VIXL_RUN_TESTS),32)
+    endif
+  endif
+  # TODO(VIXL): These two tests currently fail, but adding them to `ART_TEST_KNOWN_BROKEN` breaks
+  # `export ART_USE_VIXL_ARM_BACKEND=true && mma -j6 test-art-target-gtest dist`
+  #ART_TEST_KNOWN_BROKEN += test-art-target-gtest-dex2oat_test32
+  #ART_TEST_KNOWN_BROKEN += test-art-target-gtest-image_test32
+endif
+
+
 # Known broken tests for the mips32 optimizing compiler backend.
 TEST_ART_BROKEN_OPTIMIZING_MIPS_RUN_TESTS := \
 
@@ -567,11 +610,8 @@
 # Tests that should fail in the read barrier configuration with the Optimizing compiler (AOT).
 # 484: Baker's fast path based read barrier compiler instrumentation generates code containing
 #      more parallel moves on x86, thus some Checker assertions may fail.
-# 527: On ARM64 and ARM, the read barrier instrumentation does not support the HIntermediateAddress
-#      instruction yet (b/26601270).
 TEST_ART_BROKEN_OPTIMIZING_READ_BARRIER_RUN_TESTS := \
-  484-checker-register-hints \
-  527-checker-array-access-split
+  484-checker-register-hints
 
 # Tests that should fail in the read barrier configuration with JIT (Optimizing compiler).
 TEST_ART_BROKEN_JIT_READ_BARRIER_RUN_TESTS :=
diff --git a/test/562-no-intermediate/src/Main.java b/test/IMTA/Interfaces.java
similarity index 62%
copy from test/562-no-intermediate/src/Main.java
copy to test/IMTA/Interfaces.java
index 3b74d6f..4322f15 100644
--- a/test/562-no-intermediate/src/Main.java
+++ b/test/IMTA/Interfaces.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2016 The Android Open Source Project
+ * Copyright (C) 2011 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,14 +14,11 @@
  * limitations under the License.
  */
 
-public class Main {
-
-  /// CHECK-START-ARM64: int Main.main(String[]) register_allocator (after)
-  /// CHECK-NOT: IntermediateAddress
-  public static void main(String[] args) {
-    array[index] += Math.cos(42);
-  }
-
-  static int index = 0;
-  static double[] array = new double[2];
+class Interfaces {
+    interface A {
+        public void foo();
+    }
+    interface Z {
+        public void foo();
+    }
 }
diff --git a/test/562-no-intermediate/src/Main.java b/test/IMTB/Interfaces.java
similarity index 62%
copy from test/562-no-intermediate/src/Main.java
copy to test/IMTB/Interfaces.java
index 3b74d6f..f252624 100644
--- a/test/562-no-intermediate/src/Main.java
+++ b/test/IMTB/Interfaces.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2016 The Android Open Source Project
+ * Copyright (C) 2011 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,14 +14,15 @@
  * limitations under the License.
  */
 
-public class Main {
-
-  /// CHECK-START-ARM64: int Main.main(String[]) register_allocator (after)
-  /// CHECK-NOT: IntermediateAddress
-  public static void main(String[] args) {
-    array[index] += Math.cos(42);
-  }
-
-  static int index = 0;
-  static double[] array = new double[2];
+class Interfaces {
+    interface A {
+        public void bar();
+        public void foo();
+    }
+    interface L {
+        public void foo();
+    }
+    interface Z {
+        public void foo();
+    }
 }
diff --git a/test/VerifierDeps/MyClassWithNoSuper.smali b/test/VerifierDeps/MyClassWithNoSuper.smali
new file mode 100644
index 0000000..d8509bc
--- /dev/null
+++ b/test/VerifierDeps/MyClassWithNoSuper.smali
@@ -0,0 +1,16 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LMyClassWithNoSuper;
+.super LNoSuper;
diff --git a/test/VerifierDeps/MyClassWithNoSuperButFailures.smali b/test/VerifierDeps/MyClassWithNoSuperButFailures.smali
new file mode 100644
index 0000000..1dbe9d1
--- /dev/null
+++ b/test/VerifierDeps/MyClassWithNoSuperButFailures.smali
@@ -0,0 +1,21 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LMyClassWithNoSuperButFailures;
+.super LNoSuper;
+
+.method public final foo()I
+  .registers 1
+  return-void
+.end method
diff --git a/test/VerifierDeps/MyVerificationFailure.smali b/test/VerifierDeps/MyVerificationFailure.smali
new file mode 100644
index 0000000..187b1ad
--- /dev/null
+++ b/test/VerifierDeps/MyVerificationFailure.smali
@@ -0,0 +1,21 @@
+# Copyright (C) 2016 The Android Open Source Project
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+.class public LMyVerificationFailure;
+.super Ljava/lang/Object;
+
+.method public final foo()I
+  .registers 1
+  return-void
+.end method
diff --git a/test/common/runtime_state.cc b/test/common/runtime_state.cc
index 4248148..a2eb370 100644
--- a/test/common/runtime_state.cc
+++ b/test/common/runtime_state.cc
@@ -128,9 +128,10 @@
     return;
   }
 
+  Thread* self = Thread::Current();
   ArtMethod* method = nullptr;
   {
-    ScopedObjectAccess soa(Thread::Current());
+    ScopedObjectAccess soa(self);
 
     ScopedUtfChars chars(env, method_name);
     CHECK(chars.c_str() != nullptr);
@@ -147,11 +148,11 @@
     } else {
       // Sleep to yield to the compiler thread.
       usleep(1000);
-      ScopedObjectAccess soa(Thread::Current());
+      ScopedObjectAccess soa(self);
       // Make sure there is a profiling info, required by the compiler.
-      ProfilingInfo::Create(soa.Self(), method, /* retry_allocation */ true);
+      ProfilingInfo::Create(self, method, /* retry_allocation */ true);
       // Will either ensure it's compiled or do the compilation itself.
-      jit->CompileMethod(method, soa.Self(), /* osr */ false);
+      jit->CompileMethod(method, self, /* osr */ false);
     }
   }
 }
diff --git a/test/dexdump/invoke-polymorphic.dex b/test/dexdump/invoke-polymorphic.dex
new file mode 100644
index 0000000..5cf3068
--- /dev/null
+++ b/test/dexdump/invoke-polymorphic.dex
Binary files differ
diff --git a/test/dexdump/invoke-polymorphic.lst b/test/dexdump/invoke-polymorphic.lst
new file mode 100644
index 0000000..3eb8e24
--- /dev/null
+++ b/test/dexdump/invoke-polymorphic.lst
@@ -0,0 +1,3 @@
+#invoke-polymorphic.dex
+0x000001bc 8 Main <init> ()V Main.java 9
+0x000001d4 60 Main main ([Ljava/lang/String;)V Main.java 31
diff --git a/test/dexdump/invoke-polymorphic.txt b/test/dexdump/invoke-polymorphic.txt
new file mode 100644
index 0000000..16e708c
--- /dev/null
+++ b/test/dexdump/invoke-polymorphic.txt
@@ -0,0 +1,109 @@
+Processing 'invoke-polymorphic.dex'...
+Opened 'invoke-polymorphic.dex', DEX version '037'
+DEX file header:
+magic               : 'dex\n037\0'
+checksum            : 0b5f9fd7
+signature           : fcf4...f0e5
+file_size           : 1160
+header_size         : 112
+link_size           : 0
+link_off            : 0 (0x000000)
+string_ids_size     : 30
+string_ids_off      : 112 (0x000070)
+type_ids_size       : 11
+type_ids_off        : 232 (0x0000e8)
+proto_ids_size      : 6
+proto_ids_off       : 276 (0x000114)
+field_ids_size      : 0
+field_ids_off       : 0 (0x000000)
+method_ids_size     : 5
+method_ids_off      : 348 (0x00015c)
+class_defs_size     : 1
+class_defs_off      : 388 (0x000184)
+data_size           : 740
+data_off            : 420 (0x0001a4)
+
+Class #0 header:
+class_idx           : 2
+access_flags        : 1 (0x0001)
+superclass_idx      : 4
+interfaces_off      : 0 (0x000000)
+source_file_idx     : 12
+annotations_off     : 528 (0x000210)
+class_data_off      : 959 (0x0003bf)
+static_fields_size  : 0
+instance_fields_size: 0
+direct_methods_size : 2
+virtual_methods_size: 0
+
+Class #0 annotations:
+Annotations on method #1 'main'
+  VISIBILITY_SYSTEM Ldalvik/annotation/Throws; value={ Ljava/lang/Throwable; }
+
+Class #0            -
+  Class descriptor  : 'LMain;'
+  Access flags      : 0x0001 (PUBLIC)
+  Superclass        : 'Ljava/lang/Object;'
+  Interfaces        -
+  Static fields     -
+  Instance fields   -
+  Direct methods    -
+    #0              : (in LMain;)
+      name          : '<init>'
+      type          : '()V'
+      access        : 0x10001 (PUBLIC CONSTRUCTOR)
+      code          -
+      registers     : 1
+      ins           : 1
+      outs          : 1
+      insns size    : 4 16-bit code units
+0001ac:                                        |[0001ac] Main.<init>:()V
+0001bc: 7010 0200 0000                         |0000: invoke-direct {v0}, Ljava/lang/Object;.<init>:()V // method@0002
+0001c2: 0e00                                   |0003: return-void
+      catches       : (none)
+      positions     : 
+        0x0000 line=9
+      locals        : 
+        0x0000 - 0x0004 reg=0 this LMain; 
+
+    #1              : (in LMain;)
+      name          : 'main'
+      type          : '([Ljava/lang/String;)V'
+      access        : 0x0009 (PUBLIC STATIC)
+      code          -
+      registers     : 10
+      ins           : 1
+      outs          : 0
+      insns size    : 30 16-bit code units
+0001c4:                                        |[0001c4] Main.main:([Ljava/lang/String;)V
+0001d4: 1802 9a99 9999 9999 0140               |0000: const-wide v2, #double 2.2 // #400199999999999a
+0001de: 1214                                   |0005: const/4 v4, #int 1 // #1
+0001e0: 1200                                   |0006: const/4 v0, #int 0 // #0
+0001e2: 1205                                   |0007: const/4 v5, #int 0 // #0
+0001e4: 1b01 1200 0000                         |0008: const-string/jumbo v1, "a" // string@00000012
+0001ea: 0146                                   |000b: move v6, v4
+0001ec: fb07 0300 0000 0200                    |000c: invoke-polymorphic/range {v0, v1, v2, v3, v4, v5, v6}, Ljava/lang/invoke/MethodHandle;.invoke:([Ljava/lang/Object;)Ljava/lang/Object;, (Ljava/lang/String;DILjava/lang/Object;I)Ljava/lang/String; // method@0003, proto@0002
+0001f4: 0c07                                   |0010: move-result-object v7
+0001f6: fa40 0400 2043 0000                    |0011: invoke-polymorphic {v0, v2, v3, v4}, Ljava/lang/invoke/MethodHandle;.invokeExact:([Ljava/lang/Object;)Ljava/lang/Object;, (DI)I // method@0004, proto@0000
+0001fe: 0a08                                   |0015: move-result v8
+000200: 1b01 1200 0000                         |0016: const-string/jumbo v1, "a" // string@00000012
+000206: fa54 0300 1032 0400                    |0019: invoke-polymorphic {v0, v1, v2, v3, v4}, Ljava/lang/invoke/MethodHandle;.invoke:([Ljava/lang/Object;)Ljava/lang/Object;, (Ljava/lang/String;DI)V // method@0003, proto@0004
+00020e: 0e00                                   |001d: return-void
+      catches       : (none)
+      positions     : 
+        0x0006 line=31
+        0x0007 line=32
+        0x0008 line=33
+        0x0011 line=34
+        0x0016 line=35
+        0x001d line=56
+      locals        : 
+        0x0007 - 0x001e reg=0 handle Ljava/lang/invoke/MethodHandle; 
+        0x0008 - 0x001e reg=5 o Ljava/lang/Object; 
+        0x0011 - 0x001e reg=7 s Ljava/lang/String; 
+        0x0016 - 0x001e reg=8 x I 
+        0x0000 - 0x001e reg=9 args [Ljava/lang/String; 
+
+  Virtual methods   -
+  source_file_idx   : 12 (Main.java)
+
diff --git a/test/dexdump/invoke-polymorphic.xml b/test/dexdump/invoke-polymorphic.xml
new file mode 100644
index 0000000..ab99a76
--- /dev/null
+++ b/test/dexdump/invoke-polymorphic.xml
@@ -0,0 +1,33 @@
+<api>
+<package name=""
+>
+<class name="Main"
+ extends="java.lang.Object"
+ interface="false"
+ abstract="false"
+ static="false"
+ final="false"
+ visibility="public"
+>
+<constructor name="Main"
+ type="Main"
+ static="false"
+ final="false"
+ visibility="public"
+>
+</constructor>
+<method name="main"
+ return="void"
+ abstract="false"
+ native="false"
+ synchronized="false"
+ static="true"
+ final="false"
+ visibility="public"
+>
+<parameter name="arg0" type="java.lang.String[]">
+</parameter>
+</method>
+</class>
+</package>
+</api>
diff --git a/test/etc/run-test-jar b/test/etc/run-test-jar
index d8f42a2..c525b2b 100755
--- a/test/etc/run-test-jar
+++ b/test/etc/run-test-jar
@@ -44,7 +44,7 @@
 TIME_OUT="gdb"  # "n" (disabled), "timeout" (use timeout), "gdb" (use gdb)
 # Value in seconds
 if [ "$ART_USE_READ_BARRIER" = "true" ]; then
-  TIME_OUT_VALUE=1800  # 30 minutes.
+  TIME_OUT_VALUE=2400  # 40 minutes.
 else
   TIME_OUT_VALUE=1200  # 20 minutes.
 fi
@@ -148,7 +148,7 @@
         SECONDARY_DEX=":$DEX_LOCATION/$TEST_NAME-ex.jar"
         # Enable cfg-append to make sure we get the dump for both dex files.
         # (otherwise the runtime compilation of the secondary dex will overwrite
-        # the dump of the first one)
+        # the dump of the first one).
         FLAGS="${FLAGS} -Xcompiler-option --dump-cfg-append"
         COMPILE_FLAGS="${COMPILE_FLAGS} --dump-cfg-append"
         shift
@@ -397,9 +397,30 @@
 fi
 
 if [ "$HOST" = "n" ]; then
-  ISA=$(adb shell ls -F /data/dalvik-cache | grep -Ewo "${ARCHITECTURES_PATTERN}")
+  # Need to be root to query /data/dalvik-cache
+  adb root > /dev/null
+  adb wait-for-device
+  ISA=
+  ISA_adb_invocation=
+  ISA_outcome=
+  # We iterate a few times to workaround an adb issue. b/32655576
+  for i in {1..10}; do
+    ISA_adb_invocation=$(adb shell ls -F /data/dalvik-cache)
+    ISA_outcome=$?
+    ISA=$(echo $ISA_adb_invocation | grep -Ewo "${ARCHITECTURES_PATTERN}")
+    if [ x"$ISA" != "x" ]; then
+      break;
+    fi
+  done
   if [ x"$ISA" = "x" ]; then
     echo "Unable to determine architecture"
+    # Print a few things for helping diagnosing the problem.
+    echo "adb invocation output: $ISA_adb_invocation"
+    echo "adb invocation outcome: $ISA_outcome"
+    echo $(adb shell ls -F /data/dalvik-cache)
+    echo $(adb shell ls /data/dalvik-cache)
+    echo ${ARCHITECTURES_PATTERN}
+    echo $(adb shell ls -F /data/dalvik-cache | grep -Ewo "${ARCHITECTURES_PATTERN}")
     exit 1
   fi
 fi
diff --git a/test/run-test b/test/run-test
index 7a4afaf..37eefb3 100755
--- a/test/run-test
+++ b/test/run-test
@@ -758,8 +758,8 @@
 if [ "$run_checker" = "yes" -a "$target_mode" = "yes" ]; then
   # We will need to `adb pull` the .cfg output from the target onto the host to
   # run checker on it. This file can be big.
-  build_file_size_limit=24576
-  run_file_size_limit=24576
+  build_file_size_limit=32768
+  run_file_size_limit=32768
 fi
 if [ ${USE_JACK} = "false" ]; then
   # Set ulimit if we build with dx only, Jack can generate big temp files.
diff --git a/test/ti-agent/common_helper.h b/test/ti-agent/common_helper.h
new file mode 100644
index 0000000..84997f3
--- /dev/null
+++ b/test/ti-agent/common_helper.h
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2016 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ART_TEST_TI_AGENT_COMMON_HELPER_H_
+#define ART_TEST_TI_AGENT_COMMON_HELPER_H_
+
+#include "jni.h"
+#include "ScopedLocalRef.h"
+
+namespace art {
+
+template <typename T>
+static jobjectArray CreateObjectArray(JNIEnv* env,
+                                      jint length,
+                                      const char* component_type_descriptor,
+                                      T src) {
+  if (length < 0) {
+    return nullptr;
+  }
+
+  ScopedLocalRef<jclass> obj_class(env, env->FindClass(component_type_descriptor));
+  if (obj_class.get() == nullptr) {
+    return nullptr;
+  }
+
+  ScopedLocalRef<jobjectArray> ret(env, env->NewObjectArray(length, obj_class.get(), nullptr));
+  if (ret.get() == nullptr) {
+    return nullptr;
+  }
+
+  for (jint i = 0; i < length; ++i) {
+    jobject element = src(i);
+    env->SetObjectArrayElement(ret.get(), static_cast<jint>(i), element);
+    env->DeleteLocalRef(element);
+    if (env->ExceptionCheck()) {
+      return nullptr;
+    }
+  }
+
+  return ret.release();
+}
+
+static void SetAllCapabilities(jvmtiEnv* env) {
+  jvmtiCapabilities caps;
+  env->GetPotentialCapabilities(&caps);
+  env->AddCapabilities(&caps);
+}
+
+}  // namespace art
+
+#endif  // ART_TEST_TI_AGENT_COMMON_HELPER_H_
diff --git a/test/ti-agent/common_load.cc b/test/ti-agent/common_load.cc
index 53bb153..a959482 100644
--- a/test/ti-agent/common_load.cc
+++ b/test/ti-agent/common_load.cc
@@ -22,12 +22,26 @@
 #include "art_method-inl.h"
 #include "base/logging.h"
 #include "base/macros.h"
+#include "common_load.h"
 
 #include "901-hello-ti-agent/basics.h"
 #include "902-hello-transformation/transform.h"
+#include "903-hello-tagging/tagging.h"
+#include "904-object-allocation/tracking.h"
+#include "905-object-free/tracking_free.h"
+#include "906-iterate-heap/iterate_heap.h"
+#include "907-get-loaded-classes/get_loaded_classes.h"
+#include "908-gc-start-finish/gc_callbacks.h"
+#include "909-attach-agent/attach.h"
+#include "910-methods/methods.h"
+#include "911-get-stack-trace/stack_trace.h"
+#include "912-classes/classes.h"
+#include "913-heaps/heaps.h"
 
 namespace art {
 
+jvmtiEnv* jvmti_env;
+
 using OnLoad   = jint (*)(JavaVM* vm, char* options, void* reserved);
 using OnAttach = jint (*)(JavaVM* vm, char* options, void* reserved);
 
@@ -41,6 +55,17 @@
 AgentLib agents[] = {
   { "901-hello-ti-agent", Test901HelloTi::OnLoad, nullptr },
   { "902-hello-transformation", Test902HelloTransformation::OnLoad, nullptr },
+  { "903-hello-tagging", Test903HelloTagging::OnLoad, nullptr },
+  { "904-object-allocation", Test904ObjectAllocation::OnLoad, nullptr },
+  { "905-object-free", Test905ObjectFree::OnLoad, nullptr },
+  { "906-iterate-heap", Test906IterateHeap::OnLoad, nullptr },
+  { "907-get-loaded-classes", Test907GetLoadedClasses::OnLoad, nullptr },
+  { "908-gc-start-finish", Test908GcStartFinish::OnLoad, nullptr },
+  { "909-attach-agent", nullptr, Test909AttachAgent::OnAttach },
+  { "910-methods", Test910Methods::OnLoad, nullptr },
+  { "911-get-stack-trace", Test911GetStackTrace::OnLoad, nullptr },
+  { "912-classes", Test912Classes::OnLoad, nullptr },
+  { "913-heaps", Test913Heaps::OnLoad, nullptr },
 };
 
 static AgentLib* FindAgent(char* name) {
@@ -90,7 +115,6 @@
   return lib->load(vm, remaining_options, reserved);
 }
 
-
 extern "C" JNIEXPORT jint JNICALL Agent_OnAttach(JavaVM* vm, char* options, void* reserved) {
   char* remaining_options = nullptr;
   char* name_option = nullptr;
diff --git a/test/562-no-intermediate/src/Main.java b/test/ti-agent/common_load.h
similarity index 68%
copy from test/562-no-intermediate/src/Main.java
copy to test/ti-agent/common_load.h
index 3b74d6f..fac94b4 100644
--- a/test/562-no-intermediate/src/Main.java
+++ b/test/ti-agent/common_load.h
@@ -14,14 +14,15 @@
  * limitations under the License.
  */
 
-public class Main {
+#ifndef ART_TEST_TI_AGENT_COMMON_LOAD_H_
+#define ART_TEST_TI_AGENT_COMMON_LOAD_H_
 
-  /// CHECK-START-ARM64: int Main.main(String[]) register_allocator (after)
-  /// CHECK-NOT: IntermediateAddress
-  public static void main(String[] args) {
-    array[index] += Math.cos(42);
-  }
+#include "openjdkjvmti/jvmti.h"
 
-  static int index = 0;
-  static double[] array = new double[2];
-}
+namespace art {
+
+extern jvmtiEnv* jvmti_env;
+
+}  // namespace art
+
+#endif  // ART_TEST_TI_AGENT_COMMON_LOAD_H_
diff --git a/tools/ahat/Android.mk b/tools/ahat/Android.mk
index ebf087d..27c2054 100644
--- a/tools/ahat/Android.mk
+++ b/tools/ahat/Android.mk
@@ -55,11 +55,23 @@
 include $(BUILD_HOST_JAVA_LIBRARY)
 AHAT_TEST_JAR := $(LOCAL_BUILT_MODULE)
 
+# Rule to generate the proguard configuration for the test-dump program.
+# We copy the configuration to the intermediates directory because jack will
+# output the proguard map in that same directory.
+AHAT_TEST_DUMP_PROGUARD_CONFIG := $(intermediates.COMMON)/config.pro
+AHAT_TEST_DUMP_PROGUARD_MAP := $(intermediates.COMMON)/proguard.map
+$(AHAT_TEST_DUMP_PROGUARD_CONFIG): PRIVATE_AHAT_PROGUARD_CONFIG_IN := $(LOCAL_PATH)/test-dump/config.pro
+$(AHAT_TEST_DUMP_PROGUARD_CONFIG): PRIVATE_AHAT_PROGUARD_CONFIG := $(AHAT_TEST_DUMP_PROGUARD_CONFIG)
+$(AHAT_TEST_DUMP_PROGUARD_CONFIG): $(LOCAL_PATH)/test-dump/config.pro
+	cp $(PRIVATE_AHAT_PROGUARD_CONFIG_IN) $(PRIVATE_AHAT_PROGUARD_CONFIG)
+
 # --- ahat-test-dump.jar --------------
 include $(CLEAR_VARS)
 LOCAL_MODULE := ahat-test-dump
 LOCAL_MODULE_TAGS := tests
 LOCAL_SRC_FILES := $(call all-java-files-under, test-dump)
+LOCAL_ADDITIONAL_DEPENDENCIES := $(AHAT_TEST_DUMP_PROGUARD_CONFIG)
+LOCAL_JACK_FLAGS := --config-proguard $(AHAT_TEST_DUMP_PROGUARD_CONFIG)
 include $(BUILD_HOST_DALVIK_JAVA_LIBRARY)
 
 # Determine the location of the test-dump.jar and test-dump.hprof files.
@@ -84,12 +96,15 @@
 .PHONY: ahat-test
 ahat-test: PRIVATE_AHAT_TEST_DUMP_HPROF := $(AHAT_TEST_DUMP_HPROF)
 ahat-test: PRIVATE_AHAT_TEST_JAR := $(AHAT_TEST_JAR)
+ahat-test: PRIVATE_AHAT_PROGUARD_MAP := $(AHAT_TEST_DUMP_PROGUARD_MAP)
 ahat-test: $(AHAT_TEST_JAR) $(AHAT_TEST_DUMP_HPROF)
-	java -Dahat.test.dump.hprof=$(PRIVATE_AHAT_TEST_DUMP_HPROF) -jar $(PRIVATE_AHAT_TEST_JAR)
+	java -Dahat.test.dump.hprof=$(PRIVATE_AHAT_TEST_DUMP_HPROF) -Dahat.test.dump.map=$(PRIVATE_AHAT_PROGUARD_MAP) -jar $(PRIVATE_AHAT_TEST_JAR)
 
 # Clean up local variables.
 AHAT_TEST_DUMP_DEPENDENCIES :=
 AHAT_TEST_DUMP_HPROF :=
 AHAT_TEST_DUMP_JAR :=
+AHAT_TEST_DUMP_PROGUARD_CONFIG :=
+AHAT_TEST_DUMP_PROGUARD_MAP :=
 AHAT_TEST_JAR :=
 
diff --git a/tools/ahat/README.txt b/tools/ahat/README.txt
index 8604ff0..8dfb4ab 100644
--- a/tools/ahat/README.txt
+++ b/tools/ahat/README.txt
@@ -1,12 +1,14 @@
 AHAT - Android Heap Analysis Tool
 
 Usage:
-  java -jar ahat.jar [-p port] FILE
+  java -jar ahat.jar [-p port] [--proguard-map FILE] FILE
     Launch an http server for viewing the given Android heap-dump FILE.
 
   Options:
     -p <port>
        Serve pages on the given port. Defaults to 7100.
+    --proguard-map FILE
+       Use the proguard map FILE to deobfuscate the heap dump.
 
 TODO:
  * Have a way to diff two heap dumps.
@@ -74,7 +76,9 @@
  * Instance.isRoot and Instance.getRootTypes.
 
 Release History:
- 0.8 Pending
+ 0.9 Pending
+
+ 0.8 Oct 18, 2016
    Show sample path from GC root with field names in place of dominator path.
 
  0.7 Aug 16, 2016
@@ -82,7 +86,7 @@
    Target Java 1.7.
 
  0.6 Jun 21, 2016
-   Add support for proguard deobfuscation (pending AOSP push of perflib)
+   Add support for proguard deobfuscation.
 
  0.5 Apr 19, 2016
    Update perflib to perflib-25.0.0 to improve processing performance.
diff --git a/tools/ahat/src/AhatSnapshot.java b/tools/ahat/src/AhatSnapshot.java
index a8205c7..ba8243f 100644
--- a/tools/ahat/src/AhatSnapshot.java
+++ b/tools/ahat/src/AhatSnapshot.java
@@ -20,6 +20,7 @@
 import com.android.tools.perflib.heap.ClassObj;
 import com.android.tools.perflib.heap.Heap;
 import com.android.tools.perflib.heap.Instance;
+import com.android.tools.perflib.heap.ProguardMap;
 import com.android.tools.perflib.heap.RootObj;
 import com.android.tools.perflib.heap.RootType;
 import com.android.tools.perflib.heap.Snapshot;
@@ -71,8 +72,8 @@
   /**
    * Create an AhatSnapshot from an hprof file.
    */
-  public static AhatSnapshot fromHprof(File hprof) throws IOException {
-    Snapshot snapshot = Snapshot.createSnapshot(new MemoryMappedFileBuffer(hprof));
+  public static AhatSnapshot fromHprof(File hprof, ProguardMap map) throws IOException {
+    Snapshot snapshot = Snapshot.createSnapshot(new MemoryMappedFileBuffer(hprof), map);
     snapshot.computeDominators();
     return new AhatSnapshot(snapshot);
   }
diff --git a/tools/ahat/src/Main.java b/tools/ahat/src/Main.java
index fdc5a86..c79b578 100644
--- a/tools/ahat/src/Main.java
+++ b/tools/ahat/src/Main.java
@@ -16,24 +16,28 @@
 
 package com.android.ahat;
 
+import com.android.tools.perflib.heap.ProguardMap;
 import com.sun.net.httpserver.HttpServer;
 import java.io.File;
 import java.io.IOException;
 import java.io.PrintStream;
 import java.net.InetAddress;
 import java.net.InetSocketAddress;
+import java.text.ParseException;
 import java.util.concurrent.Executors;
 
 public class Main {
 
   public static void help(PrintStream out) {
-    out.println("java -jar ahat.jar [-p port] FILE");
+    out.println("java -jar ahat.jar [-p port] [--proguard-map FILE] FILE");
     out.println("  Launch an http server for viewing "
         + "the given Android heap-dump FILE.");
     out.println("");
     out.println("Options:");
     out.println("  -p <port>");
     out.println("     Serve pages on the given port. Defaults to 7100.");
+    out.println("  --proguard-map FILE");
+    out.println("     Use the proguard map FILE to deobfuscate the heap dump.");
     out.println("");
   }
 
@@ -47,10 +51,19 @@
     }
 
     File hprof = null;
+    ProguardMap map = new ProguardMap();
     for (int i = 0; i < args.length; i++) {
       if ("-p".equals(args[i]) && i + 1 < args.length) {
         i++;
         port = Integer.parseInt(args[i]);
+      } else if ("--proguard-map".equals(args[i]) && i + 1 < args.length) {
+        i++;
+        try {
+          map.readFromFile(new File(args[i]));
+        } catch (IOException|ParseException ex) {
+          System.out.println("Unable to read proguard map: " + ex);
+          System.out.println("The proguard map will not be used.");
+        }
       } else {
         if (hprof != null) {
           System.err.println("multiple input files.");
@@ -74,7 +87,7 @@
     HttpServer server = HttpServer.create(addr, 0);
 
     System.out.println("Processing hprof file...");
-    AhatSnapshot ahat = AhatSnapshot.fromHprof(hprof);
+    AhatSnapshot ahat = AhatSnapshot.fromHprof(hprof, map);
     server.createContext("/", new AhatHttpHandler(new OverviewHandler(ahat, hprof)));
     server.createContext("/rooted", new AhatHttpHandler(new RootedHandler(ahat)));
     server.createContext("/object", new AhatHttpHandler(new ObjectHandler(ahat)));
diff --git a/tools/ahat/src/manifest.txt b/tools/ahat/src/manifest.txt
index cac53c5..1993910 100644
--- a/tools/ahat/src/manifest.txt
+++ b/tools/ahat/src/manifest.txt
@@ -1,4 +1,4 @@
 Name: ahat/
 Implementation-Title: ahat
-Implementation-Version: 0.7
+Implementation-Version: 0.8
 Main-Class: com.android.ahat.Main
diff --git a/tools/ahat/test-dump/config.pro b/tools/ahat/test-dump/config.pro
new file mode 100644
index 0000000..0cf7a87
--- /dev/null
+++ b/tools/ahat/test-dump/config.pro
@@ -0,0 +1,15 @@
+# The goal of this proguard configuration is to obfuscate the test-dump
+# program so that the heap dump it generates is an obfuscated heap dump.
+# This allows us to test that deobfuscation of the generated heap dump is
+# working properly.
+
+# All we care about is obfuscation. Don't do any other optimizations.
+-dontpreverify
+-dontoptimize
+-dontshrink
+
+-keep public class Main {
+  public static void main(java.lang.String[]);
+}
+
+-printmapping proguard.map
diff --git a/tools/ahat/test/TestDump.java b/tools/ahat/test/TestDump.java
index c3a76e4..ebce61c 100644
--- a/tools/ahat/test/TestDump.java
+++ b/tools/ahat/test/TestDump.java
@@ -19,8 +19,10 @@
 import com.android.tools.perflib.heap.ClassObj;
 import com.android.tools.perflib.heap.Field;
 import com.android.tools.perflib.heap.Instance;
+import com.android.tools.perflib.heap.ProguardMap;
 import java.io.File;
 import java.io.IOException;
+import java.text.ParseException;
 import java.util.Map;
 
 /**
@@ -44,11 +46,21 @@
    * For example:
    *   java -Dahat.test.dump.hprof=test-dump.hprof -jar ahat-tests.jar
    *
-   * An IOException is thrown if there is a failure reading the hprof file.
+   * An IOException is thrown if there is a failure reading the hprof file or
+   * the proguard map.
    */
   private TestDump() throws IOException {
       String hprof = System.getProperty("ahat.test.dump.hprof");
-      mSnapshot = AhatSnapshot.fromHprof(new File(hprof));
+
+      String mapfile = System.getProperty("ahat.test.dump.map");
+      ProguardMap map = new ProguardMap();
+      try {
+        map.readFromFile(new File(mapfile));
+      } catch (ParseException e) {
+        throw new IOException("Unable to load proguard map", e);
+      }
+
+      mSnapshot = AhatSnapshot.fromHprof(new File(hprof), map);
   }
 
   /**
diff --git a/tools/buildbot-build.sh b/tools/buildbot-build.sh
index 5ef66d1..2d26b48 100755
--- a/tools/buildbot-build.sh
+++ b/tools/buildbot-build.sh
@@ -19,7 +19,17 @@
   exit 1
 fi
 
-out_dir=${OUT_DIR-out}
+# Logic for setting out_dir from build/make/core/envsetup.mk:
+if [[ -z $OUT_DIR ]]; then
+  if [[ -z $OUT_DIR_COMMON_BASE ]]; then
+    out_dir=out
+  else
+    out_dir=${OUT_DIR_COMMON_BASE}/${PWD##*/}
+  fi
+else
+  out_dir=${OUT_DIR}
+fi
+
 java_libraries_dir=${out_dir}/target/common/obj/JAVA_LIBRARIES
 common_targets="vogar core-tests apache-harmony-jdwp-tests-hostdex jsr166-tests mockito-target ${out_dir}/host/linux-x86/bin/jack"
 mode="target"
@@ -45,16 +55,6 @@
   fi
 done
 
-# Workaround for repo incompatibilities on the Chromium buildbot.
-# TODO: Remove this workaround once https://bugs.chromium.org/p/chromium/issues/detail?id=646329
-# is addressed.
-repo=$(which repo)
-if [[ $repo == *"depot_tools"* ]]; then
-  ln -s build/soong/root.bp Android.bp
-  ln -s build/soong/bootstrap.bash bootstrap.bash
-  echo "include build/core/main.mk" > Makefile
-fi
-
 if [[ $mode == "host" ]]; then
   make_command="make $j_arg $showcommands build-art-host-tests $common_targets"
   make_command+=" ${out_dir}/host/linux-x86/lib/libjavacoretests.so "
diff --git a/tools/cpp-define-generator/constant_class.def b/tools/cpp-define-generator/constant_class.def
index 58372f9..f46cd33 100644
--- a/tools/cpp-define-generator/constant_class.def
+++ b/tools/cpp-define-generator/constant_class.def
@@ -25,6 +25,7 @@
 
 DEFINE_FLAG_OFFSET(MIRROR_CLASS, STATUS_INITIALIZED,       art::mirror::Class::kStatusInitialized)
 DEFINE_FLAG_OFFSET(ACCESS_FLAGS, CLASS_IS_FINALIZABLE,     art::kAccClassIsFinalizable)
+DEFINE_FLAG_OFFSET(ACCESS_FLAGS, CLASS_IS_INTERFACE,       art::kAccInterface)
 // TODO: We should really have a BitPosition which also checks it's a power of 2.
 DEFINE_FLAG_OFFSET(ACCESS_FLAGS, CLASS_IS_FINALIZABLE_BIT, art::MostSignificantBit(art::kAccClassIsFinalizable))
 
diff --git a/tools/cpp-define-generator/constant_lockword.def b/tools/cpp-define-generator/constant_lockword.def
index 67ed5b5..08d5885 100644
--- a/tools/cpp-define-generator/constant_lockword.def
+++ b/tools/cpp-define-generator/constant_lockword.def
@@ -30,6 +30,10 @@
 DEFINE_LOCK_WORD_EXPR(READ_BARRIER_STATE_MASK_TOGGLED, uint32_t, kReadBarrierStateMaskShiftedToggled)
 DEFINE_LOCK_WORD_EXPR(THIN_LOCK_COUNT_ONE,       int32_t,  kThinLockCountOne)
 
+DEFINE_LOCK_WORD_EXPR(STATE_FORWARDING_ADDRESS, uint32_t, kStateForwardingAddress)
+DEFINE_LOCK_WORD_EXPR(STATE_FORWARDING_ADDRESS_OVERFLOW, uint32_t, kStateForwardingAddressOverflow)
+DEFINE_LOCK_WORD_EXPR(STATE_FORWARDING_ADDRESS_SHIFT, uint32_t, kForwardingAddressShift)
+
 DEFINE_LOCK_WORD_EXPR(GC_STATE_MASK_SHIFTED,   uint32_t,  kGCStateMaskShifted)
 DEFINE_LOCK_WORD_EXPR(GC_STATE_MASK_SHIFTED_TOGGLED, uint32_t, kGCStateMaskShiftedToggled)
 DEFINE_LOCK_WORD_EXPR(GC_STATE_SHIFT,   int32_t,  kGCStateShift)
diff --git a/tools/cpp-define-generator/constant_thread.def b/tools/cpp-define-generator/constant_thread.def
index af5ca21..1364b55 100644
--- a/tools/cpp-define-generator/constant_thread.def
+++ b/tools/cpp-define-generator/constant_thread.def
@@ -25,5 +25,7 @@
 
 DEFINE_THREAD_CONSTANT(SUSPEND_REQUEST,    int32_t, art::kSuspendRequest)
 DEFINE_THREAD_CONSTANT(CHECKPOINT_REQUEST, int32_t, art::kCheckpointRequest)
+DEFINE_THREAD_CONSTANT(EMPTY_CHECKPOINT_REQUEST, int32_t, art::kEmptyCheckpointRequest)
+DEFINE_THREAD_CONSTANT(SUSPEND_OR_CHECKPOINT_REQUEST,  int32_t, art::kSuspendRequest | art::kCheckpointRequest | art::kEmptyCheckpointRequest)
 
 #undef DEFINE_THREAD_CONSTANT
diff --git a/tools/cpp-define-generator/generate-asm-support b/tools/cpp-define-generator/generate-asm-support
index f95648b..fcdf72f 100755
--- a/tools/cpp-define-generator/generate-asm-support
+++ b/tools/cpp-define-generator/generate-asm-support
@@ -5,4 +5,4 @@
 
 [[ -z ${ANDROID_BUILD_TOP+x} ]] && (echo "Run source build/envsetup.sh first" >&2 && exit 1)
 
-cpp-define-generator-datad > ${ANDROID_BUILD_TOP}/art/runtime/generated/asm_support_gen.h
+cpp-define-generator-data > ${ANDROID_BUILD_TOP}/art/runtime/generated/asm_support_gen.h
diff --git a/tools/cpp-define-generator/main.cc b/tools/cpp-define-generator/main.cc
index a1b463a..fc99f8a 100644
--- a/tools/cpp-define-generator/main.cc
+++ b/tools/cpp-define-generator/main.cc
@@ -59,12 +59,12 @@
 }
 
 template <typename T>
-void cpp_define(std::string name, T value) {
+void cpp_define(const std::string& name, T value) {
   std::cout << "#define " << name << " " << pretty_format(value) << std::endl;
 }
 
 template <typename T>
-void emit_check_eq(T value, std::string expr) {
+void emit_check_eq(T value, const std::string& expr) {
   std::cout << "DEFINE_CHECK_EQ(" << value << ", (" << expr << "))" << std::endl;
 }
 
diff --git a/tools/libcore_failures_concurrent_collector.txt b/tools/libcore_failures_concurrent_collector.txt
deleted file mode 100644
index 0e289a6..0000000
--- a/tools/libcore_failures_concurrent_collector.txt
+++ /dev/null
@@ -1,13 +0,0 @@
-/*
- * This file contains expectations for ART's buildbot's concurrent collector
- * configurations. The purpose of this file is to temporary and quickly list
- * failing tests and not break the bots on the CC configurations, until they
- * are fixed or until the libcore expectation files get properly updated. The
- * script that uses this file is art/tools/run-libcore-tests.sh.
- *
- * It is also used to enable AOSP experiments, and not mess up with CTS's
- * expectations.
- */
-
-[
-]
diff --git a/tools/run-libcore-tests.sh b/tools/run-libcore-tests.sh
index 01c7f20..41faa69 100755
--- a/tools/run-libcore-tests.sh
+++ b/tools/run-libcore-tests.sh
@@ -43,10 +43,6 @@
 done
 
 expectations="--expectations art/tools/libcore_failures.txt"
-if [ "x$ART_USE_READ_BARRIER" = xtrue ]; then
-  # Tolerate some more failures on the concurrent collector configurations.
-  expectations="$expectations --expectations art/tools/libcore_failures_concurrent_collector.txt"
-fi
 
 emulator="no"
 if [ "$ANDROID_SERIAL" = "emulator-5554" ]; then