optimizing: add block-scoped constructor fence merging pass

Introduce a new "Constructor Fence Redundancy Elimination" pass.
The pass currently performs local optimization only, i.e. within instructions
in the same basic block.

All constructor fences preceding a publish (e.g. store, invoke) get
merged into one instruction.

==============

OptStat#ConstructorFenceGeneratedNew:   43825
OptStat#ConstructorFenceGeneratedFinal: 17631  <+++
OptStat#ConstructorFenceRemovedLSE:     164
OptStat#ConstructorFenceRemovedPFRA:    9391
OptStat#ConstructorFenceRemovedCFRE:    16133  <---

Removes ~91.5% of the 'final' constructor fences in RitzBenchmark:

(We do not distinguish the exact reason that a fence was created, so
it's possible some "new" fences were also removed.)

==============

Test: art/test/run-test --host --optimizing 476-checker-ctor-fence-redun-elim
Bug: 36656456
Change-Id: I8020217b448ad96ce9b7640aa312ae784690ad99
diff --git a/compiler/optimizing/optimizing_compiler.cc b/compiler/optimizing/optimizing_compiler.cc
index 435ca1c..b45f3c6 100644
--- a/compiler/optimizing/optimizing_compiler.cc
+++ b/compiler/optimizing/optimizing_compiler.cc
@@ -55,6 +55,7 @@
 #include "compiled_method.h"
 #include "compiler.h"
 #include "constant_folding.h"
+#include "constructor_fence_redundancy_elimination.h"
 #include "dead_code_elimination.h"
 #include "debug/elf_debug_writer.h"
 #include "debug/method_debug_info.h"
@@ -516,6 +517,8 @@
     return new (arena) CHAGuardOptimization(graph);
   } else if (opt_name == CodeSinking::kCodeSinkingPassName) {
     return new (arena) CodeSinking(graph, stats);
+  } else if (opt_name == ConstructorFenceRedundancyElimination::kPassName) {
+    return new (arena) ConstructorFenceRedundancyElimination(graph, stats);
 #ifdef ART_ENABLE_CODEGEN_arm
   } else if (opt_name == arm::InstructionSimplifierArm::kInstructionSimplifierArmPassName) {
     return new (arena) arm::InstructionSimplifierArm(graph, stats);
@@ -786,6 +789,8 @@
   IntrinsicsRecognizer* intrinsics = new (arena) IntrinsicsRecognizer(graph, stats);
   CHAGuardOptimization* cha_guard = new (arena) CHAGuardOptimization(graph);
   CodeSinking* code_sinking = new (arena) CodeSinking(graph, stats);
+  ConstructorFenceRedundancyElimination* cfre =
+      new (arena) ConstructorFenceRedundancyElimination(graph, stats);
 
   HOptimization* optimizations1[] = {
     intrinsics,
@@ -823,6 +828,8 @@
     // can satisfy. For example, the code generator does not expect to see a
     // HTypeConversion from a type to the same type.
     simplify4,
+    cfre,  // Eliminate constructor fences after code sinking to avoid
+           // complicated sinking logic to split a fence with many inputs.
   };
   RunOptimizations(optimizations2, arraysize(optimizations2), pass_observer);