Compiler: replace DOM traversal computation

The old trace JIT originally used a few recursive graph-walking
algorithms, which was perfectly reasonable given that the graph size
was capped at a few dozen nodes.  These were replaced with iterative
walk-order computations - or at least I thought they all were.  I
missed one, which caused a stack overflow when compiling a
pathologically large method.
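
For illustration only, a minimal sketch of the kind of
recursive-to-iterative conversion described above, using a
hypothetical BasicBlock node with a plain successor list rather than
the compiler's actual graph types:

  #include <cstddef>
  #include <stack>
  #include <vector>

  // Hypothetical node type for this sketch, not the real MIR graph.
  struct BasicBlock {
    BasicBlock() : visited(false) {}
    std::vector<BasicBlock*> successors;
    bool visited;
  };

  // Recursive walk: recursion depth grows with graph size, so a
  // pathologically large method can overflow the native stack.
  void WalkRecursive(BasicBlock* bb, std::vector<BasicBlock*>* order) {
    if (bb == NULL || bb->visited) return;
    bb->visited = true;
    order->push_back(bb);
    for (size_t i = 0; i < bb->successors.size(); i++) {
      WalkRecursive(bb->successors[i], order);
    }
  }

  // Iterative rework: an explicit worklist visits the same set of
  // blocks while keeping native stack usage bounded.
  void WalkIterative(BasicBlock* entry, std::vector<BasicBlock*>* order) {
    std::stack<BasicBlock*> worklist;
    worklist.push(entry);
    while (!worklist.empty()) {
      BasicBlock* bb = worklist.top();
      worklist.pop();
      if (bb == NULL || bb->visited) continue;
      bb->visited = true;
      order->push_back(bb);
      for (size_t i = 0; i < bb->successors.size(); i++) {
        worklist.push(bb->successors[i]);
      }
    }
  }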

Renamed some arena_allocator items for consistency and clarity.
Added more detailed memory usage logging.  Reworked the allocator to
waste less space when an allocation doesn't fit and a new block must
be allocated.
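
As a condensed sketch of the new block-sizing decision in NewMem()
(mirroring the diff below; the high_water parameter stands in for
ARENA_HIGH_WATER, which is defined alongside the allocator):

  #include <cstddef>

  // Sketch only, not the actual implementation (see the diff below).
  // Returns the payload size for the new block when a request does not
  // fit in the space remaining in the current block.
  static size_t ChoosePayloadSize(size_t request, size_t remaining_space,
                                  size_t standard_block_size,
                                  size_t high_water) {
    if (remaining_space >= high_water || request > standard_block_size) {
      // Custom-sized block: serve exactly this request so a current
      // block with plenty of room left can keep absorbing smaller
      // allocations.
      return request;
    }
    // Standard block: the bytes stranded in the old current block are
    // now tracked as lost_bytes_ instead of being silently wasted.
    return standard_block_size;
  }

In the patch, a custom-sized block that comes back exactly full is
linked at the head of the block list, so current_block_ keeps its
remaining space available for later allocations.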

Change-Id: I4d84dded3c47819eefa0de90ebb821dd12eb8be8
diff --git a/src/compiler/dex/arena_allocator.cc b/src/compiler/dex/arena_allocator.cc
index 7f1bfb3..3a3e385 100644
--- a/src/compiler/dex/arena_allocator.cc
+++ b/src/compiler/dex/arena_allocator.cc
@@ -41,12 +41,14 @@
   : default_size_(default_size),
     block_size_(default_size - sizeof(ArenaMemBlock)),
     arena_head_(NULL),
-    current_arena_(NULL),
+    current_block_(NULL),
     num_arena_blocks_(0),
-    malloc_bytes_(0) {
+    malloc_bytes_(0),
+    lost_bytes_(0),
+    num_allocations_(0) {
   memset(&alloc_stats_[0], 0, sizeof(alloc_stats_));
   // Start with an empty arena.
-  arena_head_ = current_arena_ = EmptyArena();
+  arena_head_ = current_block_ = EmptyArenaBlock();
   num_arena_blocks_++;
 }
 
@@ -58,12 +60,12 @@
     head = head->next;
     free(p);
   }
-  arena_head_ = current_arena_ = NULL;
+  arena_head_ = NULL;
   num_arena_blocks_ = 0;
 }
 
 // Return an arena with no storage for use as a sentinel.
-ArenaAllocator::ArenaMemBlock* ArenaAllocator::EmptyArena() {
+ArenaAllocator::ArenaMemBlock* ArenaAllocator::EmptyArenaBlock() {
   ArenaMemBlock* res = static_cast<ArenaMemBlock*>(malloc(sizeof(ArenaMemBlock)));
   malloc_bytes_ += sizeof(ArenaMemBlock);
   res->block_size = 0;
@@ -74,32 +76,56 @@
 
 // Arena-based malloc for compilation tasks.
 void* ArenaAllocator::NewMem(size_t size, bool zero, ArenaAllocKind kind) {
-  DCHECK(current_arena_ != NULL);
+  DCHECK(current_block_ != NULL);
   DCHECK(arena_head_ != NULL);
   size = (size + 3) & ~3;
   alloc_stats_[kind] += size;
-  if (size + current_arena_->bytes_allocated > current_arena_->block_size) {
-    size_t block_size = (size <= block_size_) ?  block_size_ : size;
-    /* Time to allocate a new block */
-    size_t allocation_size = sizeof(ArenaMemBlock) + block_size;
-    ArenaMemBlock *new_arena =
-        static_cast<ArenaMemBlock*>(malloc(allocation_size));
-    if (new_arena == NULL) {
+  ArenaMemBlock* allocation_block = current_block_;  // Assume we'll fit.
+  size_t remaining_space = current_block_->block_size - current_block_->bytes_allocated;
+  if (remaining_space < size) {
+    /*
+     * Time to allocate a new block.  If this is a large allocation or we have
+     * significant space remaining in the current block then fulfill the allocation
+     * request with a custom-sized malloc() - otherwise grab a new standard block.
+     */
+    size_t allocation_size = sizeof(ArenaMemBlock);
+    if ((remaining_space >= ARENA_HIGH_WATER) || (size > block_size_)) {
+      allocation_size += size;
+    } else {
+      allocation_size += block_size_;
+    }
+    ArenaMemBlock *new_block = static_cast<ArenaMemBlock*>(malloc(allocation_size));
+    if (new_block == NULL) {
       LOG(FATAL) << "Arena allocation failure";
     }
     malloc_bytes_ += allocation_size;
-    new_arena->block_size = block_size;
-    new_arena->bytes_allocated = 0;
-    new_arena->next = NULL;
-    current_arena_->next = new_arena;
-    current_arena_ = new_arena;
+    new_block->block_size = allocation_size - sizeof(ArenaMemBlock);
+    new_block->bytes_allocated = 0;
+    new_block->next = NULL;
     num_arena_blocks_++;
+    /*
+     * If the new block is completely full, insert it into the head of the list so we don't
+     * bother trying to fit more and won't hide the potentially allocatable space on the
+     * last (current_block_) block.  TUNING: if we move to a mark scheme, revisit
+     * this code to keep allocation order intact.
+     */
+    if (new_block->block_size == size) {
+      new_block->next = arena_head_;
+      arena_head_ = new_block;
+    } else {
+      int lost = (current_block_->block_size - current_block_->bytes_allocated);
+      lost_bytes_ += lost;
+      current_block_->next = new_block;
+      current_block_ = new_block;
+    }
+    allocation_block = new_block;
   }
-  void* ptr = &current_arena_->ptr[current_arena_->bytes_allocated];
-  current_arena_->bytes_allocated += size;
+  void* ptr = &allocation_block->ptr[allocation_block->bytes_allocated];
+  allocation_block->bytes_allocated += size;
   if (zero) {
     memset(ptr, 0, size);
   }
+  num_allocations_++;
   return ptr;
 }
 
@@ -109,9 +135,10 @@
   for (int i = 0; i < kNumAllocKinds; i++) {
     total += alloc_stats_[i];
   }
-  os << " MEM: used: " << total << ", allocated: " << malloc_bytes_ << ", wasted: "
-     << malloc_bytes_ - (total + (num_arena_blocks_ * sizeof(ArenaMemBlock))) << "\n";
-  os << "Number of arenas allocated: " << num_arena_blocks_ << "\n";
+  os << " MEM: used: " << total << ", allocated: " << malloc_bytes_
+     << ", lost: " << lost_bytes_ << "\n";
+  os << "Number of blocks allocated: " << num_arena_blocks_ << ", Number of allocations: "
+     << num_allocations_ << ", avg: " << total / num_allocations_ << "\n";
   os << "===== Allocation by kind\n";
   for (int i = 0; i < kNumAllocKinds; i++) {
       os << alloc_names[i] << std::setw(10) << alloc_stats_[i] << "\n";