Updates to tracer to log events.

The tracer can now generate traces for ddms or output to a logfile.
Also includes bugfixes to allow stack walking to work properly when
tracing.

Change-Id: I8894272d9a678eeb1d376734f7822daf2ab298e4
diff --git a/src/runtime_support.cc b/src/runtime_support.cc
index 4eaa760..cdd773f 100644
--- a/src/runtime_support.cc
+++ b/src/runtime_support.cc
@@ -1183,10 +1183,11 @@
 }
 
 extern "C" const void* artTraceMethodEntryFromCode(Method* method, Thread* self, uintptr_t lr) {
-  LOG(INFO) << "Tracer - entering: " << PrettyMethod(method);
   TraceStackFrame trace_frame = TraceStackFrame(method, lr);
   self->PushTraceStackFrame(trace_frame);
 
+  Trace::LogMethodTraceEvent(self, method, Trace::kMethodTraceEnter);  // Record the entry as a trace event (replaces the old LOG(INFO) line).
+
   return Trace::GetSavedCodeFromMap(method);
 }
 
@@ -1194,7 +1195,8 @@
   TraceStackFrame trace_frame = Thread::Current()->PopTraceStackFrame();
   Method* method = trace_frame.method_;
   uintptr_t lr = trace_frame.return_pc_;
-  LOG(INFO) << "Tracer - exiting: " << PrettyMethod(method);
+
+  Trace::LogMethodTraceEvent(Thread::Current(), method, Trace::kMethodTraceExit);
 
   return lr;
 }
@@ -1203,7 +1205,8 @@
   TraceStackFrame trace_frame = self->PopTraceStackFrame();
   Method* method = trace_frame.method_;
   uintptr_t lr = trace_frame.return_pc_;
-  LOG(INFO) << "Tracer - unwinding: " << PrettyMethod(method);
+
+  Trace::LogMethodTraceEvent(self, method, Trace::kMethodTraceUnwind);
 
   return lr;
 }
diff --git a/src/thread.cc b/src/thread.cc
index 92d358c..7e38290 100644
--- a/src/thread.cc
+++ b/src/thread.cc
@@ -1097,6 +1097,9 @@
 void Thread::WalkStack(StackVisitor* visitor) const {
   Frame frame = GetTopOfStack();
   uintptr_t pc = ManglePc(top_of_managed_stack_pc_);
+#if defined(__arm__)
+  uint32_t trace_stack_depth = 0;
+#endif
   // TODO: enable this CHECK after native_to_managed_record_ is initialized during startup.
   // CHECK(native_to_managed_record_ != NULL);
   NativeToManagedRecord* record = native_to_managed_record_;
@@ -1106,6 +1109,15 @@
       // DCHECK(frame.GetMethod()->IsWithinCode(pc));  // TODO: restore IsWithinCode
       visitor->VisitFrame(frame, pc);
       pc = ManglePc(frame.GetReturnPC());
+      if (Trace::IsMethodTracingActive()) {
+#if defined(__arm__)
+        uintptr_t trace_exit = reinterpret_cast<uintptr_t>(art_trace_exit_from_code);
+        if (ManglePc(trace_exit) == pc) {
+          TraceStackFrame trace_frame = GetTraceStackFrame(trace_stack_depth++);
+          pc = ManglePc(trace_frame.return_pc_);
+        }
+#endif
+      }
     }
     if (record == NULL) {
       break;
@@ -1474,7 +1486,6 @@
     if (false) {
       LOG(INFO) << "Visiting stack roots in " << PrettyMethod(m, false)
                 << StringPrintf("@ PC:%04x", m->ToDexPC(pc));
-
     }
     // Process register map (which native and callee save methods don't have)
     if (!m->IsNative() && !m->IsCalleeSaveMethod() && !m->IsProxyMethod()) {
diff --git a/src/thread.h b/src/thread.h
index d24ddca..c5fb914 100644
--- a/src/thread.h
+++ b/src/thread.h
@@ -472,6 +472,10 @@
     return trace_stack_->empty();
   }
 
+  TraceStackFrame GetTraceStackFrame(uint32_t depth) const {  // Returns a copy of the frame `depth` entries below the most recent push (depth 0 == last pushed).
+    return trace_stack_->at(trace_stack_->size() - depth - 1);  // NOTE(review): depth >= size() makes the index wrap around — confirm callers stay in range.
+  }
+
   void PushTraceStackFrame(const TraceStackFrame& frame) {
     trace_stack_->push_back(frame);
   }
diff --git a/src/trace.cc b/src/trace.cc
index 0a41a2e..56e1eac 100644
--- a/src/trace.cc
+++ b/src/trace.cc
@@ -2,13 +2,109 @@
 
 #include "trace.h"
 
+#include <sys/uio.h>
+
 #include "class_linker.h"
+#include "debugger.h"
 #include "dex_cache.h"
+#include "object_utils.h"
+#include "os.h"
 #include "runtime_support.h"
 #include "thread.h"
 
+static const uint32_t kTraceMethodActionMask      = 0x03; // low two bits of a record's method value hold the TraceEvent
+static const char     kTraceTokenChar             = '*';  // prefixes the section names of the text header (version, threads, methods, end)
+static const uint16_t kTraceHeaderLength          = 32;   // bytes reserved at the start of buf_ for the binary header
+static const uint32_t kTraceMagicValue            = 0x574f4c53; // written little-endian, i.e. the bytes 'S' 'L' 'O' 'W'
+static const uint16_t kTraceVersionSingleClock    = 2;
+static const uint16_t kTraceVersionDualClock      = 3;
+static const uint16_t kTraceRecordSizeSingleClock = 10; // using v2: 2-byte tid + 4-byte method value + one 4-byte timestamp
+static const uint16_t kTraceRecordSizeDualClock   = 14; // using v3 with two timestamps: 2 + 4 + 4 + 4 bytes
+
+static inline uint32_t TraceMethodId(uint32_t methodValue) {  // Clears the action bits, leaving the method part of a packed record value.
+  return (methodValue & ~kTraceMethodActionMask);
+}
+static inline uint32_t TraceMethodCombine(uint32_t method, uint8_t traceEvent) {  // ORs the event code into the method value.
+  return (method | traceEvent);  // NOTE(review): assumes the low two bits of method are zero — confirm Method alignment.
+}
+
 namespace art {
 
+// TODO: Replace class statics with singleton instance
+bool Trace::method_tracing_active_ = false;
+std::map<const Method*, const void*> Trace::saved_code_map_;
+std::set<const Method*> Trace::visited_methods_;  // filled by GetVisitedMethods(), emptied by DumpMethodList()
+std::map<Thread*, uint64_t> Trace::thread_clock_base_map_;  // ThreadCpuMicroTime() reading taken at each thread's first logged event
+uint8_t* Trace::buf_;  // binary header followed by event records; allocated in Start()
+File* Trace::trace_file_;  // only opened when the trace is not sent directly to DDMS
+bool Trace::direct_to_ddms_ = false;
+int Trace::buffer_size_ = 0;
+uint64_t Trace::start_time_ = 0;  // MicroTime() at Start(); wall-clock timestamps are relative to it
+bool Trace::overflow_ = false;  // set when a record would not fit in buf_
+uint16_t Trace::trace_version_;
+uint16_t Trace::record_size_;
+volatile int32_t Trace::cur_offset_;  // next free byte in buf_; advanced with a CAS loop in LogMethodTraceEvent()
+
+bool UseThreadCpuClock() {  // Whether records carry a per-thread CPU timestamp; currently hard-wired to true.
+  // TODO: Allow control over which clock is used
+  return true;
+}
+
+bool UseWallClock() {  // Whether records carry a wall-clock timestamp; currently hard-wired to true.
+  // TODO: Allow control over which clock is used
+  return true;
+}
+
+void MeasureClockOverhead() {  // Reads each enabled clock once and discards the value; GetClockOverhead() times these calls.
+  if (UseThreadCpuClock()) {
+    ThreadCpuMicroTime();
+  }
+  if (UseWallClock()) {
+    MicroTime();
+  }
+}
+
+uint32_t GetClockOverhead() {  // Estimates the cost of one MeasureClockOverhead() call, in nanoseconds.
+  uint64_t start = ThreadCpuMicroTime();
+
+  for (int i = 4000; i > 0; i--) {  // 4000 iterations x 8 calls = 32000 calls in total.
+    MeasureClockOverhead();
+    MeasureClockOverhead();
+    MeasureClockOverhead();
+    MeasureClockOverhead();
+    MeasureClockOverhead();
+    MeasureClockOverhead();
+    MeasureClockOverhead();
+    MeasureClockOverhead();
+  }
+
+  uint64_t elapsed = ThreadCpuMicroTime() - start;  // Microseconds of thread CPU time spent in the loop.
+  return uint32_t (elapsed / 32);  // elapsed_us * 1000 ns / 32000 calls == elapsed_us / 32 ns per call.
+}
+
+void Append2LE(uint8_t* buf, uint16_t val) {  // Stores val at buf[0..1], least significant byte first.
+  *buf++ = (uint8_t) val;
+  *buf++ = (uint8_t) (val >> 8);  // buf is a by-value copy, so the caller's pointer is not advanced.
+}
+
+void Append4LE(uint8_t* buf, uint32_t val) {  // Stores val at buf[0..3], least significant byte first.
+  *buf++ = (uint8_t) val;
+  *buf++ = (uint8_t) (val >> 8);
+  *buf++ = (uint8_t) (val >> 16);
+  *buf++ = (uint8_t) (val >> 24);  // buf is a by-value copy, so the caller's pointer is not advanced.
+}
+
+void Append8LE(uint8_t* buf, uint64_t val) {  // Stores val at buf[0..7], least significant byte first.
+  *buf++ = (uint8_t) val;
+  *buf++ = (uint8_t) (val >> 8);
+  *buf++ = (uint8_t) (val >> 16);
+  *buf++ = (uint8_t) (val >> 24);
+  *buf++ = (uint8_t) (val >> 32);
+  *buf++ = (uint8_t) (val >> 40);
+  *buf++ = (uint8_t) (val >> 48);
+  *buf++ = (uint8_t) (val >> 56);  // buf is a by-value copy, so the caller's pointer is not advanced.
+}
+
 #if defined(__arm__)
 static bool InstallStubsClassVisitor(Class* klass, void* trace_stub) {
   for (size_t i = 0; i < klass->NumDirectMethods(); i++) {
@@ -89,28 +185,30 @@
 }
 #endif
 
-bool Trace::method_tracing_active_ = false;
-std::map<const Method*, const void*> Trace::saved_code_map_;
-
 void Trace::AddSavedCodeToMap(const Method* method, const void* code) {
+  CHECK(IsMethodTracingActive());  // The saved-code map may only be modified while a trace is active.
   saved_code_map_.insert(std::make_pair(method, code));
 }
 
 void Trace::RemoveSavedCodeFromMap(const Method* method) {
+  CHECK(IsMethodTracingActive());  // The saved-code map may only be modified while a trace is active.
   saved_code_map_.erase(method);
 }
 
 const void* Trace::GetSavedCodeFromMap(const Method* method) {
+  CHECK(IsMethodTracingActive());  // NOTE(review): the find() result below is dereferenced without comparing against end().
   return saved_code_map_.find(method)->second;
 }
 
 void Trace::SaveAndUpdateCode(Method* method, const void* new_code) {
+  CHECK(IsMethodTracingActive());  // NOTE(review): the next CHECK looks up a method expected to be absent from the map — confirm GetSavedCodeFromMap() is safe for that case.
   CHECK(GetSavedCodeFromMap(method) == NULL);
   AddSavedCodeToMap(method, method->GetCode());
   method->SetCode(new_code);
 }
 
 void Trace::ResetSavedCode(Method* method) {
+  CHECK(IsMethodTracingActive());
   CHECK(GetSavedCodeFromMap(method) != NULL);
   method->SetCode(GetSavedCodeFromMap(method));
   RemoveSavedCodeFromMap(method);
@@ -127,39 +225,222 @@
 void Trace::Start(const char* trace_filename, int trace_fd, int buffer_size, int flags, bool direct_to_ddms) {
   LOG(INFO) << "Starting method tracing...";
   if (IsMethodTracingActive()) {
-    // TODO: Stop the trace, then start it up again instead of returning
-    LOG(INFO) << "Trace already in progress, stopping";
+    // TODO: Stop the trace, then start it up again instead of returning.
+    LOG(INFO) << "Trace already in progress, ignoring this request";
     return;
   }
 
+  // Suspend all threads; they are resumed on the open-failure path and at the end of Start().
+  ScopedThreadStateChange tsc(Thread::Current(), Thread::kRunnable);
+  Runtime::Current()->GetThreadList()->SuspendAll(false);
+
+  // Open files and allocate storage. No file is needed when the trace goes straight to DDMS.
+  if (!direct_to_ddms) {
+    if (trace_fd < 0) {
+      trace_file_ = OS::OpenFile(trace_filename, true);
+    } else {
+      trace_file_ = OS::FileFromFd("tracefile", trace_fd);
+    }
+    if (trace_file_ == NULL) {
+      PLOG(ERROR) << "Unable to open trace file '" << trace_filename << "'";
+      Thread::Current()->ThrowNewException("Ljava/lang/RuntimeException;",
+          StringPrintf("Unable to open trace file '%s'", trace_filename).c_str());
+      Runtime::Current()->GetThreadList()->ResumeAll(false);
+      return;
+    }
+  }
+  buf_ = new uint8_t[buffer_size]();  // NOTE(review): buffer_size is not checked against kTraceHeaderLength before the header is written below.
+
+  // Populate profiler state.
+  direct_to_ddms_ = direct_to_ddms;
+  buffer_size_ = buffer_size;
+  overflow_ = false;
+  start_time_ = MicroTime();
+
+  if (UseThreadCpuClock() && UseWallClock()) {  // Two timestamps per record need the v3 format.
+    trace_version_ = kTraceVersionDualClock;
+    record_size_ = kTraceRecordSizeDualClock;
+  } else {
+    trace_version_ = kTraceVersionSingleClock;
+    record_size_ = kTraceRecordSizeSingleClock;
+  }
+
+  saved_code_map_.clear();
+  visited_methods_.clear();
+  thread_clock_base_map_.clear();
+
+  // Set up the beginning of the trace: magic, version, header length, start time, then (v3 only) record size.
+  memset(buf_, 0, kTraceHeaderLength);
+  Append4LE(buf_, kTraceMagicValue);
+  Append2LE(buf_ + 4, trace_version_);
+  Append2LE(buf_ + 6, kTraceHeaderLength);
+  Append8LE(buf_ + 8, start_time_);
+  if (trace_version_ >= kTraceVersionDualClock) {
+    Append2LE(buf_ + 16, record_size_);
+  }
+  cur_offset_ = kTraceHeaderLength;  // Event records start right after the header.
+
   SetMethodTracingActive(true);
+
+  // Install all method tracing stubs.
   InstallStubs();
   LOG(INFO) << "Method tracing started";
+
+  Runtime::Current()->GetThreadList()->ResumeAll(false);
 }
 
 void Trace::Stop() {
   LOG(INFO) << "Stopping method tracing...";
   if (!IsMethodTracingActive()) {
-    LOG(INFO) << "Trace stop requested, but not running";
+    LOG(INFO) << "Trace stop requested, but no trace currently running";
     return;
   }
 
+  // Suspend all threads; they are resumed at the end of Stop().
+  ScopedThreadStateChange tsc(Thread::Current(), Thread::kRunnable);
+  Runtime::Current()->GetThreadList()->SuspendAll(false);
+
+  // Uninstall all method tracing stubs.
   UninstallStubs();
+
   SetMethodTracingActive(false);
+
+  // Compute elapsed time.
+  uint64_t elapsed = MicroTime() - start_time_;
+
+  size_t final_offset = cur_offset_;  // Bytes of buf_ in use: header plus all recorded events.
+  uint32_t clock_overhead = GetClockOverhead();
+
+  GetVisitedMethods(final_offset);  // Collects the methods referenced by the records into visited_methods_.
+
+  std::ostringstream os;  // Text header: key/value summary, thread list, method list.
+
+  os << StringPrintf("%cversion\n", kTraceTokenChar);
+  os << StringPrintf("%d\n", trace_version_);
+  os << StringPrintf("data-file-overflow=%s\n", overflow_ ? "true" : "false");
+  if (UseThreadCpuClock()) {
+    if (UseWallClock()) {
+      os << StringPrintf("clock=dual\n");
+    } else {
+      os << StringPrintf("clock=thread-cpu\n");
+    }
+  } else {
+    os << StringPrintf("clock=wall\n");
+  }
+  os << StringPrintf("elapsed-time-usec=%llu\n", elapsed);
+  os << StringPrintf("num-method-calls=%zu\n", (final_offset - kTraceHeaderLength) / record_size_);  // size_t operand, so %zu rather than %d.
+  os << StringPrintf("clock-call-overhead-nsec=%d\n", clock_overhead);
+  os << StringPrintf("vm=art\n");
+  os << StringPrintf("%cthreads\n", kTraceTokenChar);
+  DumpThreadList(os);
+  os << StringPrintf("%cmethods\n", kTraceTokenChar);
+  DumpMethodList(os);
+  os << StringPrintf("%cend\n", kTraceTokenChar);
+
+  std::string header(os.str());
+  if (direct_to_ddms_) {  // Send the text header and the binary buffer as a single two-part chunk.
+    struct iovec iov[2];
+    iov[0].iov_base = reinterpret_cast<void*>(const_cast<char*>(header.c_str()));
+    iov[0].iov_len = header.length();
+    iov[1].iov_base = buf_;
+    iov[1].iov_len = final_offset;
+    Dbg::DdmSendChunkV(CHUNK_TYPE("MPSE"), iov, 2);
+  } else {
+    if (!trace_file_->WriteFully(header.c_str(), header.length()) ||
+        !trace_file_->WriteFully(buf_, final_offset)) {
+      int err = errno;  // Capture errno before further calls can overwrite it.
+      LOG(ERROR) << "Trace data write failed: " << strerror(err);
+      Thread::Current()->ThrowNewException("Ljava/lang/RuntimeException;",
+          StringPrintf("Trace data write failed: %s", strerror(err)).c_str());
+    }
+    delete trace_file_;
+  }
+
+  delete[] buf_;  // buf_ comes from new uint8_t[...] in Start(), so the array form of delete is required.
+
   LOG(INFO) << "Method tracing stopped";
+
+  Runtime::Current()->GetThreadList()->ResumeAll(false);
+}
+
+void Trace::LogMethodTraceEvent(Thread* self, const Method* method, Trace::TraceEvent event) {  // Appends one record (tid, method|event, timestamps) to buf_.
+  if (thread_clock_base_map_.find(self) == thread_clock_base_map_.end()) {  // First event seen for this thread: remember its CPU clock base.
+    uint64_t time = ThreadCpuMicroTime();
+    thread_clock_base_map_.insert(std::make_pair(self, time));  // NOTE(review): the map is updated without any visible lock — confirm callers cannot race here.
+  }
+
+  // Advance cur_offset_ atomically.
+  int32_t new_offset;
+  int32_t old_offset;
+  do {
+    old_offset = cur_offset_;
+    new_offset = old_offset + record_size_;
+    if (new_offset > buffer_size_) {  // Record would not fit: flag the overflow and drop the event.
+      overflow_ = true;
+      return;
+    }
+  } while (android_atomic_release_cas(old_offset, new_offset, &cur_offset_) != 0);  // Loop until the CAS returns 0.
+
+  uint32_t method_value = TraceMethodCombine(reinterpret_cast<uint32_t>(method), event);  // NOTE(review): pointer-to-uint32_t cast assumes 32-bit pointers.
+
+  // Write data
+  uint8_t* ptr = buf_ + old_offset;  // The slot [old_offset, new_offset) was claimed by the CAS above.
+  Append2LE(ptr, self->GetTid());  // The thread id is stored in 16 bits.
+  Append4LE(ptr + 2, method_value);
+  ptr += 6;
+
+  if (UseThreadCpuClock()) {
+    uint64_t thread_clock_base = thread_clock_base_map_.find(self)->second;
+    uint32_t thread_clock_diff = ThreadCpuMicroTime() - thread_clock_base;  // CPU microseconds since this thread's first event, narrowed to 32 bits.
+    Append4LE(ptr, thread_clock_diff);
+    ptr += 4;
+  }
+
+  if (UseWallClock()) {
+    uint32_t wall_clock_diff = MicroTime() - start_time_;  // Wall-clock microseconds since Start(), narrowed to 32 bits.
+    Append4LE(ptr, wall_clock_diff);
+  }
+}
+
+void Trace::GetVisitedMethods(size_t end_offset) {  // Collects the Method* of every record in buf_ up to end_offset into visited_methods_.
+  uint8_t* ptr = buf_ + kTraceHeaderLength;  // Records start right after the binary header.
+  uint8_t* end = buf_ + end_offset;
+
+  while (ptr < end) {  // Bytes 2..5 of a record hold the little-endian method value; the top byte is widened to unsigned before shifting.
+    uint32_t method_value = ptr[2] | (ptr[3] << 8) | (ptr[4] << 16) | (static_cast<uint32_t>(ptr[5]) << 24);
+    Method* method = reinterpret_cast<Method*>(TraceMethodId(method_value));  // Strip the event bits to recover the pointer.
+    visited_methods_.insert(method);
+    ptr += record_size_;
+  }
+}
+
+void Trace::DumpMethodList(std::ostream& os) {  // Writes one tab-separated line per visited method, then clears visited_methods_.
+  typedef std::set<const Method*>::const_iterator It; // TODO: C++0x auto
+  for (It it = visited_methods_.begin(); it != visited_methods_.end(); ++it) {
+    const Method* method = *it;
+    MethodHelper mh(method);
+    os << StringPrintf("0x%08x\t%s\t%s\t%s\t%s\t%d\n", (int) method,  // NOTE(review): (int) cast of a pointer assumes 32-bit pointers.
+        PrettyDescriptor(mh.GetDeclaringClassDescriptor()).c_str(), mh.GetName(),
+        mh.GetSignature().c_str(), mh.GetDeclaringClassSourceFile(),
+        mh.GetLineNumFromNativePC(0));
+  }
+  visited_methods_.clear();
+}
+
+static void DumpThread(Thread* t, void* arg) {  // ForEach callback: arg is the std::ostream* passed by DumpThreadList().
+  std::ostream* os = reinterpret_cast<std::ostream*>(arg);
+  *os << StringPrintf("%d\t%s\n", t->GetTid(), t->GetName()->ToModifiedUtf8().c_str());  // One "tid<TAB>name" line per thread.
+}
+
+void Trace::DumpThreadList(std::ostream& os) {  // Writes one line per thread in the runtime's thread list to os.
+  ScopedThreadListLock thread_list_lock;  // Held for the duration of the ForEach() call below.
+  Runtime::Current()->GetThreadList()->ForEach(DumpThread, &os);
 }
 
 void Trace::InstallStubs() {
 #if defined(__arm__)
-  {
-    ScopedThreadStateChange tsc(Thread::Current(), Thread::kRunnable);
-    Runtime::Current()->GetThreadList()->SuspendAll(false);
-  }
-
   void* trace_stub = reinterpret_cast<void*>(art_trace_entry_from_code);
   Runtime::Current()->GetClassLinker()->VisitClasses(InstallStubsClassVisitor, trace_stub);
-
-  Runtime::Current()->GetThreadList()->ResumeAll(false);
 #else
   UNIMPLEMENTED(WARNING);
 #endif
@@ -167,11 +448,6 @@
 
 void Trace::UninstallStubs() {
 #if defined(__arm__)
-  {
-    ScopedThreadStateChange tsc(Thread::Current(), Thread::kRunnable);
-    Runtime::Current()->GetThreadList()->SuspendAll(false);
-  }
-
   void* trace_stub = reinterpret_cast<void*>(art_trace_entry_from_code);
   Runtime::Current()->GetClassLinker()->VisitClasses(UninstallStubsClassVisitor, trace_stub);
 
@@ -180,8 +456,6 @@
     ScopedThreadListLock thread_list_lock;
     Runtime::Current()->GetThreadList()->ForEach(TraceRestoreStack, NULL);
   }
-
-  Runtime::Current()->GetThreadList()->ResumeAll(false);
 #else
   UNIMPLEMENTED(WARNING);
 #endif
diff --git a/src/trace.h b/src/trace.h
index 4c0edda..9637163 100644
--- a/src/trace.h
+++ b/src/trace.h
@@ -4,13 +4,18 @@
 #define ART_SRC_TRACE_H_
 
 #include <map>
+#include <ostream>
+#include <set>
+#include <string>
 
+#include "file.h"
 #include "globals.h"
 #include "macros.h"
 
 namespace art {
 
 class Method;
+class Thread;
 
 struct TraceStackFrame {
   TraceStackFrame(Method* method, uintptr_t return_pc)
@@ -23,9 +28,18 @@
 
 class Trace {
  public:
+
+  enum TraceEvent {  // Event code passed to LogMethodTraceEvent().
+    kMethodTraceEnter = 0,
+    kMethodTraceExit = 1,
+    kMethodTraceUnwind = 2,
+  };
+
   static void Start(const char* trace_filename, int trace_fd, int buffer_size, int flags, bool direct_to_ddms);
   static void Stop();
 
+  static void LogMethodTraceEvent(Thread* self, const Method* method, TraceEvent event);
+
   static bool IsMethodTracingActive();
   static void SetMethodTracingActive(bool value);
 
@@ -43,11 +57,32 @@
   // Restores original code for each method and fixes the return values of each thread's stack.
   static void UninstallStubs();
 
+  // Methods to output traced methods and threads.
+  static void GetVisitedMethods(size_t end_offset);
+  static void DumpMethodList(std::ostream& os);
+  static void DumpThreadList(std::ostream& os);
+
   static bool method_tracing_active_;
 
   // Maps a method to its original code pointer
   static std::map<const Method*, const void*> saved_code_map_;
 
+  // Set of methods visited by the profiler
+  static std::set<const Method*> visited_methods_;
+
+  // Maps a thread to its clock base
+  static std::map<Thread*, uint64_t> thread_clock_base_map_;
+
+  static uint8_t* buf_;
+  static File* trace_file_;
+  static bool direct_to_ddms_;
+  static int buffer_size_;
+  static uint64_t start_time_;
+  static bool overflow_;
+  static uint16_t trace_version_;
+  static uint16_t record_size_;
+  static volatile int32_t cur_offset_;
+
   DISALLOW_COPY_AND_ASSIGN(Trace);
 };
 
diff --git a/src/utils.cc b/src/utils.cc
index 0d6aff9..ced68de 100644
--- a/src/utils.cc
+++ b/src/utils.cc
@@ -56,12 +56,24 @@
   return static_cast<uint64_t>(now.tv_sec) * 1000LL + now.tv_nsec / 1000000LL;
 }
 
+uint64_t MicroTime() {  // Current CLOCK_MONOTONIC time in microseconds.
+  struct timespec now;
+  clock_gettime(CLOCK_MONOTONIC, &now);
+  return static_cast<uint64_t>(now.tv_sec) * 1000000LL + now.tv_nsec / 1000LL;  // Sub-microsecond nanoseconds are truncated.
+}
+
 uint64_t NanoTime() {
   struct timespec now;
   clock_gettime(CLOCK_MONOTONIC, &now);
   return static_cast<uint64_t>(now.tv_sec) * 1000000000LL + now.tv_nsec;
 }
 
+uint64_t ThreadCpuMicroTime() {  // Current CLOCK_THREAD_CPUTIME_ID time of the calling thread, in microseconds.
+  struct timespec now;
+  clock_gettime(CLOCK_THREAD_CPUTIME_ID, &now);
+  return static_cast<uint64_t>(now.tv_sec) * 1000000LL + now.tv_nsec / 1000LL;  // Sub-microsecond nanoseconds are truncated.
+}
+
 std::string PrettyDescriptor(const String* java_descriptor) {
   if (java_descriptor == NULL) {
     return "null";
diff --git a/src/utils.h b/src/utils.h
index 5b3a036..b402b46 100644
--- a/src/utils.h
+++ b/src/utils.h
@@ -211,9 +211,15 @@
 // Returns the current time in milliseconds (using the POSIX CLOCK_MONOTONIC).
 uint64_t MilliTime();
 
+// Returns the current time in microseconds (using the POSIX CLOCK_MONOTONIC).
+uint64_t MicroTime();
+
 // Returns the current time in nanoseconds (using the POSIX CLOCK_MONOTONIC).
 uint64_t NanoTime();
 
+// Returns the current time in microseconds (using the POSIX CLOCK_THREAD_CPUTIME_ID).
+uint64_t ThreadCpuMicroTime();
+
 // Splits a string using the given delimiter character into a vector of
 // strings. Empty strings will be omitted.
 void Split(const std::string& s, char delim, std::vector<std::string>& result);