ART: add weighted allocated bytes metrics
`process_cpu_start_time_` is moved from `art::Runtime` to `art::gc:Heap`
so that it could be reset when the app starts (current process is forked
from Zygote).
`process_cpu_end_time_` is removed from `art::Runtime` because it's only
used when the runtime is about to shutdown, so it suffices to capture it
in a local variable.
Test: Run art with -XX:DumpGCPerformanceOnShutdown on some benchmarks.
Bug: 112187497
Change-Id: I154fdb6acdf4e0c21dff835807bd4e2cf311e3d1
diff --git a/runtime/gc/collector/garbage_collector.cc b/runtime/gc/collector/garbage_collector.cc
index 8477c9d..46ff7dc 100644
--- a/runtime/gc/collector/garbage_collector.cc
+++ b/runtime/gc/collector/garbage_collector.cc
@@ -90,6 +90,7 @@
Thread* self = Thread::Current();
uint64_t start_time = NanoTime();
uint64_t thread_cpu_start_time = ThreadCpuNanoTime();
+ GetHeap()->CalculateWeightedAllocatedBytes();
Iteration* current_iteration = GetCurrentIteration();
current_iteration->Reset(gc_cause, clear_soft_references);
// Note transaction mode is single-threaded and there's no asynchronous GC and this flag doesn't
diff --git a/runtime/gc/heap.cc b/runtime/gc/heap.cc
index f767360..86135c1 100644
--- a/runtime/gc/heap.cc
+++ b/runtime/gc/heap.cc
@@ -210,6 +210,9 @@
low_memory_mode_(low_memory_mode),
long_pause_log_threshold_(long_pause_log_threshold),
long_gc_log_threshold_(long_gc_log_threshold),
+ process_cpu_start_time_ns_(ProcessCpuNanoTime()),
+ last_process_cpu_time_ns_(process_cpu_start_time_ns_),
+ weighted_allocated_bytes_(0u),
ignore_max_footprint_(ignore_max_footprint),
zygote_creation_lock_("zygote creation lock", kZygoteCreationLock),
zygote_space_(nullptr),
@@ -1062,6 +1065,14 @@
}
}
+void Heap::CalculateWeightedAllocatedBytes() {
+ uint64_t current_process_cpu_time = ProcessCpuNanoTime();
+ uint64_t bytes_allocated = GetBytesAllocated();
+ uint64_t weight = current_process_cpu_time - last_process_cpu_time_ns_;
+ weighted_allocated_bytes_ += weight * bytes_allocated;
+ last_process_cpu_time_ns_ = current_process_cpu_time;
+}
+
uint64_t Heap::GetTotalGcCpuTime() {
uint64_t sum = 0;
for (auto* collector : garbage_collectors_) {
@@ -1139,6 +1150,11 @@
for (auto* collector : garbage_collectors_) {
collector->ResetMeasurements();
}
+
+ process_cpu_start_time_ns_ = ProcessCpuNanoTime();
+ last_process_cpu_time_ns_ = process_cpu_start_time_ns_;
+ weighted_allocated_bytes_ = 0u;
+
total_bytes_freed_ever_ = 0;
total_objects_freed_ever_ = 0;
total_wait_time_ = 0;
diff --git a/runtime/gc/heap.h b/runtime/gc/heap.h
index a43f315..411a446 100644
--- a/runtime/gc/heap.h
+++ b/runtime/gc/heap.h
@@ -395,8 +395,17 @@
REQUIRES(!Locks::heap_bitmap_lock_)
REQUIRES(Locks::mutator_lock_);
+ uint64_t GetWeightedAllocatedBytes() const {
+ return weighted_allocated_bytes_;
+ }
+
+ void CalculateWeightedAllocatedBytes();
uint64_t GetTotalGcCpuTime();
+ uint64_t GetProcessCpuStartTime() const {
+ return process_cpu_start_time_ns_;
+ }
+
// Set target ideal heap utilization ratio, implements
// dalvik.system.VMRuntime.setTargetHeapUtilization.
void SetTargetHeapUtilization(float target);
@@ -1161,6 +1170,15 @@
// If we get a GC longer than long GC log threshold, then we print out the GC after it finishes.
const size_t long_gc_log_threshold_;
+ // Starting time of the new process; meant to be used for measuring total process CPU time.
+ uint64_t process_cpu_start_time_ns_;
+
+ // Last time GC started; meant to be used to measure the duration between two GCs.
+ uint64_t last_process_cpu_time_ns_;
+
+ // allocated_bytes * (current_process_cpu_time - last_process_cpu_time)
+ uint64_t weighted_allocated_bytes_;
+
// If we ignore the max footprint it lets the heap grow until it hits the heap capacity, this is
// useful for benchmarking since it reduces time spent in GC to a low %.
const bool ignore_max_footprint_;
diff --git a/runtime/runtime.cc b/runtime/runtime.cc
index ab79b9e..5323cee 100644
--- a/runtime/runtime.cc
+++ b/runtime/runtime.cc
@@ -278,7 +278,6 @@
// Initially assume we perceive jank in case the process state is never updated.
process_state_(kProcessStateJankPerceptible),
zygote_no_threads_(false),
- process_cpu_start_time_(ProcessCpuNanoTime()),
verifier_logging_threshold_ms_(100) {
static_assert(Runtime::kCalleeSaveSize ==
static_cast<uint32_t>(CalleeSaveType::kLastCalleeSaveType), "Unexpected size");
@@ -322,20 +321,26 @@
}
if (dump_gc_performance_on_shutdown_) {
- process_cpu_end_time_ = ProcessCpuNanoTime();
+ heap_->CalculateWeightedAllocatedBytes();
+ uint64_t process_cpu_end_time = ProcessCpuNanoTime();
ScopedLogSeverity sls(LogSeverity::INFO);
// This can't be called from the Heap destructor below because it
// could call RosAlloc::InspectAll() which needs the thread_list
// to be still alive.
heap_->DumpGcPerformanceInfo(LOG_STREAM(INFO));
- uint64_t process_cpu_time = process_cpu_end_time_ - process_cpu_start_time_;
+ uint64_t process_cpu_time = process_cpu_end_time - heap_->GetProcessCpuStartTime();
uint64_t gc_cpu_time = heap_->GetTotalGcCpuTime();
float ratio = static_cast<float>(gc_cpu_time) / process_cpu_time;
LOG_STREAM(INFO) << "GC CPU time " << PrettyDuration(gc_cpu_time)
<< " out of process CPU time " << PrettyDuration(process_cpu_time)
<< " (" << ratio << ")"
<< "\n";
+ float weighted_allocated_bytes =
+ static_cast<float>(heap_->GetWeightedAllocatedBytes()) / process_cpu_time;
+ LOG_STREAM(INFO) << "Weighted bytes allocated over CPU time: "
+ << " (" << PrettySize(weighted_allocated_bytes) << ")"
+ << "\n";
}
if (jit_ != nullptr) {
diff --git a/runtime/runtime.h b/runtime/runtime.h
index 0ccc7b7..1d58ad7 100644
--- a/runtime/runtime.h
+++ b/runtime/runtime.h
@@ -1101,9 +1101,6 @@
MemMap protected_fault_page_;
- uint64_t process_cpu_start_time_;
- uint64_t process_cpu_end_time_;
-
uint32_t verifier_logging_threshold_ms_;
DISALLOW_COPY_AND_ASSIGN(Runtime);