Sameer Abu Asal | a843954 | 2013-02-14 16:06:42 -0800 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2013 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
David Sehr | c431b9d | 2018-03-02 12:01:51 -0800 | [diff] [blame] | 17 | #ifndef ART_LIBARTBASE_BASE_HISTOGRAM_INL_H_ |
| 18 | #define ART_LIBARTBASE_BASE_HISTOGRAM_INL_H_ |
Sameer Abu Asal | a843954 | 2013-02-14 16:06:42 -0800 | [diff] [blame] | 19 | |
Sameer Abu Asal | a843954 | 2013-02-14 16:06:42 -0800 | [diff] [blame] | 20 | #include <algorithm> |
| 21 | #include <cmath> |
| 22 | #include <limits> |
| 23 | #include <ostream> |
| 24 | |
Vladimir Marko | 80afd02 | 2015-05-19 18:08:00 +0100 | [diff] [blame] | 25 | #include "histogram.h" |
| 26 | |
Andreas Gampe | 5794381 | 2017-12-06 21:39:13 -0800 | [diff] [blame] | 27 | #include <android-base/logging.h> |
| 28 | |
David Sehr | 1979c64 | 2018-04-26 14:41:18 -0700 | [diff] [blame] | 29 | #include "bit_utils.h" |
| 30 | #include "time_utils.h" |
| 31 | #include "utils.h" |
Vladimir Marko | 80afd02 | 2015-05-19 18:08:00 +0100 | [diff] [blame] | 32 | |
Sameer Abu Asal | a843954 | 2013-02-14 16:06:42 -0800 | [diff] [blame] | 33 | namespace art { |
| 34 | |
| 35 | template <class Value> inline void Histogram<Value>::AddValue(Value value) { |
Mathieu Chartier | e5426c9 | 2013-08-01 13:55:42 -0700 | [diff] [blame] | 36 | CHECK_GE(value, static_cast<Value>(0)); |
Sameer Abu Asal | a843954 | 2013-02-14 16:06:42 -0800 | [diff] [blame] | 37 | if (value >= max_) { |
| 38 | Value new_max = ((value + 1) / bucket_width_ + 1) * bucket_width_; |
| 39 | DCHECK_GT(new_max, max_); |
| 40 | GrowBuckets(new_max); |
| 41 | } |
Sameer Abu Asal | a843954 | 2013-02-14 16:06:42 -0800 | [diff] [blame] | 42 | BucketiseValue(value); |
Sameer Abu Asal | a843954 | 2013-02-14 16:06:42 -0800 | [diff] [blame] | 43 | } |
| 44 | |
Mathieu Chartier | 70a596d | 2014-12-17 14:56:47 -0800 | [diff] [blame] | 45 | template <class Value> inline void Histogram<Value>::AdjustAndAddValue(Value value) { |
| 46 | AddValue(value / kAdjust); |
| 47 | } |
| 48 | |
Mathieu Chartier | 19b0a91 | 2013-11-20 14:07:54 -0800 | [diff] [blame] | 49 | template <class Value> inline Histogram<Value>::Histogram(const char* name) |
| 50 | : kAdjust(0), |
| 51 | kInitialBucketCount(0), |
| 52 | name_(name), |
Andreas Gampe | d9911ee | 2017-03-27 13:27:24 -0700 | [diff] [blame] | 53 | max_buckets_(0), |
| 54 | sample_size_(0) { |
Mathieu Chartier | 19b0a91 | 2013-11-20 14:07:54 -0800 | [diff] [blame] | 55 | } |
| 56 | |
Sameer Abu Asal | a843954 | 2013-02-14 16:06:42 -0800 | [diff] [blame] | 57 | template <class Value> |
Mathieu Chartier | e5426c9 | 2013-08-01 13:55:42 -0700 | [diff] [blame] | 58 | inline Histogram<Value>::Histogram(const char* name, Value initial_bucket_width, |
| 59 | size_t max_buckets) |
Sameer Abu Asal | a843954 | 2013-02-14 16:06:42 -0800 | [diff] [blame] | 60 | : kAdjust(1000), |
Hans Boehm | 1973687 | 2019-04-22 10:20:22 -0700 | [diff] [blame] | 61 | kInitialBucketCount(kMinBuckets), |
Mathieu Chartier | e5426c9 | 2013-08-01 13:55:42 -0700 | [diff] [blame] | 62 | name_(name), |
| 63 | max_buckets_(max_buckets), |
| 64 | bucket_width_(initial_bucket_width) { |
Hans Boehm | 1973687 | 2019-04-22 10:20:22 -0700 | [diff] [blame] | 65 | CHECK_GE(max_buckets, kInitialBucketCount); |
| 66 | CHECK_EQ(max_buckets_ % 2, 0u); |
Sameer Abu Asal | a843954 | 2013-02-14 16:06:42 -0800 | [diff] [blame] | 67 | Reset(); |
| 68 | } |
| 69 | |
| 70 | template <class Value> |
| 71 | inline void Histogram<Value>::GrowBuckets(Value new_max) { |
| 72 | while (max_ < new_max) { |
Mathieu Chartier | e5426c9 | 2013-08-01 13:55:42 -0700 | [diff] [blame] | 73 | // If we have reached the maximum number of buckets, merge buckets together. |
Hans Boehm | 1973687 | 2019-04-22 10:20:22 -0700 | [diff] [blame] | 74 | DCHECK_LE(frequency_.size(), max_buckets_); |
| 75 | if (frequency_.size() == max_buckets_) { |
| 76 | DCHECK_EQ(frequency_.size() % 2, 0u); |
Mathieu Chartier | e5426c9 | 2013-08-01 13:55:42 -0700 | [diff] [blame] | 77 | // We double the width of each bucket to reduce the number of buckets by a factor of 2. |
| 78 | bucket_width_ *= 2; |
| 79 | const size_t limit = frequency_.size() / 2; |
| 80 | // Merge the frequencies by adding each adjacent two together. |
| 81 | for (size_t i = 0; i < limit; ++i) { |
| 82 | frequency_[i] = frequency_[i * 2] + frequency_[i * 2 + 1]; |
| 83 | } |
| 84 | // Remove frequencies in the second half of the array which were added to the first half. |
| 85 | while (frequency_.size() > limit) { |
| 86 | frequency_.pop_back(); |
| 87 | } |
| 88 | } |
Sameer Abu Asal | a843954 | 2013-02-14 16:06:42 -0800 | [diff] [blame] | 89 | max_ += bucket_width_; |
Sameer Abu Asal | a843954 | 2013-02-14 16:06:42 -0800 | [diff] [blame] | 90 | frequency_.push_back(0); |
Sameer Abu Asal | a843954 | 2013-02-14 16:06:42 -0800 | [diff] [blame] | 91 | } |
| 92 | } |
| 93 | |
Mathieu Chartier | e5426c9 | 2013-08-01 13:55:42 -0700 | [diff] [blame] | 94 | template <class Value> inline size_t Histogram<Value>::FindBucket(Value val) const { |
Sameer Abu Asal | a843954 | 2013-02-14 16:06:42 -0800 | [diff] [blame] | 95 | // Since this is only a linear histogram, bucket index can be found simply with |
| 96 | // dividing the value by the bucket width. |
| 97 | DCHECK_GE(val, min_); |
| 98 | DCHECK_LE(val, max_); |
Mathieu Chartier | e5426c9 | 2013-08-01 13:55:42 -0700 | [diff] [blame] | 99 | const size_t bucket_idx = static_cast<size_t>((val - min_) / bucket_width_); |
Sameer Abu Asal | a843954 | 2013-02-14 16:06:42 -0800 | [diff] [blame] | 100 | DCHECK_GE(bucket_idx, 0ul); |
Mathieu Chartier | e5426c9 | 2013-08-01 13:55:42 -0700 | [diff] [blame] | 101 | DCHECK_LE(bucket_idx, GetBucketCount()); |
Sameer Abu Asal | a843954 | 2013-02-14 16:06:42 -0800 | [diff] [blame] | 102 | return bucket_idx; |
| 103 | } |
| 104 | |
| 105 | template <class Value> |
Mathieu Chartier | e5426c9 | 2013-08-01 13:55:42 -0700 | [diff] [blame] | 106 | inline void Histogram<Value>::BucketiseValue(Value val) { |
| 107 | CHECK_LT(val, max_); |
| 108 | sum_ += val; |
| 109 | sum_of_squares_ += val * val; |
| 110 | ++sample_size_; |
| 111 | ++frequency_[FindBucket(val)]; |
| 112 | max_value_added_ = std::max(val, max_value_added_); |
| 113 | min_value_added_ = std::min(val, min_value_added_); |
Sameer Abu Asal | a843954 | 2013-02-14 16:06:42 -0800 | [diff] [blame] | 114 | } |
| 115 | |
| 116 | template <class Value> inline void Histogram<Value>::Initialize() { |
Mathieu Chartier | e5426c9 | 2013-08-01 13:55:42 -0700 | [diff] [blame] | 117 | for (size_t idx = 0; idx < kInitialBucketCount; idx++) { |
Sameer Abu Asal | a843954 | 2013-02-14 16:06:42 -0800 | [diff] [blame] | 118 | frequency_.push_back(0); |
| 119 | } |
| 120 | // Cumulative frequency and ranges has a length of 1 over frequency. |
Mathieu Chartier | e5426c9 | 2013-08-01 13:55:42 -0700 | [diff] [blame] | 121 | max_ = bucket_width_ * GetBucketCount(); |
| 122 | } |
| 123 | |
| 124 | template <class Value> inline size_t Histogram<Value>::GetBucketCount() const { |
| 125 | return frequency_.size(); |
Sameer Abu Asal | a843954 | 2013-02-14 16:06:42 -0800 | [diff] [blame] | 126 | } |
| 127 | |
| 128 | template <class Value> inline void Histogram<Value>::Reset() { |
Sameer Abu Asal | a843954 | 2013-02-14 16:06:42 -0800 | [diff] [blame] | 129 | sum_of_squares_ = 0; |
| 130 | sample_size_ = 0; |
| 131 | min_ = 0; |
| 132 | sum_ = 0; |
| 133 | min_value_added_ = std::numeric_limits<Value>::max(); |
| 134 | max_value_added_ = std::numeric_limits<Value>::min(); |
Sameer Abu Asal | a843954 | 2013-02-14 16:06:42 -0800 | [diff] [blame] | 135 | frequency_.clear(); |
Sameer Abu Asal | a843954 | 2013-02-14 16:06:42 -0800 | [diff] [blame] | 136 | Initialize(); |
| 137 | } |
| 138 | |
Mathieu Chartier | e5426c9 | 2013-08-01 13:55:42 -0700 | [diff] [blame] | 139 | template <class Value> inline Value Histogram<Value>::GetRange(size_t bucket_idx) const { |
| 140 | DCHECK_LE(bucket_idx, GetBucketCount()); |
| 141 | return min_ + bucket_idx * bucket_width_; |
Sameer Abu Asal | a843954 | 2013-02-14 16:06:42 -0800 | [diff] [blame] | 142 | } |
| 143 | |
| 144 | template <class Value> inline double Histogram<Value>::Mean() const { |
| 145 | DCHECK_GT(sample_size_, 0ull); |
| 146 | return static_cast<double>(sum_) / static_cast<double>(sample_size_); |
| 147 | } |
| 148 | |
| 149 | template <class Value> inline double Histogram<Value>::Variance() const { |
| 150 | DCHECK_GT(sample_size_, 0ull); |
| 151 | // Using algorithms for calculating variance over a population: |
| 152 | // http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance |
| 153 | Value sum_squared = sum_ * sum_; |
| 154 | double sum_squared_by_n_squared = |
| 155 | static_cast<double>(sum_squared) / |
| 156 | static_cast<double>(sample_size_ * sample_size_); |
| 157 | double sum_of_squares_by_n = |
| 158 | static_cast<double>(sum_of_squares_) / static_cast<double>(sample_size_); |
| 159 | return sum_of_squares_by_n - sum_squared_by_n_squared; |
| 160 | } |
| 161 | |
| 162 | template <class Value> |
Mathieu Chartier | e5426c9 | 2013-08-01 13:55:42 -0700 | [diff] [blame] | 163 | inline void Histogram<Value>::PrintBins(std::ostream& os, const CumulativeData& data) const { |
Sameer Abu Asal | a843954 | 2013-02-14 16:06:42 -0800 | [diff] [blame] | 164 | DCHECK_GT(sample_size_, 0ull); |
Mathieu Chartier | e5426c9 | 2013-08-01 13:55:42 -0700 | [diff] [blame] | 165 | for (size_t bin_idx = 0; bin_idx < data.freq_.size(); ++bin_idx) { |
| 166 | if (bin_idx > 0 && data.perc_[bin_idx] == data.perc_[bin_idx - 1]) { |
Sameer Abu Asal | a843954 | 2013-02-14 16:06:42 -0800 | [diff] [blame] | 167 | bin_idx++; |
| 168 | continue; |
| 169 | } |
Mathieu Chartier | e5426c9 | 2013-08-01 13:55:42 -0700 | [diff] [blame] | 170 | os << GetRange(bin_idx) << ": " << data.freq_[bin_idx] << "\t" |
| 171 | << data.perc_[bin_idx] * 100.0 << "%\n"; |
Sameer Abu Asal | a843954 | 2013-02-14 16:06:42 -0800 | [diff] [blame] | 172 | } |
| 173 | } |
| 174 | |
| 175 | template <class Value> |
Hiroshi Yamauchi | a1c9f01 | 2015-04-02 10:18:12 -0700 | [diff] [blame] | 176 | inline void Histogram<Value>::DumpBins(std::ostream& os) const { |
| 177 | DCHECK_GT(sample_size_, 0ull); |
| 178 | bool dumped_one = false; |
| 179 | for (size_t bin_idx = 0; bin_idx < frequency_.size(); ++bin_idx) { |
| 180 | if (frequency_[bin_idx] != 0U) { |
| 181 | if (dumped_one) { |
| 182 | // Prepend a comma if not the first bin. |
| 183 | os << ","; |
| 184 | } else { |
| 185 | dumped_one = true; |
| 186 | } |
| 187 | os << GetRange(bin_idx) << ":" << frequency_[bin_idx]; |
| 188 | } |
| 189 | } |
| 190 | } |
| 191 | |
| 192 | template <class Value> |
Mathieu Chartier | e5426c9 | 2013-08-01 13:55:42 -0700 | [diff] [blame] | 193 | inline void Histogram<Value>::PrintConfidenceIntervals(std::ostream &os, double interval, |
| 194 | const CumulativeData& data) const { |
Mathieu Chartier | f5997b4 | 2014-06-20 10:37:54 -0700 | [diff] [blame] | 195 | static constexpr size_t kFractionalDigits = 3; |
Sameer Abu Asal | a843954 | 2013-02-14 16:06:42 -0800 | [diff] [blame] | 196 | DCHECK_GT(interval, 0); |
| 197 | DCHECK_LT(interval, 1.0); |
Mathieu Chartier | f5997b4 | 2014-06-20 10:37:54 -0700 | [diff] [blame] | 198 | const double per_0 = (1.0 - interval) / 2.0; |
| 199 | const double per_1 = per_0 + interval; |
| 200 | const TimeUnit unit = GetAppropriateTimeUnit(Mean() * kAdjust); |
| 201 | os << Name() << ":\tSum: " << PrettyDuration(Sum() * kAdjust) << " " |
| 202 | << (interval * 100) << "% C.I. " << FormatDuration(Percentile(per_0, data) * kAdjust, unit, |
| 203 | kFractionalDigits) |
| 204 | << "-" << FormatDuration(Percentile(per_1, data) * kAdjust, unit, kFractionalDigits) << " " |
| 205 | << "Avg: " << FormatDuration(Mean() * kAdjust, unit, kFractionalDigits) << " Max: " |
Andreas Gampe | 5544e72 | 2017-06-05 17:01:27 -0700 | [diff] [blame] | 206 | << FormatDuration(Max() * kAdjust, unit, kFractionalDigits) << std::endl; |
Sameer Abu Asal | a843954 | 2013-02-14 16:06:42 -0800 | [diff] [blame] | 207 | } |
| 208 | |
Mathieu Chartier | b2f9936 | 2013-11-20 17:26:00 -0800 | [diff] [blame] | 209 | template <class Value> |
Nicolas Geoffray | a4f8154 | 2016-03-08 16:57:48 +0000 | [diff] [blame] | 210 | inline void Histogram<Value>::PrintMemoryUse(std::ostream &os) const { |
Pavel Vyssotski | 3e80aeb | 2016-05-17 16:37:53 +0600 | [diff] [blame] | 211 | os << Name(); |
| 212 | if (sample_size_ != 0u) { |
| 213 | os << ": Avg: " << PrettySize(Mean()) << " Max: " |
| 214 | << PrettySize(Max()) << " Min: " << PrettySize(Min()) << "\n"; |
| 215 | } else { |
| 216 | os << ": <no data>\n"; |
| 217 | } |
Nicolas Geoffray | a4f8154 | 2016-03-08 16:57:48 +0000 | [diff] [blame] | 218 | } |
| 219 | |
| 220 | template <class Value> |
Mathieu Chartier | b2f9936 | 2013-11-20 17:26:00 -0800 | [diff] [blame] | 221 | inline void Histogram<Value>::CreateHistogram(CumulativeData* out_data) const { |
Mathieu Chartier | e5426c9 | 2013-08-01 13:55:42 -0700 | [diff] [blame] | 222 | DCHECK_GT(sample_size_, 0ull); |
Ian Rogers | 500793f | 2013-11-14 17:49:12 -0800 | [diff] [blame] | 223 | out_data->freq_.clear(); |
| 224 | out_data->perc_.clear(); |
Sameer Abu Asal | a843954 | 2013-02-14 16:06:42 -0800 | [diff] [blame] | 225 | uint64_t accumulated = 0; |
Ian Rogers | 500793f | 2013-11-14 17:49:12 -0800 | [diff] [blame] | 226 | out_data->freq_.push_back(accumulated); |
| 227 | out_data->perc_.push_back(0.0); |
Sameer Abu Asal | a843954 | 2013-02-14 16:06:42 -0800 | [diff] [blame] | 228 | for (size_t idx = 0; idx < frequency_.size(); idx++) { |
| 229 | accumulated += frequency_[idx]; |
Ian Rogers | 500793f | 2013-11-14 17:49:12 -0800 | [diff] [blame] | 230 | out_data->freq_.push_back(accumulated); |
| 231 | out_data->perc_.push_back(static_cast<double>(accumulated) / static_cast<double>(sample_size_)); |
Sameer Abu Asal | a843954 | 2013-02-14 16:06:42 -0800 | [diff] [blame] | 232 | } |
Ian Rogers | 500793f | 2013-11-14 17:49:12 -0800 | [diff] [blame] | 233 | DCHECK_EQ(out_data->freq_.back(), sample_size_); |
| 234 | DCHECK_LE(std::abs(out_data->perc_.back() - 1.0), 0.001); |
Sameer Abu Asal | a843954 | 2013-02-14 16:06:42 -0800 | [diff] [blame] | 235 | } |
Sameer Abu Asal | a843954 | 2013-02-14 16:06:42 -0800 | [diff] [blame] | 236 | |
Ian Rogers | 647b1a8 | 2014-10-10 11:02:11 -0700 | [diff] [blame] | 237 | #pragma clang diagnostic push |
| 238 | #pragma clang diagnostic ignored "-Wfloat-equal" |
Ian Rogers | 647b1a8 | 2014-10-10 11:02:11 -0700 | [diff] [blame] | 239 | |
Sameer Abu Asal | a843954 | 2013-02-14 16:06:42 -0800 | [diff] [blame] | 240 | template <class Value> |
Mathieu Chartier | e5426c9 | 2013-08-01 13:55:42 -0700 | [diff] [blame] | 241 | inline double Histogram<Value>::Percentile(double per, const CumulativeData& data) const { |
| 242 | DCHECK_GT(data.perc_.size(), 0ull); |
| 243 | size_t upper_idx = 0, lower_idx = 0; |
| 244 | for (size_t idx = 0; idx < data.perc_.size(); idx++) { |
| 245 | if (per <= data.perc_[idx]) { |
Sameer Abu Asal | c081e36 | 2013-02-20 16:45:38 -0800 | [diff] [blame] | 246 | upper_idx = idx; |
Sameer Abu Asal | a843954 | 2013-02-14 16:06:42 -0800 | [diff] [blame] | 247 | break; |
Sameer Abu Asal | c081e36 | 2013-02-20 16:45:38 -0800 | [diff] [blame] | 248 | } |
| 249 | |
Mathieu Chartier | e5426c9 | 2013-08-01 13:55:42 -0700 | [diff] [blame] | 250 | if (per >= data.perc_[idx] && idx != 0 && data.perc_[idx] != data.perc_[idx - 1]) { |
Sameer Abu Asal | c081e36 | 2013-02-20 16:45:38 -0800 | [diff] [blame] | 251 | lower_idx = idx; |
| 252 | } |
Sameer Abu Asal | a843954 | 2013-02-14 16:06:42 -0800 | [diff] [blame] | 253 | } |
Sameer Abu Asal | c081e36 | 2013-02-20 16:45:38 -0800 | [diff] [blame] | 254 | |
Mathieu Chartier | e5426c9 | 2013-08-01 13:55:42 -0700 | [diff] [blame] | 255 | const double lower_perc = data.perc_[lower_idx]; |
| 256 | const double lower_value = static_cast<double>(GetRange(lower_idx)); |
Sameer Abu Asal | a843954 | 2013-02-14 16:06:42 -0800 | [diff] [blame] | 257 | if (per == lower_perc) { |
| 258 | return lower_value; |
| 259 | } |
Mathieu Chartier | e5426c9 | 2013-08-01 13:55:42 -0700 | [diff] [blame] | 260 | |
| 261 | const double upper_perc = data.perc_[upper_idx]; |
| 262 | const double upper_value = static_cast<double>(GetRange(upper_idx)); |
Sameer Abu Asal | a843954 | 2013-02-14 16:06:42 -0800 | [diff] [blame] | 263 | if (per == upper_perc) { |
| 264 | return upper_value; |
| 265 | } |
| 266 | DCHECK_GT(upper_perc, lower_perc); |
Sameer Abu Asal | c081e36 | 2013-02-20 16:45:38 -0800 | [diff] [blame] | 267 | |
Sameer Abu Asal | a843954 | 2013-02-14 16:06:42 -0800 | [diff] [blame] | 268 | double value = lower_value + (upper_value - lower_value) * |
| 269 | (per - lower_perc) / (upper_perc - lower_perc); |
Sameer Abu Asal | 857a078 | 2013-02-21 11:00:20 -0800 | [diff] [blame] | 270 | |
| 271 | if (value < min_value_added_) { |
| 272 | value = min_value_added_; |
| 273 | } else if (value > max_value_added_) { |
| 274 | value = max_value_added_; |
| 275 | } |
| 276 | |
Sameer Abu Asal | a843954 | 2013-02-14 16:06:42 -0800 | [diff] [blame] | 277 | return value; |
| 278 | } |
| 279 | |
Ian Rogers | 647b1a8 | 2014-10-10 11:02:11 -0700 | [diff] [blame] | 280 | #pragma clang diagnostic pop |
Ian Rogers | 647b1a8 | 2014-10-10 11:02:11 -0700 | [diff] [blame] | 281 | |
Sameer Abu Asal | c081e36 | 2013-02-20 16:45:38 -0800 | [diff] [blame] | 282 | } // namespace art |
David Sehr | c431b9d | 2018-03-02 12:01:51 -0800 | [diff] [blame] | 283 | #endif // ART_LIBARTBASE_BASE_HISTOGRAM_INL_H_ |