Sameer Abu Asal | a843954 | 2013-02-14 16:06:42 -0800 | [diff] [blame^] | 1 | /* |
| 2 | * Copyright (C) 2013 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | #ifndef SRC_BASE_HISTOGRAM_INL_H_ |
| 18 | #define SRC_BASE_HISTOGRAM_INL_H_ |
| 19 | |
| 20 | #include "histogram.h" |
| 21 | |
| 22 | #include "utils.h" |
| 23 | |
| 24 | #include <algorithm> |
| 25 | #include <cmath> |
| 26 | #include <limits> |
| 27 | #include <ostream> |
| 28 | |
| 29 | namespace art { |
| 30 | |
| 31 | template <class Value> inline void Histogram<Value>::AddValue(Value value) { |
| 32 | CHECK_GE(value, 0.0); |
| 33 | if (value >= max_) { |
| 34 | Value new_max = ((value + 1) / bucket_width_ + 1) * bucket_width_; |
| 35 | DCHECK_GT(new_max, max_); |
| 36 | GrowBuckets(new_max); |
| 37 | } |
| 38 | |
| 39 | BucketiseValue(value); |
| 40 | new_values_added_ = true; |
| 41 | } |
| 42 | |
| 43 | template <class Value> |
| 44 | inline Histogram<Value>::Histogram(const std::string name) |
| 45 | : kAdjust(1000), |
| 46 | kBucketWidth(5), |
| 47 | kInitialBucketCount(10), |
| 48 | bucket_width_(kBucketWidth), |
| 49 | bucket_count_(kInitialBucketCount) { |
| 50 | name_ = name; |
| 51 | Reset(); |
| 52 | } |
| 53 | |
| 54 | template <class Value> |
| 55 | inline void Histogram<Value>::GrowBuckets(Value new_max) { |
| 56 | while (max_ < new_max) { |
| 57 | max_ += bucket_width_; |
| 58 | ranges_.push_back(max_); |
| 59 | frequency_.push_back(0); |
| 60 | bucket_count_++; |
| 61 | } |
| 62 | } |
| 63 | |
| 64 | template <class Value> inline size_t Histogram<Value>::FindBucket(Value val) { |
| 65 | // Since this is only a linear histogram, bucket index can be found simply with |
| 66 | // dividing the value by the bucket width. |
| 67 | DCHECK_GE(val, min_); |
| 68 | DCHECK_LE(val, max_); |
| 69 | size_t bucket_idx = static_cast<size_t>((double)(val - min_) / bucket_width_); |
| 70 | DCHECK_GE(bucket_idx, 0ul); |
| 71 | DCHECK_LE(bucket_idx, bucket_count_); |
| 72 | return bucket_idx; |
| 73 | } |
| 74 | |
| 75 | template <class Value> |
| 76 | inline void Histogram<Value>::BucketiseValue(Value value) { |
| 77 | CHECK_LT(value, max_); |
| 78 | sum_ += value; |
| 79 | sum_of_squares_ += value * value; |
| 80 | size_t bucket_idx = FindBucket(value); |
| 81 | sample_size_++; |
| 82 | if (value > max_value_added_) { |
| 83 | max_value_added_ = value; |
| 84 | } |
| 85 | if (value < min_value_added_) { |
| 86 | min_value_added_ = value; |
| 87 | } |
| 88 | frequency_[bucket_idx]++; |
| 89 | } |
| 90 | |
| 91 | template <class Value> inline void Histogram<Value>::Initialize() { |
| 92 | DCHECK_GT(bucket_count_, 0ul); |
| 93 | size_t idx = 0; |
| 94 | for (; idx < bucket_count_; idx++) { |
| 95 | ranges_.push_back(min_ + static_cast<Value>(idx) * (bucket_width_)); |
| 96 | frequency_.push_back(0); |
| 97 | } |
| 98 | // Cumulative frequency and ranges has a length of 1 over frequency. |
| 99 | ranges_.push_back(min_ + idx * bucket_width_); |
| 100 | max_ = bucket_width_ * bucket_count_; |
| 101 | } |
| 102 | |
| 103 | template <class Value> inline void Histogram<Value>::Reset() { |
| 104 | bucket_width_ = kBucketWidth; |
| 105 | bucket_count_ = kInitialBucketCount; |
| 106 | max_ = bucket_width_ * bucket_count_; |
| 107 | sum_of_squares_ = 0; |
| 108 | sample_size_ = 0; |
| 109 | min_ = 0; |
| 110 | sum_ = 0; |
| 111 | min_value_added_ = std::numeric_limits<Value>::max(); |
| 112 | max_value_added_ = std::numeric_limits<Value>::min(); |
| 113 | new_values_added_ = false; |
| 114 | ranges_.clear(); |
| 115 | frequency_.clear(); |
| 116 | cumulative_freq_.clear(); |
| 117 | cumulative_perc_.clear(); |
| 118 | Initialize(); |
| 119 | } |
| 120 | |
| 121 | template <class Value> inline void Histogram<Value>::BuildRanges() { |
| 122 | for (size_t idx = 0; idx < bucket_count_; ++idx) { |
| 123 | ranges_.push_back(min_ + idx * bucket_width_); |
| 124 | } |
| 125 | } |
| 126 | |
| 127 | template <class Value> inline double Histogram<Value>::Mean() const { |
| 128 | DCHECK_GT(sample_size_, 0ull); |
| 129 | return static_cast<double>(sum_) / static_cast<double>(sample_size_); |
| 130 | } |
| 131 | |
| 132 | template <class Value> inline double Histogram<Value>::Variance() const { |
| 133 | DCHECK_GT(sample_size_, 0ull); |
| 134 | // Using algorithms for calculating variance over a population: |
| 135 | // http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance |
| 136 | Value sum_squared = sum_ * sum_; |
| 137 | double sum_squared_by_n_squared = |
| 138 | static_cast<double>(sum_squared) / |
| 139 | static_cast<double>(sample_size_ * sample_size_); |
| 140 | double sum_of_squares_by_n = |
| 141 | static_cast<double>(sum_of_squares_) / static_cast<double>(sample_size_); |
| 142 | return sum_of_squares_by_n - sum_squared_by_n_squared; |
| 143 | } |
| 144 | |
| 145 | template <class Value> |
| 146 | inline void Histogram<Value>::PrintBins(std::ostream &os) { |
| 147 | DCHECK_GT(sample_size_, 0ull); |
| 148 | DCHECK(new_values_added_); |
| 149 | size_t bin_idx = 0; |
| 150 | while (bin_idx < cumulative_freq_.size()) { |
| 151 | if (bin_idx > 0 && |
| 152 | cumulative_perc_[bin_idx] == cumulative_perc_[bin_idx - 1]) { |
| 153 | bin_idx++; |
| 154 | continue; |
| 155 | } |
| 156 | os << ranges_[bin_idx] << ": " << cumulative_freq_[bin_idx] << "\t" |
| 157 | << cumulative_perc_[bin_idx] * 100.0 << "%\n"; |
| 158 | bin_idx++; |
| 159 | } |
| 160 | } |
| 161 | |
| 162 | template <class Value> |
| 163 | inline void Histogram<Value>::PrintConfidenceIntervals(std::ostream &os, |
| 164 | double interval) const { |
| 165 | DCHECK_GT(interval, 0); |
| 166 | DCHECK_LT(interval, 1.0); |
| 167 | |
| 168 | double per_0 = (1.0 - interval) / 2.0; |
| 169 | double per_1 = per_0 + interval; |
| 170 | os << Name() << ":\t"; |
| 171 | TimeUnit unit = GetAppropriateTimeUnit(Mean() * kAdjust); |
| 172 | os << interval << "% C.I. " |
| 173 | << FormatDuration(Percentile(per_0) * kAdjust, unit); |
| 174 | os << "-" << FormatDuration(Percentile(per_1) * kAdjust, unit) << " "; |
| 175 | os << "Avg: " << FormatDuration(Mean() * kAdjust, unit) << " Max: "; |
| 176 | os << FormatDuration(Max() * kAdjust, unit) << "\n"; |
| 177 | } |
| 178 | |
| 179 | template <class Value> inline void Histogram<Value>::BuildCDF() { |
| 180 | DCHECK_EQ(cumulative_freq_.size(), 0ull); |
| 181 | DCHECK_EQ(cumulative_perc_.size(), 0ull); |
| 182 | uint64_t accumulated = 0; |
| 183 | |
| 184 | cumulative_freq_.push_back(accumulated); |
| 185 | cumulative_perc_.push_back(0.0); |
| 186 | for (size_t idx = 0; idx < frequency_.size(); idx++) { |
| 187 | accumulated += frequency_[idx]; |
| 188 | cumulative_freq_.push_back(accumulated); |
| 189 | cumulative_perc_.push_back(static_cast<double>(accumulated) / |
| 190 | static_cast<double>(sample_size_)); |
| 191 | } |
| 192 | DCHECK_EQ(*(cumulative_freq_.end() - 1), sample_size_); |
| 193 | DCHECK_EQ(*(cumulative_perc_.end() - 1), 1.0); |
| 194 | } |
| 195 | |
| 196 | template <class Value> inline void Histogram<Value>::CreateHistogram() { |
| 197 | DCHECK_GT(sample_size_, 0ull); |
| 198 | |
| 199 | // Create a histogram only if new values are added. |
| 200 | if (!new_values_added_) |
| 201 | return; |
| 202 | |
| 203 | // Reset cumulative values in case this is not the first time creating histogram. |
| 204 | cumulative_freq_.clear(); |
| 205 | cumulative_perc_.clear(); |
| 206 | BuildCDF(); |
| 207 | new_values_added_ = false; |
| 208 | } |
| 209 | ; |
| 210 | |
| 211 | template <class Value> |
| 212 | inline double Histogram<Value>::Percentile(double per) const { |
| 213 | DCHECK_GT(cumulative_perc_.size(), 0ull); |
| 214 | size_t idx; |
| 215 | for (idx = 0; idx < cumulative_perc_.size(); idx++) { |
| 216 | if (per <= cumulative_perc_[idx + 1]) |
| 217 | break; |
| 218 | } |
| 219 | double lower_value = static_cast<double>(ranges_[idx]); |
| 220 | double upper_value = static_cast<double>(ranges_[idx + 1]); |
| 221 | double lower_perc = cumulative_perc_[idx]; |
| 222 | double upper_perc = cumulative_perc_[idx + 1]; |
| 223 | if (per == lower_perc) { |
| 224 | return lower_value; |
| 225 | } |
| 226 | if (per == upper_perc) { |
| 227 | return upper_value; |
| 228 | } |
| 229 | DCHECK_GT(upper_perc, lower_perc); |
| 230 | double value = lower_value + (upper_value - lower_value) * |
| 231 | (per - lower_perc) / (upper_perc - lower_perc); |
| 232 | |
| 233 | return value; |
| 234 | } |
| 235 | |
| 236 | } // namespace art |
| 237 | #endif // SRC_BASE_HISTOGRAM_INL_H_ |
| 238 | |