blob: 2993fbef3f0765ae1c70f88206d1a8ea520c651d [file] [log] [blame]
Sameer Abu Asala8439542013-02-14 16:06:42 -08001/*
2 * Copyright (C) 2013 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
David Sehrc431b9d2018-03-02 12:01:51 -080016#ifndef ART_LIBARTBASE_BASE_HISTOGRAM_H_
17#define ART_LIBARTBASE_BASE_HISTOGRAM_H_
Sameer Abu Asala8439542013-02-14 16:06:42 -080018
Sameer Abu Asala8439542013-02-14 16:06:42 -080019#include <string>
Andreas Gampe8cf9cb32017-07-19 09:28:38 -070020#include <vector>
Sameer Abu Asala8439542013-02-14 16:06:42 -080021
Andreas Gampe57943812017-12-06 21:39:13 -080022#include <android-base/macros.h>
Sameer Abu Asala8439542013-02-14 16:06:42 -080023
24namespace art {
25
26// Creates a data histogram for a better understanding of statistical data.
27// Histogram analysis goes beyond simple mean and standard deviation to provide
28// percentiles values, describing where the $% of the input data lies.
29// Designed to be simple and used with timing logger in art.
30
31template <class Value> class Histogram {
Sameer Abu Asala8439542013-02-14 16:06:42 -080032 const double kAdjust;
Sameer Abu Asala8439542013-02-14 16:06:42 -080033 const size_t kInitialBucketCount;
34
35 public:
Mathieu Chartiere5426c92013-08-01 13:55:42 -070036 class CumulativeData {
37 friend class Histogram<Value>;
38 std::vector<uint64_t> freq_;
39 std::vector<double> perc_;
40 };
41
Hans Boehm19736872019-04-22 10:20:22 -070042 // Minimum and initial number of allocated buckets in histogram.
43 static constexpr size_t kMinBuckets = 8;
Mathieu Chartier7410f292013-11-24 13:17:35 -080044 // Used by the cumulative timing logger to search the histogram set using for an existing split
45 // with the same name using CumulativeLogger::HistogramComparator.
Mathieu Chartier19b0a912013-11-20 14:07:54 -080046 explicit Histogram(const char* name);
Hans Boehm19736872019-04-22 10:20:22 -070047 // This is the expected constructor when creating new Histograms. Max_buckets must be even.
48 // Max_buckets, if specified, must be at least kMinBuckets.
Mathieu Chartiere5426c92013-08-01 13:55:42 -070049 Histogram(const char* name, Value initial_bucket_width, size_t max_buckets = 100);
Sameer Abu Asala8439542013-02-14 16:06:42 -080050 void AddValue(Value);
Mathieu Chartier70a596d2014-12-17 14:56:47 -080051 void AdjustAndAddValue(Value); // Add a value after dividing it by kAdjust.
Mathieu Chartiere5426c92013-08-01 13:55:42 -070052 // Builds the cumulative distribution function from the frequency data.
53 // Accumulative summation of frequencies.
54 // cumulative_freq[i] = sum(frequency[j] : 0 < j < i )
55 // Accumulative summation of percentiles; which is the frequency / SampleSize
56 // cumulative_perc[i] = sum(frequency[j] / SampleSize : 0 < j < i )
Mathieu Chartierb2f99362013-11-20 17:26:00 -080057 void CreateHistogram(CumulativeData* data) const;
Mathieu Chartiere5426c92013-08-01 13:55:42 -070058 // Reset the cumulative values, next time CreateHistogram is called it will recreate the cache.
Sameer Abu Asala8439542013-02-14 16:06:42 -080059 void Reset();
60 double Mean() const;
61 double Variance() const;
Mathieu Chartiere5426c92013-08-01 13:55:42 -070062 double Percentile(double per, const CumulativeData& data) const;
63 void PrintConfidenceIntervals(std::ostream& os, double interval,
64 const CumulativeData& data) const;
Nicolas Geoffraya4f81542016-03-08 16:57:48 +000065 void PrintMemoryUse(std::ostream& os) const;
Mathieu Chartiere5426c92013-08-01 13:55:42 -070066 void PrintBins(std::ostream& os, const CumulativeData& data) const;
Hiroshi Yamauchia1c9f012015-04-02 10:18:12 -070067 void DumpBins(std::ostream& os) const;
Mathieu Chartiere5426c92013-08-01 13:55:42 -070068 Value GetRange(size_t bucket_idx) const;
69 size_t GetBucketCount() const;
Sameer Abu Asala8439542013-02-14 16:06:42 -080070
71 uint64_t SampleSize() const {
72 return sample_size_;
73 }
74
75 Value Sum() const {
76 return sum_;
77 }
78
Mathieu Chartier19d46b42014-06-17 15:04:40 -070079 Value AdjustedSum() const {
80 return sum_ * kAdjust;
81 }
82
Sameer Abu Asala8439542013-02-14 16:06:42 -080083 Value Min() const {
84 return min_value_added_;
85 }
86
87 Value Max() const {
88 return max_value_added_;
89 }
90
Mathieu Chartier0dce75d2016-05-11 11:35:39 -070091 Value BucketWidth() const {
92 return bucket_width_;
93 }
94
Mathieu Chartiere5426c92013-08-01 13:55:42 -070095 const std::string& Name() const {
Sameer Abu Asala8439542013-02-14 16:06:42 -080096 return name_;
97 }
98
Sameer Abu Asala8439542013-02-14 16:06:42 -080099 private:
Sameer Abu Asala8439542013-02-14 16:06:42 -0800100 void Initialize();
Mathieu Chartiere5426c92013-08-01 13:55:42 -0700101 size_t FindBucket(Value val) const;
102 void BucketiseValue(Value val);
Sameer Abu Asala8439542013-02-14 16:06:42 -0800103 // Add more buckets to the histogram to fill in a new value that exceeded
104 // the max_read_value_.
Mathieu Chartiere5426c92013-08-01 13:55:42 -0700105 void GrowBuckets(Value val);
Sameer Abu Asala8439542013-02-14 16:06:42 -0800106 std::string name_;
Mathieu Chartiere5426c92013-08-01 13:55:42 -0700107 // Maximum number of buckets.
108 const size_t max_buckets_;
Sameer Abu Asala8439542013-02-14 16:06:42 -0800109 // Number of samples placed in histogram.
Mathieu Chartiere5426c92013-08-01 13:55:42 -0700110 size_t sample_size_;
Sameer Abu Asala8439542013-02-14 16:06:42 -0800111 // Width of the bucket range. The lower the value is the more accurate
Mathieu Chartiere5426c92013-08-01 13:55:42 -0700112 // histogram percentiles are. Grows adaptively when we hit max buckets.
Sameer Abu Asala8439542013-02-14 16:06:42 -0800113 Value bucket_width_;
Mathieu Chartiere5426c92013-08-01 13:55:42 -0700114 // How many occurrences of values fall within a bucket at index i where i covers the range
115 // starting at min_ + i * bucket_width_ with size bucket_size_.
116 std::vector<uint32_t> frequency_;
Sameer Abu Asala8439542013-02-14 16:06:42 -0800117 // Summation of all the elements inputed by the user.
118 Value sum_;
Sameer Abu Asala8439542013-02-14 16:06:42 -0800119 // Minimum value that can fit in the histogram. Fixed to zero for now.
Mathieu Chartiere5426c92013-08-01 13:55:42 -0700120 Value min_;
121 // Maximum value that can fit in the histogram, grows adaptively.
Sameer Abu Asala8439542013-02-14 16:06:42 -0800122 Value max_;
123 // Summation of the values entered. Used to calculate variance.
124 Value sum_of_squares_;
125 // Maximum value entered in the histogram.
126 Value min_value_added_;
127 // Minimum value entered in the histogram.
128 Value max_value_added_;
129
130 DISALLOW_COPY_AND_ASSIGN(Histogram);
131};
Mathieu Chartier02e25112013-08-14 16:14:24 -0700132} // namespace art
Sameer Abu Asala8439542013-02-14 16:06:42 -0800133
David Sehrc431b9d2018-03-02 12:01:51 -0800134#endif // ART_LIBARTBASE_BASE_HISTOGRAM_H_