// SPDX-License-Identifier: GPL-2.0
/*
 * Per Entity Load Tracking
 *
 * Copyright (C) 2007 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
 *
 * Interactivity improvements by Mike Galbraith
 * (C) 2007 Mike Galbraith <efault@gmx.de>
 *
 * Various enhancements by Dmitry Adamushko.
 * (C) 2007 Dmitry Adamushko <dmitry.adamushko@gmail.com>
 *
 * Group scheduling enhancements by Srivatsa Vaddagiri
 * Copyright IBM Corporation, 2007
 * Author: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
 *
 * Scaled math optimizations by Thomas Gleixner
 * Copyright (C) 2007, Thomas Gleixner <tglx@linutronix.de>
 *
 * Adaptive scheduling granularity, math enhancements by Peter Zijlstra
 * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
 *
 * Move PELT related code from fair.c into this pelt.c file
 * Author: Vincent Guittot <vincent.guittot@linaro.org>
 */

#include <linux/sched.h>
#include "sched.h"
#include "sched-pelt.h"
#include "pelt.h"

/*
 * Approximate:
 *   val * y^n,    where y^32 ~= 0.5 (~1 scheduling period)
 */
static u64 decay_load(u64 val, u64 n)
{
	unsigned int local_n;

	if (unlikely(n > LOAD_AVG_PERIOD * 63))
		return 0;

	/* after bounds checking we can collapse to 32-bit */
	local_n = n;

	/*
	 * As y^PERIOD = 1/2, we can combine
	 *    y^n = 1/2^(n/PERIOD) * y^(n%PERIOD)
	 * With a look-up table which covers y^n (n<PERIOD)
	 *
	 * To achieve constant time decay_load.
	 */
	if (unlikely(local_n >= LOAD_AVG_PERIOD)) {
		val >>= local_n / LOAD_AVG_PERIOD;
		local_n %= LOAD_AVG_PERIOD;
	}

	val = mul_u64_u32_shr(val, runnable_avg_yN_inv[local_n], 32);
	return val;
}
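
/*
 * Worked example of the split above (illustrative numbers only, assuming
 * LOAD_AVG_PERIOD == 32 and LOAD_AVG_MAX == 47742 as generated in
 * sched-pelt.h): for n = 100,
 *
 *   100 / 32 == 3 half-life shifts  ->  val >>= 3      (multiply by 1/8)
 *   100 % 32 == 4                   ->  val * y^4 via runnable_avg_yN_inv[4]
 *
 * so decay_load(47742, 100) lands around 5470, i.e. a contribution from
 * ~100ms ago retains roughly 11% of its original weight, at the cost of
 * one shift and one multiply regardless of n.
 */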

static u32 __accumulate_pelt_segments(u64 periods, u32 d1, u32 d3)
{
	u32 c1, c2, c3 = d3; /* y^0 == 1 */

	/*
	 * c1 = d1 y^p
	 */
	c1 = decay_load((u64)d1, periods);

	/*
	 *            p-1
	 * c2 = 1024 \Sum y^n
	 *            n=1
	 *
	 *              inf        inf
	 *    = 1024 ( \Sum y^n - \Sum y^n - y^0 )
	 *              n=0        n=p
	 */
	c2 = LOAD_AVG_MAX - decay_load(LOAD_AVG_MAX, periods) - 1024;

	return c1 + c2 + c3;
}
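
/*
 * Rough numeric check of the three segments (illustrative only, assuming
 * LOAD_AVG_MAX == 47742 ~= 1024 * \Sum_{n>=0} y^n): with p = 2, d1 = 300
 * and d3 = 200,
 *
 *   c1 = decay_load(300, 2)                   ~= 300 * y^2  ~= 287
 *   c2 = 47742 - decay_load(47742, 2) - 1024  ~= 1000, close to the exact
 *                                                1024 * y   ~= 1002
 *   c3 = 200
 *
 * i.e. approximately 300 y^2 + 1024 y + 200, the three age-weighted spans
 * of the diagram that follows.
 */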

#define cap_scale(v, s) ((v)*(s) >> SCHED_CAPACITY_SHIFT)

/*
 * Accumulate the three separate parts of the sum; d1 the remainder
 * of the last (incomplete) period, d2 the span of full periods and d3
 * the remainder of the (incomplete) current period.
 *
 *           d1          d2           d3
 *           ^           ^            ^
 *           |           |            |
 *         |<->|<----------------->|<--->|
 * ... |---x---|------| ... |------|-----x (now)
 *
 *                           p-1
 * u' = (u + d1) y^p + 1024 \Sum y^n + d3 y^0
 *                           n=1
 *
 *    = u y^p +					(Step 1)
 *
 *                     p-1
 *      d1 y^p + 1024 \Sum y^n + d3 y^0		(Step 2)
 *                     n=1
 */
static __always_inline u32
accumulate_sum(u64 delta, int cpu, struct sched_avg *sa,
	       unsigned long load, unsigned long runnable, int running)
{
	unsigned long scale_freq, scale_cpu;
	u32 contrib = (u32)delta; /* p == 0 -> delta < 1024 */
	u64 periods;

	scale_freq = arch_scale_freq_capacity(cpu);
	scale_cpu = arch_scale_cpu_capacity(NULL, cpu);

	delta += sa->period_contrib;
	periods = delta / 1024; /* A period is 1024us (~1ms) */

	/*
	 * Step 1: decay old *_sum if we crossed period boundaries.
	 */
	if (periods) {
		sa->load_sum = decay_load(sa->load_sum, periods);
		sa->runnable_load_sum =
			decay_load(sa->runnable_load_sum, periods);
		sa->util_sum = decay_load((u64)(sa->util_sum), periods);

		/*
		 * Step 2
		 */
		delta %= 1024;
		contrib = __accumulate_pelt_segments(periods,
				1024 - sa->period_contrib, delta);
	}
	sa->period_contrib = delta;

	contrib = cap_scale(contrib, scale_freq);
	if (load)
		sa->load_sum += load * contrib;
	if (runnable)
		sa->runnable_load_sum += runnable * contrib;
	if (running)
		sa->util_sum += contrib * scale_cpu;

	return periods;
}
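
/*
 * A walk-through of the two steps above, with made-up numbers: suppose
 * sa->period_contrib == 200 and 2500us of new time arrive (delta == 2500).
 * Then delta + period_contrib == 2700, periods == 2, d1 == 1024 - 200 == 824
 * and d3 == 2700 % 1024 == 652. The old *_sum values are decayed by y^2
 * (Step 1), the new contribution is __accumulate_pelt_segments(2, 824, 652)
 * (Step 2), and period_contrib is left at 652 for the next update.
 */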

/*
 * We can represent the historical contribution to runnable average as the
 * coefficients of a geometric series.  To do this we sub-divide our runnable
 * history into segments of approximately 1ms (1024us); label the segment that
 * occurred N-ms ago p_N, with p_0 corresponding to the current period, e.g.
 *
 * [<- 1024us ->|<- 1024us ->|<- 1024us ->| ...
 *       p0            p1           p2
 *      (now)       (~1ms ago)  (~2ms ago)
 *
 * Let u_i denote the fraction of p_i that the entity was runnable.
 *
 * We then designate the fractions u_i as our coefficients, yielding the
 * following representation of historical load:
 *   u_0 + u_1*y + u_2*y^2 + u_3*y^3 + ...
 *
 * We choose y based on the width of a reasonable scheduling period, fixing:
 *   y^32 = 0.5
 *
 * This means that the contribution to load ~32ms ago (u_32) will be weighted
 * approximately half as much as the contribution to load within the last ms
 * (u_0).
 *
 * When a period "rolls over" and we have new u_0`, multiplying the previous
 * sum again by y is sufficient to update:
 *   load_avg = u_0` + y*(u_0 + u_1*y + u_2*y^2 + ... )
 *            = u_0 + u_1*y + u_2*y^2 + ... [re-labeling u_i --> u_{i+1}]
 */
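
/*
 * Ballpark feel for the weighting this gives (y^32 == 0.5 by construction,
 * so y ~= 0.97857; numbers are illustrative): a segment that ended ~32ms
 * ago counts half, ~64ms ago a quarter, and ~100ms ago roughly
 * u * y^100 ~= 0.11 * u, so the average effectively forgets activity older
 * than a few hundred milliseconds.
 */
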
static __always_inline int
___update_load_sum(u64 now, int cpu, struct sched_avg *sa,
		  unsigned long load, unsigned long runnable, int running)
{
	u64 delta;

	delta = now - sa->last_update_time;
	/*
	 * This should only happen when time goes backwards, which it
	 * unfortunately does during sched clock init when we swap over to TSC.
	 */
	if ((s64)delta < 0) {
		sa->last_update_time = now;
		return 0;
	}

	/*
	 * Use 1024ns as the unit of measurement since it's a reasonable
	 * approximation of 1us and fast to compute.
	 */
	delta >>= 10;
	if (!delta)
		return 0;

	sa->last_update_time += delta << 10;

	/*
	 * running is a subset of runnable (weight) so running can't be set if
	 * runnable is clear. But there are some corner cases where the current
	 * se has already been dequeued while cfs_rq->curr still points to it.
	 * In that case weight is 0 although running is still set, both for a
	 * sched_entity and for a cfs_rq if the latter becomes idle. As an
	 * example, this happens during idle_balance() which calls
	 * update_blocked_averages().
	 */
	if (!load)
		runnable = running = 0;

	/*
	 * Now we know we crossed measurement unit boundaries. The *_avg
	 * accrues by two steps:
	 *
	 * Step 1: accumulate *_sum since last_update_time. If we haven't
	 * crossed period boundaries, finish.
	 */
	if (!accumulate_sum(delta, cpu, sa, load, runnable, running))
		return 0;

	return 1;
}

static __always_inline void
___update_load_avg(struct sched_avg *sa, unsigned long load, unsigned long runnable)
{
	u32 divider = LOAD_AVG_MAX - 1024 + sa->period_contrib;

	/*
	 * Step 2: update *_avg.
	 */
	sa->load_avg = div_u64(load * sa->load_sum, divider);
	sa->runnable_load_avg = div_u64(runnable * sa->runnable_load_sum, divider);
	sa->util_avg = sa->util_sum / divider;
}
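
/*
 * Informal sketch of the divider above (not authoritative): LOAD_AVG_MAX is
 * the *_sum an always-running entity converges to at the moment a 1024us
 * period completes. Mid-period, only sa->period_contrib of the current
 * segment has elapsed, so the highest sum reachable right now is
 * LOAD_AVG_MAX - 1024 + sa->period_contrib; dividing by that keeps
 * util_avg within [0, 1024] and load_avg within [0, load].
 */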

/*
 * sched_entity:
 *
 *   task:
 *     se_runnable() == se_weight()
 *
 *   group: [ see update_cfs_group() ]
 *     se_weight()   = tg->weight * grq->load_avg / tg->load_avg
 *     se_runnable() = se_weight(se) * grq->runnable_load_avg / grq->load_avg
 *
 *   load_sum := runnable_sum
 *   load_avg = se_weight(se) * runnable_avg
 *
 *   runnable_load_sum := runnable_sum
 *   runnable_load_avg = se_runnable(se) * runnable_avg
 *
 * XXX collapse load_sum and runnable_load_sum
 *
 * cfs_rq:
 *
 *   load_sum = \Sum se_weight(se) * se->avg.load_sum
 *   load_avg = \Sum se->avg.load_avg
 *
 *   runnable_load_sum = \Sum se_runnable(se) * se->avg.runnable_load_sum
 *   runnable_load_avg = \Sum se->avg.runnable_load_avg
 */
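
/*
 * Example of the group se_weight() formula above (made-up numbers, purely
 * illustrative): a task group with tg->weight == 1024 whose load is spread
 * over several CPUs such that this CPU's grq->load_avg == 512 while
 * tg->load_avg == 2048 gets se_weight() == 1024 * 512 / 2048 == 256 here,
 * i.e. a quarter of the group weight, matching this CPU's share of the
 * group's load.
 */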

int __update_load_avg_blocked_se(u64 now, int cpu, struct sched_entity *se)
{
	if (entity_is_task(se))
		se->runnable_weight = se->load.weight;

	if (___update_load_sum(now, cpu, &se->avg, 0, 0, 0)) {
		___update_load_avg(&se->avg, se_weight(se), se_runnable(se));
		return 1;
	}

	return 0;
}

int __update_load_avg_se(u64 now, int cpu, struct cfs_rq *cfs_rq, struct sched_entity *se)
{
	if (entity_is_task(se))
		se->runnable_weight = se->load.weight;

	if (___update_load_sum(now, cpu, &se->avg, !!se->on_rq, !!se->on_rq,
				cfs_rq->curr == se)) {

		___update_load_avg(&se->avg, se_weight(se), se_runnable(se));
		cfs_se_util_change(&se->avg);
		return 1;
	}

	return 0;
}

int __update_load_avg_cfs_rq(u64 now, int cpu, struct cfs_rq *cfs_rq)
{
	if (___update_load_sum(now, cpu, &cfs_rq->avg,
				scale_load_down(cfs_rq->load.weight),
				scale_load_down(cfs_rq->runnable_weight),
				cfs_rq->curr != NULL)) {

		___update_load_avg(&cfs_rq->avg, 1, 1);
		return 1;
	}

	return 0;
}

/*
 * rt_rq:
 *
 *   util_sum = \Sum se->avg.util_sum but se->avg.util_sum is not tracked
 *   util_sum = cpu_scale * load_sum
 *   runnable_load_sum = load_sum
 *
 *   load_avg and runnable_load_avg are not supported and meaningless.
 *
 */

int update_rt_rq_load_avg(u64 now, struct rq *rq, int running)
{
	if (___update_load_sum(now, rq->cpu, &rq->avg_rt,
				running,
				running,
				running)) {

		___update_load_avg(&rq->avg_rt, 1, 1);
		return 1;
	}

	return 0;
}
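
/*
 * Informal reading of the call above: while an RT task runs on the CPU,
 * load/runnable/running all contribute 1 per elapsed 1024us segment, so
 * rq->avg_rt.util_avg ramps towards the CPU's current capacity (at most
 * 1024 == SCHED_CAPACITY_SCALE); when nothing RT runs it decays back
 * towards 0 with the same y^32 == 0.5 half-life as the CFS signals.
 */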

/*
 * dl_rq:
 *
 *   util_sum = \Sum se->avg.util_sum but se->avg.util_sum is not tracked
 *   util_sum = cpu_scale * load_sum
 *   runnable_load_sum = load_sum
 *
 */

int update_dl_rq_load_avg(u64 now, struct rq *rq, int running)
{
	if (___update_load_sum(now, rq->cpu, &rq->avg_dl,
				running,
				running,
				running)) {

		___update_load_avg(&rq->avg_dl, 1, 1);
		return 1;
	}

	return 0;
}

#if defined(CONFIG_IRQ_TIME_ACCOUNTING) || defined(CONFIG_PARAVIRT_TIME_ACCOUNTING)
/*
 * irq:
 *
 *   util_sum = \Sum se->avg.util_sum but se->avg.util_sum is not tracked
 *   util_sum = cpu_scale * load_sum
 *   runnable_load_sum = load_sum
 *
 */

int update_irq_load_avg(struct rq *rq, u64 running)
{
	int ret = 0;
	/*
	 * We know how much time has been spent in interrupt context since the
	 * last update, but not when it happened. Be pessimistic and assume
	 * the interrupt happened just before this update. This is not far
	 * from reality because an interrupt will most probably wake up a task
	 * and trigger an update of the rq clock, during which the metric is
	 * updated.
	 * We start to decay with normal context time and then we add the
	 * interrupt context time.
	 * We can safely remove running from rq->clock because
	 * rq->clock += delta with delta >= running
	 */
	ret = ___update_load_sum(rq->clock - running, rq->cpu, &rq->avg_irq,
				0,
				0,
				0);
	ret += ___update_load_sum(rq->clock, rq->cpu, &rq->avg_irq,
				1,
				1,
				1);

	if (ret)
		___update_load_avg(&rq->avg_irq, 1, 1);

	return ret;
}
#endif
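
/*
 * Informal sketch of the two ___update_load_sum() calls above (numbers are
 * illustrative): if 3000us elapsed since the last update and 500us of that
 * was spent in interrupts, the first call advances rq->avg_irq by 2500us
 * with load/runnable/running all 0 (pure decay, as if nothing ran), and the
 * second call accounts the remaining 500us as fully busy, so the irq signal
 * rises with time spent in interrupts and decays otherwise.
 */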