Blame - kernel/sched/topology.c - SHIFTPHONES/mainline/linux

blob: 1bd7e3af904f6d2724c260483808107d1898a568 [file] [log] [blame]

Greg Kroah-Hartman	b244131	2017-11-01 15:07:57 +0100	[diff] [blame]	1	// SPDX-License-Identifier: GPL-2.0
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	2	/*
				3	* Scheduler topology setup/handling methods
				4	*/
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	5	#include "sched.h"
				6
				7	DEFINE_MUTEX(sched_domains_mutex);
				8
				9	/* Protected by sched_domains_mutex: */
zhong jiang	ace8031	2018-08-03 20:37:32 +0800	[diff] [blame]	10	static cpumask_var_t sched_domains_tmpmask;
				11	static cpumask_var_t sched_domains_tmpmask2;
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	12
				13	#ifdef CONFIG_SCHED_DEBUG
				14
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	15	static int __init sched_debug_setup(char *str)
				16	{
Peter Zijlstra	9469eb0	2017-09-07 17:03:53 +0200	[diff] [blame]	17	sched_debug_enabled = true;
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	18
				19	return 0;
				20	}
				21	early_param("sched_debug", sched_debug_setup);
				22
				23	static inline bool sched_debug(void)
				24	{
				25	return sched_debug_enabled;
				26	}
				27
				28	static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
				29	struct cpumask *groupmask)
				30	{
				31	struct sched_group *group = sd->groups;
				32
				33	cpumask_clear(groupmask);
				34
Peter Zijlstra	005f874	2017-04-26 17:35:35 +0200	[diff] [blame]	35	printk(KERN_DEBUG "%*s domain-%d: ", level, "", level);
Peter Zijlstra	005f874	2017-04-26 17:35:35 +0200	[diff] [blame]	36	printk(KERN_CONT "span=%*pbl level=%s\n",
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	37	cpumask_pr_args(sched_domain_span(sd)), sd->name);
				38
				39	if (!cpumask_test_cpu(cpu, sched_domain_span(sd))) {
Ingo Molnar	97fb7a0	2018-03-03 14:01:12 +0100	[diff] [blame]	40	printk(KERN_ERR "ERROR: domain->span does not contain CPU%d\n", cpu);
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	41	}
Yi Wang	6cd0c58	2018-07-23 12:19:07 +0800	[diff] [blame]	42	if (group && !cpumask_test_cpu(cpu, sched_group_span(group))) {
Ingo Molnar	97fb7a0	2018-03-03 14:01:12 +0100	[diff] [blame]	43	printk(KERN_ERR "ERROR: domain->groups does not contain CPU%d\n", cpu);
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	44	}
				45
				46	printk(KERN_DEBUG "%*s groups:", level + 1, "");
				47	do {
				48	if (!group) {
				49	printk("\n");
				50	printk(KERN_ERR "ERROR: group is NULL\n");
				51	break;
				52	}
				53
Peter Zijlstra	ae4df9d	2017-05-01 11:03:12 +0200	[diff] [blame]	54	if (!cpumask_weight(sched_group_span(group))) {
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	55	printk(KERN_CONT "\n");
				56	printk(KERN_ERR "ERROR: empty group\n");
				57	break;
				58	}
				59
				60	if (!(sd->flags & SD_OVERLAP) &&
Peter Zijlstra	ae4df9d	2017-05-01 11:03:12 +0200	[diff] [blame]	61	cpumask_intersects(groupmask, sched_group_span(group))) {
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	62	printk(KERN_CONT "\n");
				63	printk(KERN_ERR "ERROR: repeated CPUs\n");
				64	break;
				65	}
				66
Peter Zijlstra	ae4df9d	2017-05-01 11:03:12 +0200	[diff] [blame]	67	cpumask_or(groupmask, groupmask, sched_group_span(group));
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	68
Peter Zijlstra	005f874	2017-04-26 17:35:35 +0200	[diff] [blame]	69	printk(KERN_CONT " %d:{ span=%*pbl",
				70	group->sgc->id,
Peter Zijlstra	ae4df9d	2017-05-01 11:03:12 +0200	[diff] [blame]	71	cpumask_pr_args(sched_group_span(group)));
Peter Zijlstra	b0151c2	2017-04-14 17:29:16 +0200	[diff] [blame]	72
Peter Zijlstra	af21812	2017-05-01 08:51:05 +0200	[diff] [blame]	73	if ((sd->flags & SD_OVERLAP) &&
Peter Zijlstra	ae4df9d	2017-05-01 11:03:12 +0200	[diff] [blame]	74	!cpumask_equal(group_balance_mask(group), sched_group_span(group))) {
Peter Zijlstra	005f874	2017-04-26 17:35:35 +0200	[diff] [blame]	75	printk(KERN_CONT " mask=%*pbl",
Peter Zijlstra	e5c14b1	2017-05-01 10:47:02 +0200	[diff] [blame]	76	cpumask_pr_args(group_balance_mask(group)));
Peter Zijlstra	b0151c2	2017-04-14 17:29:16 +0200	[diff] [blame]	77	}
				78
Peter Zijlstra	005f874	2017-04-26 17:35:35 +0200	[diff] [blame]	79	if (group->sgc->capacity != SCHED_CAPACITY_SCALE)
				80	printk(KERN_CONT " cap=%lu", group->sgc->capacity);
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	81
Peter Zijlstra	a420b06	2017-04-14 18:20:48 +0200	[diff] [blame]	82	if (group == sd->groups && sd->child &&
				83	!cpumask_equal(sched_domain_span(sd->child),
Peter Zijlstra	ae4df9d	2017-05-01 11:03:12 +0200	[diff] [blame]	84	sched_group_span(group))) {
Peter Zijlstra	a420b06	2017-04-14 18:20:48 +0200	[diff] [blame]	85	printk(KERN_ERR "ERROR: domain->groups does not match domain->child\n");
				86	}
				87
Peter Zijlstra	005f874	2017-04-26 17:35:35 +0200	[diff] [blame]	88	printk(KERN_CONT " }");
				89
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	90	group = group->next;
Peter Zijlstra	b0151c2	2017-04-14 17:29:16 +0200	[diff] [blame]	91
				92	if (group != sd->groups)
				93	printk(KERN_CONT ",");
				94
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	95	} while (group != sd->groups);
				96	printk(KERN_CONT "\n");
				97
				98	if (!cpumask_equal(sched_domain_span(sd), groupmask))
				99	printk(KERN_ERR "ERROR: groups don't span domain->span\n");
				100
				101	if (sd->parent &&
				102	!cpumask_subset(groupmask, sched_domain_span(sd->parent)))
Ingo Molnar	97fb7a0	2018-03-03 14:01:12 +0100	[diff] [blame]	103	printk(KERN_ERR "ERROR: parent span is not a superset of domain->span\n");
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	104	return 0;
				105	}
				106
				107	static void sched_domain_debug(struct sched_domain *sd, int cpu)
				108	{
				109	int level = 0;
				110
				111	if (!sched_debug_enabled)
				112	return;
				113
				114	if (!sd) {
				115	printk(KERN_DEBUG "CPU%d attaching NULL sched-domain.\n", cpu);
				116	return;
				117	}
				118
Peter Zijlstra	005f874	2017-04-26 17:35:35 +0200	[diff] [blame]	119	printk(KERN_DEBUG "CPU%d attaching sched-domain(s):\n", cpu);
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	120
				121	for (;;) {
				122	if (sched_domain_debug_one(sd, cpu, level, sched_domains_tmpmask))
				123	break;
				124	level++;
				125	sd = sd->parent;
				126	if (!sd)
				127	break;
				128	}
				129	}
				130	#else /* !CONFIG_SCHED_DEBUG */
				131
				132	# define sched_debug_enabled 0
				133	# define sched_domain_debug(sd, cpu) do { } while (0)
				134	static inline bool sched_debug(void)
				135	{
				136	return false;
				137	}
				138	#endif /* CONFIG_SCHED_DEBUG */
				139
				140	static int sd_degenerate(struct sched_domain *sd)
				141	{
				142	if (cpumask_weight(sched_domain_span(sd)) == 1)
				143	return 1;
				144
				145	/* Following flags need at least 2 groups */
Valentin Schneider	e669ac8	2020-04-15 22:05:06 +0100	[diff] [blame]	146	if (sd->flags & (SD_BALANCE_NEWIDLE \|
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	147	SD_BALANCE_FORK \|
				148	SD_BALANCE_EXEC \|
				149	SD_SHARE_CPUCAPACITY \|
				150	SD_ASYM_CPUCAPACITY \|
				151	SD_SHARE_PKG_RESOURCES \|
				152	SD_SHARE_POWERDOMAIN)) {
				153	if (sd->groups != sd->groups->next)
				154	return 0;
				155	}
				156
				157	/* Following flags don't use groups */
				158	if (sd->flags & (SD_WAKE_AFFINE))
				159	return 0;
				160
				161	return 1;
				162	}
				163
				164	static int
				165	sd_parent_degenerate(struct sched_domain sd, struct sched_domain parent)
				166	{
				167	unsigned long cflags = sd->flags, pflags = parent->flags;
				168
				169	if (sd_degenerate(parent))
				170	return 1;
				171
				172	if (!cpumask_equal(sched_domain_span(sd), sched_domain_span(parent)))
				173	return 0;
				174
				175	/* Flags needing groups don't count if only 1 group in parent */
				176	if (parent->groups == parent->groups->next) {
Valentin Schneider	e669ac8	2020-04-15 22:05:06 +0100	[diff] [blame]	177	pflags &= ~(SD_BALANCE_NEWIDLE \|
				178	SD_BALANCE_FORK \|
				179	SD_BALANCE_EXEC \|
				180	SD_ASYM_CPUCAPACITY \|
				181	SD_SHARE_CPUCAPACITY \|
				182	SD_SHARE_PKG_RESOURCES \|
				183	SD_PREFER_SIBLING \|
				184	SD_SHARE_POWERDOMAIN);
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	185	if (nr_node_ids == 1)
				186	pflags &= ~SD_SERIALIZE;
				187	}
				188	if (~cflags & pflags)
				189	return 0;
				190
				191	return 1;
				192	}
				193
Quentin Perret	531b5c9	2018-12-03 09:56:21 +0000	[diff] [blame]	194	#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
Peter Zijlstra	f8a696f	2018-12-05 11:23:56 +0100	[diff] [blame]	195	DEFINE_STATIC_KEY_FALSE(sched_energy_present);
Quentin Perret	8d5d0cf	2018-12-03 09:56:23 +0000	[diff] [blame]	196	unsigned int sysctl_sched_energy_aware = 1;
Quentin Perret	531b5c9	2018-12-03 09:56:21 +0000	[diff] [blame]	197	DEFINE_MUTEX(sched_energy_mutex);
				198	bool sched_energy_update;
				199
Quentin Perret	8d5d0cf	2018-12-03 09:56:23 +0000	[diff] [blame]	200	#ifdef CONFIG_PROC_SYSCTL
				201	int sched_energy_aware_handler(struct ctl_table *table, int write,
Christoph Hellwig	3292739	2020-04-24 08:43:38 +0200	[diff] [blame]	202	void buffer, size_t lenp, loff_t *ppos)
Quentin Perret	8d5d0cf	2018-12-03 09:56:23 +0000	[diff] [blame]	203	{
				204	int ret, state;
				205
				206	if (write && !capable(CAP_SYS_ADMIN))
				207	return -EPERM;
				208
				209	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
				210	if (!ret && write) {
				211	state = static_branch_unlikely(&sched_energy_present);
				212	if (state != sysctl_sched_energy_aware) {
				213	mutex_lock(&sched_energy_mutex);
				214	sched_energy_update = 1;
				215	rebuild_sched_domains();
				216	sched_energy_update = 0;
				217	mutex_unlock(&sched_energy_mutex);
				218	}
				219	}
				220
				221	return ret;
				222	}
				223	#endif
				224
Quentin Perret	6aa140f	2018-12-03 09:56:18 +0000	[diff] [blame]	225	static void free_pd(struct perf_domain *pd)
				226	{
				227	struct perf_domain *tmp;
				228
				229	while (pd) {
				230	tmp = pd->next;
				231	kfree(pd);
				232	pd = tmp;
				233	}
				234	}
				235
				236	static struct perf_domain find_pd(struct perf_domain pd, int cpu)
				237	{
				238	while (pd) {
				239	if (cpumask_test_cpu(cpu, perf_domain_span(pd)))
				240	return pd;
				241	pd = pd->next;
				242	}
				243
				244	return NULL;
				245	}
				246
				247	static struct perf_domain *pd_init(int cpu)
				248	{
				249	struct em_perf_domain *obj = em_cpu_get(cpu);
				250	struct perf_domain *pd;
				251
				252	if (!obj) {
				253	if (sched_debug())
				254	pr_info("%s: no EM found for CPU%d\n", __func__, cpu);
				255	return NULL;
				256	}
				257
				258	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
				259	if (!pd)
				260	return NULL;
				261	pd->em_pd = obj;
				262
				263	return pd;
				264	}
				265
				266	static void perf_domain_debug(const struct cpumask *cpu_map,
				267	struct perf_domain *pd)
				268	{
				269	if (!sched_debug() \|\| !pd)
				270	return;
				271
				272	printk(KERN_DEBUG "root_domain %*pbl:", cpumask_pr_args(cpu_map));
				273
				274	while (pd) {
Lukasz Luba	521b512	2020-05-27 10:58:47 +0100	[diff] [blame]	275	printk(KERN_CONT " pd%d:{ cpus=%*pbl nr_pstate=%d }",
Quentin Perret	6aa140f	2018-12-03 09:56:18 +0000	[diff] [blame]	276	cpumask_first(perf_domain_span(pd)),
				277	cpumask_pr_args(perf_domain_span(pd)),
Lukasz Luba	521b512	2020-05-27 10:58:47 +0100	[diff] [blame]	278	em_pd_nr_perf_states(pd->em_pd));
Quentin Perret	6aa140f	2018-12-03 09:56:18 +0000	[diff] [blame]	279	pd = pd->next;
				280	}
				281
				282	printk(KERN_CONT "\n");
				283	}
				284
				285	static void destroy_perf_domain_rcu(struct rcu_head *rp)
				286	{
				287	struct perf_domain *pd;
				288
				289	pd = container_of(rp, struct perf_domain, rcu);
				290	free_pd(pd);
				291	}
				292
Quentin Perret	1f74de8	2018-12-03 09:56:22 +0000	[diff] [blame]	293	static void sched_energy_set(bool has_eas)
				294	{
				295	if (!has_eas && static_branch_unlikely(&sched_energy_present)) {
				296	if (sched_debug())
				297	pr_info("%s: stopping EAS\n", __func__);
				298	static_branch_disable_cpuslocked(&sched_energy_present);
				299	} else if (has_eas && !static_branch_unlikely(&sched_energy_present)) {
				300	if (sched_debug())
				301	pr_info("%s: starting EAS\n", __func__);
				302	static_branch_enable_cpuslocked(&sched_energy_present);
				303	}
				304	}
				305
Quentin Perret	b68a4c0	2018-12-03 09:56:20 +0000	[diff] [blame]	306	/*
				307	* EAS can be used on a root domain if it meets all the following conditions:
				308	* 1. an Energy Model (EM) is available;
				309	* 2. the SD_ASYM_CPUCAPACITY flag is set in the sched_domain hierarchy.
Valentin Schneider	38502ab	2020-02-27 19:14:32 +0000	[diff] [blame]	310	* 3. no SMT is detected.
				311	* 4. the EM complexity is low enough to keep scheduling overheads low;
				312	* 5. schedutil is driving the frequency of all CPUs of the rd;
Quentin Perret	b68a4c0	2018-12-03 09:56:20 +0000	[diff] [blame]	313	*
				314	* The complexity of the Energy Model is defined as:
				315	*
Lukasz Luba	521b512	2020-05-27 10:58:47 +0100	[diff] [blame]	316	* C = nr_pd * (nr_cpus + nr_ps)
Quentin Perret	b68a4c0	2018-12-03 09:56:20 +0000	[diff] [blame]	317	*
				318	* with parameters defined as:
				319	* - nr_pd: the number of performance domains
				320	* - nr_cpus: the number of CPUs
Lukasz Luba	521b512	2020-05-27 10:58:47 +0100	[diff] [blame]	321	* - nr_ps: the sum of the number of performance states of all performance
Quentin Perret	b68a4c0	2018-12-03 09:56:20 +0000	[diff] [blame]	322	* domains (for example, on a system with 2 performance domains,
Lukasz Luba	521b512	2020-05-27 10:58:47 +0100	[diff] [blame]	323	* with 10 performance states each, nr_ps = 2 * 10 = 20).
Quentin Perret	b68a4c0	2018-12-03 09:56:20 +0000	[diff] [blame]	324	*
				325	* It is generally not a good idea to use such a model in the wake-up path on
				326	* very complex platforms because of the associated scheduling overheads. The
				327	* arbitrary constraint below prevents that. It makes EAS usable up to 16 CPUs
Lukasz Luba	521b512	2020-05-27 10:58:47 +0100	[diff] [blame]	328	* with per-CPU DVFS and less than 8 performance states each, for example.
Quentin Perret	b68a4c0	2018-12-03 09:56:20 +0000	[diff] [blame]	329	*/
				330	#define EM_MAX_COMPLEXITY 2048
				331
Quentin Perret	531b5c9	2018-12-03 09:56:21 +0000	[diff] [blame]	332	extern struct cpufreq_governor schedutil_gov;
Quentin Perret	1f74de8	2018-12-03 09:56:22 +0000	[diff] [blame]	333	static bool build_perf_domains(const struct cpumask *cpu_map)
Quentin Perret	6aa140f	2018-12-03 09:56:18 +0000	[diff] [blame]	334	{
Lukasz Luba	521b512	2020-05-27 10:58:47 +0100	[diff] [blame]	335	int i, nr_pd = 0, nr_ps = 0, nr_cpus = cpumask_weight(cpu_map);
Quentin Perret	6aa140f	2018-12-03 09:56:18 +0000	[diff] [blame]	336	struct perf_domain pd = NULL, tmp;
				337	int cpu = cpumask_first(cpu_map);
				338	struct root_domain *rd = cpu_rq(cpu)->rd;
Quentin Perret	531b5c9	2018-12-03 09:56:21 +0000	[diff] [blame]	339	struct cpufreq_policy *policy;
				340	struct cpufreq_governor *gov;
Quentin Perret	b68a4c0	2018-12-03 09:56:20 +0000	[diff] [blame]	341
Quentin Perret	8d5d0cf	2018-12-03 09:56:23 +0000	[diff] [blame]	342	if (!sysctl_sched_energy_aware)
				343	goto free;
				344
Quentin Perret	b68a4c0	2018-12-03 09:56:20 +0000	[diff] [blame]	345	/* EAS is enabled for asymmetric CPU capacity topologies. */
				346	if (!per_cpu(sd_asym_cpucapacity, cpu)) {
				347	if (sched_debug()) {
				348	pr_info("rd %*pbl: CPUs do not have asymmetric capacities\n",
				349	cpumask_pr_args(cpu_map));
				350	}
				351	goto free;
				352	}
Quentin Perret	6aa140f	2018-12-03 09:56:18 +0000	[diff] [blame]	353
Valentin Schneider	38502ab	2020-02-27 19:14:32 +0000	[diff] [blame]	354	/* EAS definitely does not handle SMT */
				355	if (sched_smt_active()) {
				356	pr_warn("rd %*pbl: Disabling EAS, SMT is not supported\n",
				357	cpumask_pr_args(cpu_map));
				358	goto free;
				359	}
				360
Quentin Perret	6aa140f	2018-12-03 09:56:18 +0000	[diff] [blame]	361	for_each_cpu(i, cpu_map) {
				362	/* Skip already covered CPUs. */
				363	if (find_pd(pd, i))
				364	continue;
				365
Quentin Perret	531b5c9	2018-12-03 09:56:21 +0000	[diff] [blame]	366	/* Do not attempt EAS if schedutil is not being used. */
				367	policy = cpufreq_cpu_get(i);
				368	if (!policy)
				369	goto free;
				370	gov = policy->governor;
				371	cpufreq_cpu_put(policy);
				372	if (gov != &schedutil_gov) {
				373	if (rd->pd)
				374	pr_warn("rd %*pbl: Disabling EAS, schedutil is mandatory\n",
				375	cpumask_pr_args(cpu_map));
				376	goto free;
				377	}
				378
Quentin Perret	6aa140f	2018-12-03 09:56:18 +0000	[diff] [blame]	379	/* Create the new pd and add it to the local list. */
				380	tmp = pd_init(i);
				381	if (!tmp)
				382	goto free;
				383	tmp->next = pd;
				384	pd = tmp;
Quentin Perret	b68a4c0	2018-12-03 09:56:20 +0000	[diff] [blame]	385
				386	/*
Lukasz Luba	521b512	2020-05-27 10:58:47 +0100	[diff] [blame]	387	* Count performance domains and performance states for the
Quentin Perret	b68a4c0	2018-12-03 09:56:20 +0000	[diff] [blame]	388	* complexity check.
				389	*/
				390	nr_pd++;
Lukasz Luba	521b512	2020-05-27 10:58:47 +0100	[diff] [blame]	391	nr_ps += em_pd_nr_perf_states(pd->em_pd);
Quentin Perret	b68a4c0	2018-12-03 09:56:20 +0000	[diff] [blame]	392	}
				393
				394	/* Bail out if the Energy Model complexity is too high. */
Lukasz Luba	521b512	2020-05-27 10:58:47 +0100	[diff] [blame]	395	if (nr_pd * (nr_ps + nr_cpus) > EM_MAX_COMPLEXITY) {
Quentin Perret	b68a4c0	2018-12-03 09:56:20 +0000	[diff] [blame]	396	WARN(1, "rd %*pbl: Failed to start EAS, EM complexity is too high\n",
				397	cpumask_pr_args(cpu_map));
				398	goto free;
Quentin Perret	6aa140f	2018-12-03 09:56:18 +0000	[diff] [blame]	399	}
				400
				401	perf_domain_debug(cpu_map, pd);
				402
				403	/* Attach the new list of performance domains to the root domain. */
				404	tmp = rd->pd;
				405	rcu_assign_pointer(rd->pd, pd);
				406	if (tmp)
				407	call_rcu(&tmp->rcu, destroy_perf_domain_rcu);
				408
Quentin Perret	1f74de8	2018-12-03 09:56:22 +0000	[diff] [blame]	409	return !!pd;
Quentin Perret	6aa140f	2018-12-03 09:56:18 +0000	[diff] [blame]	410
				411	free:
				412	free_pd(pd);
				413	tmp = rd->pd;
				414	rcu_assign_pointer(rd->pd, NULL);
				415	if (tmp)
				416	call_rcu(&tmp->rcu, destroy_perf_domain_rcu);
Quentin Perret	1f74de8	2018-12-03 09:56:22 +0000	[diff] [blame]	417
				418	return false;
Quentin Perret	6aa140f	2018-12-03 09:56:18 +0000	[diff] [blame]	419	}
				420	#else
				421	static void free_pd(struct perf_domain *pd) { }
Quentin Perret	531b5c9	2018-12-03 09:56:21 +0000	[diff] [blame]	422	#endif /* CONFIG_ENERGY_MODEL && CONFIG_CPU_FREQ_GOV_SCHEDUTIL*/
Quentin Perret	6aa140f	2018-12-03 09:56:18 +0000	[diff] [blame]	423
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	424	static void free_rootdomain(struct rcu_head *rcu)
				425	{
				426	struct root_domain *rd = container_of(rcu, struct root_domain, rcu);
				427
				428	cpupri_cleanup(&rd->cpupri);
				429	cpudl_cleanup(&rd->cpudl);
				430	free_cpumask_var(rd->dlo_mask);
				431	free_cpumask_var(rd->rto_mask);
				432	free_cpumask_var(rd->online);
				433	free_cpumask_var(rd->span);
Quentin Perret	6aa140f	2018-12-03 09:56:18 +0000	[diff] [blame]	434	free_pd(rd->pd);
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	435	kfree(rd);
				436	}
				437
				438	void rq_attach_root(struct rq rq, struct root_domain rd)
				439	{
				440	struct root_domain *old_rd = NULL;
				441	unsigned long flags;
				442
				443	raw_spin_lock_irqsave(&rq->lock, flags);
				444
				445	if (rq->rd) {
				446	old_rd = rq->rd;
				447
				448	if (cpumask_test_cpu(rq->cpu, old_rd->online))
				449	set_rq_offline(rq);
				450
				451	cpumask_clear_cpu(rq->cpu, old_rd->span);
				452
				453	/*
				454	* If we dont want to free the old_rd yet then
				455	* set old_rd to NULL to skip the freeing later
				456	* in this function:
				457	*/
				458	if (!atomic_dec_and_test(&old_rd->refcount))
				459	old_rd = NULL;
				460	}
				461
				462	atomic_inc(&rd->refcount);
				463	rq->rd = rd;
				464
				465	cpumask_set_cpu(rq->cpu, rd->span);
				466	if (cpumask_test_cpu(rq->cpu, cpu_active_mask))
				467	set_rq_online(rq);
				468
				469	raw_spin_unlock_irqrestore(&rq->lock, flags);
				470
				471	if (old_rd)
Paul E. McKenney	337e9b0	2018-11-06 19:10:53 -0800	[diff] [blame]	472	call_rcu(&old_rd->rcu, free_rootdomain);
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	473	}
				474
Steven Rostedt (VMware)	364f566	2018-01-23 20:45:38 -0500	[diff] [blame]	475	void sched_get_rd(struct root_domain *rd)
				476	{
				477	atomic_inc(&rd->refcount);
				478	}
				479
				480	void sched_put_rd(struct root_domain *rd)
				481	{
				482	if (!atomic_dec_and_test(&rd->refcount))
				483	return;
				484
Paul E. McKenney	337e9b0	2018-11-06 19:10:53 -0800	[diff] [blame]	485	call_rcu(&rd->rcu, free_rootdomain);
Steven Rostedt (VMware)	364f566	2018-01-23 20:45:38 -0500	[diff] [blame]	486	}
				487
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	488	static int init_rootdomain(struct root_domain *rd)
				489	{
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	490	if (!zalloc_cpumask_var(&rd->span, GFP_KERNEL))
				491	goto out;
				492	if (!zalloc_cpumask_var(&rd->online, GFP_KERNEL))
				493	goto free_span;
				494	if (!zalloc_cpumask_var(&rd->dlo_mask, GFP_KERNEL))
				495	goto free_online;
				496	if (!zalloc_cpumask_var(&rd->rto_mask, GFP_KERNEL))
				497	goto free_dlo_mask;
				498
Steven Rostedt (Red Hat)	4bdced5	2017-10-06 14:05:04 -0400	[diff] [blame]	499	#ifdef HAVE_RT_PUSH_IPI
				500	rd->rto_cpu = -1;
				501	raw_spin_lock_init(&rd->rto_lock);
				502	init_irq_work(&rd->rto_push_work, rto_push_irq_work_func);
				503	#endif
				504
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	505	init_dl_bw(&rd->dl_bw);
				506	if (cpudl_init(&rd->cpudl) != 0)
				507	goto free_rto_mask;
				508
				509	if (cpupri_init(&rd->cpupri) != 0)
				510	goto free_cpudl;
				511	return 0;
				512
				513	free_cpudl:
				514	cpudl_cleanup(&rd->cpudl);
				515	free_rto_mask:
				516	free_cpumask_var(rd->rto_mask);
				517	free_dlo_mask:
				518	free_cpumask_var(rd->dlo_mask);
				519	free_online:
				520	free_cpumask_var(rd->online);
				521	free_span:
				522	free_cpumask_var(rd->span);
				523	out:
				524	return -ENOMEM;
				525	}
				526
				527	/*
				528	* By default the system creates a single root-domain with all CPUs as
				529	* members (mimicking the global state we have today).
				530	*/
				531	struct root_domain def_root_domain;
				532
				533	void init_defrootdomain(void)
				534	{
				535	init_rootdomain(&def_root_domain);
				536
				537	atomic_set(&def_root_domain.refcount, 1);
				538	}
				539
				540	static struct root_domain *alloc_rootdomain(void)
				541	{
				542	struct root_domain *rd;
				543
Viresh Kumar	4d13a06	2017-04-13 14:45:48 +0530	[diff] [blame]	544	rd = kzalloc(sizeof(*rd), GFP_KERNEL);
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	545	if (!rd)
				546	return NULL;
				547
				548	if (init_rootdomain(rd) != 0) {
				549	kfree(rd);
				550	return NULL;
				551	}
				552
				553	return rd;
				554	}
				555
				556	static void free_sched_groups(struct sched_group *sg, int free_sgc)
				557	{
				558	struct sched_group tmp, first;
				559
				560	if (!sg)
				561	return;
				562
				563	first = sg;
				564	do {
				565	tmp = sg->next;
				566
				567	if (free_sgc && atomic_dec_and_test(&sg->sgc->ref))
				568	kfree(sg->sgc);
				569
Shu Wang	213c5a4	2017-08-10 15:52:16 +0800	[diff] [blame]	570	if (atomic_dec_and_test(&sg->ref))
				571	kfree(sg);
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	572	sg = tmp;
				573	} while (sg != first);
				574	}
				575
				576	static void destroy_sched_domain(struct sched_domain *sd)
				577	{
				578	/*
Peter Zijlstra	a090c4f	2017-08-21 15:42:52 +0200	[diff] [blame]	579	* A normal sched domain may have multiple group references, an
				580	* overlapping domain, having private groups, only one. Iterate,
				581	* dropping group/capacity references, freeing where none remain.
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	582	*/
Shu Wang	213c5a4	2017-08-10 15:52:16 +0800	[diff] [blame]	583	free_sched_groups(sd->groups, 1);
				584
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	585	if (sd->shared && atomic_dec_and_test(&sd->shared->ref))
				586	kfree(sd->shared);
				587	kfree(sd);
				588	}
				589
				590	static void destroy_sched_domains_rcu(struct rcu_head *rcu)
				591	{
				592	struct sched_domain *sd = container_of(rcu, struct sched_domain, rcu);
				593
				594	while (sd) {
				595	struct sched_domain *parent = sd->parent;
				596	destroy_sched_domain(sd);
				597	sd = parent;
				598	}
				599	}
				600
				601	static void destroy_sched_domains(struct sched_domain *sd)
				602	{
				603	if (sd)
				604	call_rcu(&sd->rcu, destroy_sched_domains_rcu);
				605	}
				606
/*
 * Keep a special pointer to the highest sched_domain that has
 * SD_SHARE_PKG_RESOURCES set (Last Level Cache Domain); this
 * allows us to avoid some pointer chasing in select_idle_sibling().
 *
 * Also keep a unique ID per domain (we use the first CPU number in
 * the cpumask of the domain); this allows us to quickly tell if
 * two CPUs are in the same cache domain, see cpus_share_cache().
 */
Joel Fernandes (Google)	994aeb7	2019-03-20 20:34:24 -0400	[diff] [blame]	616	DEFINE_PER_CPU(struct sched_domain __rcu *, sd_llc);
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	617	DEFINE_PER_CPU(int, sd_llc_size);
				618	DEFINE_PER_CPU(int, sd_llc_id);
Joel Fernandes (Google)	994aeb7	2019-03-20 20:34:24 -0400	[diff] [blame]	619	DEFINE_PER_CPU(struct sched_domain_shared __rcu *, sd_llc_shared);
				620	DEFINE_PER_CPU(struct sched_domain __rcu *, sd_numa);
				621	DEFINE_PER_CPU(struct sched_domain __rcu *, sd_asym_packing);
				622	DEFINE_PER_CPU(struct sched_domain __rcu *, sd_asym_cpucapacity);
Morten Rasmussen	df054e8	2018-07-04 11:17:39 +0100	[diff] [blame]	623	DEFINE_STATIC_KEY_FALSE(sched_asym_cpucapacity);
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	624
				625	static void update_top_cache_domain(int cpu)
				626	{
				627	struct sched_domain_shared *sds = NULL;
				628	struct sched_domain *sd;
				629	int id = cpu;
				630	int size = 1;
				631
				632	sd = highest_flag_domain(cpu, SD_SHARE_PKG_RESOURCES);
				633	if (sd) {
				634	id = cpumask_first(sched_domain_span(sd));
				635	size = cpumask_weight(sched_domain_span(sd));
				636	sds = sd->shared;
				637	}
				638
				639	rcu_assign_pointer(per_cpu(sd_llc, cpu), sd);
				640	per_cpu(sd_llc_size, cpu) = size;
				641	per_cpu(sd_llc_id, cpu) = id;
				642	rcu_assign_pointer(per_cpu(sd_llc_shared, cpu), sds);
				643
				644	sd = lowest_flag_domain(cpu, SD_NUMA);
				645	rcu_assign_pointer(per_cpu(sd_numa, cpu), sd);
				646
				647	sd = highest_flag_domain(cpu, SD_ASYM_PACKING);
Quentin Perret	011b27b	2018-12-03 09:56:19 +0000	[diff] [blame]	648	rcu_assign_pointer(per_cpu(sd_asym_packing, cpu), sd);
				649
				650	sd = lowest_flag_domain(cpu, SD_ASYM_CPUCAPACITY);
				651	rcu_assign_pointer(per_cpu(sd_asym_cpucapacity, cpu), sd);
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	652	}
				653
				654	/*
				655	* Attach the domain 'sd' to 'cpu' as its base domain. Callers must
				656	* hold the hotplug lock.
				657	*/
				658	static void
				659	cpu_attach_domain(struct sched_domain sd, struct root_domain rd, int cpu)
				660	{
				661	struct rq *rq = cpu_rq(cpu);
				662	struct sched_domain *tmp;
				663
				664	/* Remove the sched domains which do not contribute to scheduling. */
				665	for (tmp = sd; tmp; ) {
				666	struct sched_domain *parent = tmp->parent;
				667	if (!parent)
				668	break;
				669
				670	if (sd_parent_degenerate(tmp, parent)) {
				671	tmp->parent = parent->parent;
				672	if (parent->parent)
				673	parent->parent->child = tmp;
				674	/*
				675	* Transfer SD_PREFER_SIBLING down in case of a
				676	* degenerate parent; the spans match for this
				677	* so the property transfers.
				678	*/
				679	if (parent->flags & SD_PREFER_SIBLING)
				680	tmp->flags \|= SD_PREFER_SIBLING;
				681	destroy_sched_domain(parent);
				682	} else
				683	tmp = tmp->parent;
				684	}
				685
				686	if (sd && sd_degenerate(sd)) {
				687	tmp = sd;
				688	sd = sd->parent;
				689	destroy_sched_domain(tmp);
				690	if (sd)
				691	sd->child = NULL;
				692	}
				693
				694	sched_domain_debug(sd, cpu);
				695
				696	rq_attach_root(rq, rd);
				697	tmp = rq->sd;
				698	rcu_assign_pointer(rq->sd, sd);
Peter Zijlstra	bbdacdf	2017-08-10 17:10:26 +0200	[diff] [blame]	699	dirty_sched_domain_sysctl(cpu);
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	700	destroy_sched_domains(tmp);
				701
				702	update_top_cache_domain(cpu);
				703	}
				704
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	705	struct s_data {
Luc Van Oostenryck	99687cd	2019-01-18 15:49:36 +0100	[diff] [blame]	706	struct sched_domain * __percpu *sd;
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	707	struct root_domain *rd;
				708	};
				709
				710	enum s_alloc {
				711	sa_rootdomain,
				712	sa_sd,
				713	sa_sd_storage,
				714	sa_none,
				715	};
				716
				717	/*
Peter Zijlstra	35a566e	2017-04-28 10:54:26 +0200	[diff] [blame]	718	* Return the canonical balance CPU for this group, this is the first CPU
Peter Zijlstra	e5c14b1	2017-05-01 10:47:02 +0200	[diff] [blame]	719	* of this group that's also in the balance mask.
Peter Zijlstra	35a566e	2017-04-28 10:54:26 +0200	[diff] [blame]	720	*
Peter Zijlstra	e5c14b1	2017-05-01 10:47:02 +0200	[diff] [blame]	721	* The balance mask are all those CPUs that could actually end up at this
				722	* group. See build_balance_mask().
Peter Zijlstra	35a566e	2017-04-28 10:54:26 +0200	[diff] [blame]	723	*
				724	* Also see should_we_balance().
				725	*/
				726	int group_balance_cpu(struct sched_group *sg)
				727	{
Peter Zijlstra	e5c14b1	2017-05-01 10:47:02 +0200	[diff] [blame]	728	return cpumask_first(group_balance_mask(sg));
Peter Zijlstra	35a566e	2017-04-28 10:54:26 +0200	[diff] [blame]	729	}
				730
				731
				732	/*
				733	* NUMA topology (first read the regular topology blurb below)
				734	*
				735	* Given a node-distance table, for example:
				736	*
				737	* node 0 1 2 3
				738	* 0: 10 20 30 20
				739	* 1: 20 10 20 30
				740	* 2: 30 20 10 20
				741	* 3: 20 30 20 10
				742	*
				743	* which represents a 4 node ring topology like:
				744	*
				745	* 0 ----- 1
				746	* \| \|
				747	* \| \|
				748	* \| \|
				749	* 3 ----- 2
				750	*
				751	* We want to construct domains and groups to represent this. The way we go
				752	* about doing this is to build the domains on 'hops'. For each NUMA level we
				753	* construct the mask of all nodes reachable in @level hops.
				754	*
				755	* For the above NUMA topology that gives 3 levels:
				756	*
				757	* NUMA-2 0-3 0-3 0-3 0-3
				758	* groups: {0-1,3},{1-3} {0-2},{0,2-3} {1-3},{0-1,3} {0,2-3},{0-2}
				759	*
				760	* NUMA-1 0-1,3 0-2 1-3 0,2-3
				761	* groups: {0},{1},{3} {0},{1},{2} {1},{2},{3} {0},{2},{3}
				762	*
				763	* NUMA-0 0 1 2 3
				764	*
				765	*
				766	* As can be seen; things don't nicely line up as with the regular topology.
				767	* When we iterate a domain in child domain chunks some nodes can be
				768	* represented multiple times -- hence the "overlap" naming for this part of
				769	* the topology.
				770	*
				771	* In order to minimize this overlap, we only build enough groups to cover the
				772	* domain. For instance Node-0 NUMA-2 would only get groups: 0-1,3 and 1-3.
				773	*
				774	* Because:
				775	*
				776	* - the first group of each domain is its child domain; this
				777	* gets us the first 0-1,3
				778	* - the only uncovered node is 2, whose child domain is 1-3.
				779	*
				780	* However, because of the overlap, computing a unique CPU for each group is
				781	* more complicated. Consider for instance the groups of NODE-1 NUMA-2, both
				782	* groups include the CPUs of Node-0, while those CPUs would not in fact ever
				783	* end up at those groups (they would end up in group: 0-1,3).
				784	*
Peter Zijlstra	e5c14b1	2017-05-01 10:47:02 +0200	[diff] [blame]	785	* To correct this we have to introduce the group balance mask. This mask
Peter Zijlstra	35a566e	2017-04-28 10:54:26 +0200	[diff] [blame]	786	* will contain those CPUs in the group that can reach this group given the
				787	* (child) domain tree.
				788	*
				789	* With this we can once again compute balance_cpu and sched_group_capacity
				790	* relations.
				791	*
				792	* XXX include words on how balance_cpu is unique and therefore can be
				793	* used for sched_group_capacity links.
				794	*
				795	*
				796	* Another 'interesting' topology is:
				797	*
				798	* node 0 1 2 3
				799	* 0: 10 20 20 30
				800	* 1: 20 10 20 20
				801	* 2: 20 20 10 20
				802	* 3: 30 20 20 10
				803	*
				804	* Which looks a little like:
				805	*
				806	*      0 ----- 1
				807	*      |     / |
				808	*      |   /   |
				809	*      | /     |
				810	*      2 ----- 3
				811	*
				812	* This topology is asymmetric, nodes 1,2 are fully connected, but nodes 0,3
				813	* are not.
				814	*
				815	* This leads to a few particularly weird cases where the sched_domain's are
Ingo Molnar	97fb7a0	2018-03-03 14:01:12 +0100	[diff] [blame]	816	* not of the same number for each CPU. Consider:
Peter Zijlstra	35a566e	2017-04-28 10:54:26 +0200	[diff] [blame]	817	*
				818	*   NUMA-2       0-3                                             0-3
				819	*    groups:     {0-2},{1-3}                                     {1-3},{0-2}
				820	*
				821	*   NUMA-1       0-2             0-3             0-3             1-3
				822	*
				823	*   NUMA-0       0               1               2               3
				824	*
				825	*/
				826
				827
				828	/*
Peter Zijlstra	e5c14b1	2017-05-01 10:47:02 +0200	[diff] [blame]	829	* Build the balance mask; it contains only those CPUs that can arrive at this
				830	* group and should be considered to continue balancing.
Peter Zijlstra	35a566e	2017-04-28 10:54:26 +0200	[diff] [blame]	831	*
				832	* We do this during the group creation pass, therefore the group information
				833	* isn't complete yet, however since each group represents a (child) domain we
				834	* can fully construct this using the sched_domain bits (which are already
				835	* complete).
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	836	*/
Peter Zijlstra	1676330	2017-04-25 14:31:11 +0200	[diff] [blame]	837	static void
Peter Zijlstra	e5c14b1	2017-05-01 10:47:02 +0200	[diff] [blame]	838	build_balance_mask(struct sched_domain sd, struct sched_group sg, struct cpumask *mask)
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	839	{
Peter Zijlstra	ae4df9d	2017-05-01 11:03:12 +0200	[diff] [blame]	840	const struct cpumask *sg_span = sched_group_span(sg);
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	841	struct sd_data *sdd = sd->private;
				842	struct sched_domain *sibling;
				843	int i;
				844
Peter Zijlstra	1676330	2017-04-25 14:31:11 +0200	[diff] [blame]	845	cpumask_clear(mask);
				846
Lauro Ramos Venancio	f32d782	2017-04-20 16:51:40 -0300	[diff] [blame]	847	for_each_cpu(i, sg_span) {
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	848	sibling = *per_cpu_ptr(sdd->sd, i);
Peter Zijlstra	73bb059	2017-04-25 14:00:49 +0200	[diff] [blame]	849
				850	/*
				851	* Can happen in the asymmetric case, where these siblings are
				852	* unused. The mask will not be empty because those CPUs that
				853	* do have the top domain _should_ span the domain.
				854	*/
				855	if (!sibling->child)
				856	continue;
				857
				858	/* If we would not end up here, we can't continue from here */
				859	if (!cpumask_equal(sg_span, sched_domain_span(sibling->child)))
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	860	continue;
				861
Peter Zijlstra	1676330	2017-04-25 14:31:11 +0200	[diff] [blame]	862	cpumask_set_cpu(i, mask);
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	863	}
Peter Zijlstra	73bb059	2017-04-25 14:00:49 +0200	[diff] [blame]	864
				865	/* We must not have empty masks here */
Peter Zijlstra	1676330	2017-04-25 14:31:11 +0200	[diff] [blame]	866	WARN_ON_ONCE(cpumask_empty(mask));
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	867	}
				868
				869	/*
Peter Zijlstra	35a566e	2017-04-28 10:54:26 +0200	[diff] [blame]	870	* XXX: This creates per-node group entries; since the load-balancer will
				871	* immediately access remote memory to construct this group's load-balance
				872	* statistics having the groups node local is of dubious benefit.
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	873	*/
Lauro Ramos Venancio	8c03346	2017-04-13 10:56:07 -0300	[diff] [blame]	874	static struct sched_group *
				875	build_group_from_child_sched_domain(struct sched_domain *sd, int cpu)
				876	{
				877	struct sched_group *sg;
				878	struct cpumask *sg_span;
				879
				880	sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(),
				881	GFP_KERNEL, cpu_to_node(cpu));
				882
				883	if (!sg)
				884	return NULL;
				885
Peter Zijlstra	ae4df9d	2017-05-01 11:03:12 +0200	[diff] [blame]	886	sg_span = sched_group_span(sg);
Lauro Ramos Venancio	8c03346	2017-04-13 10:56:07 -0300	[diff] [blame]	887	if (sd->child)
				888	cpumask_copy(sg_span, sched_domain_span(sd->child));
				889	else
				890	cpumask_copy(sg_span, sched_domain_span(sd));
				891
Shu Wang	213c5a4	2017-08-10 15:52:16 +0800	[diff] [blame]	892	atomic_inc(&sg->ref);
Lauro Ramos Venancio	8c03346	2017-04-13 10:56:07 -0300	[diff] [blame]	893	return sg;
				894	}
				895
				896	static void init_overlap_sched_group(struct sched_domain *sd,
Peter Zijlstra	1676330	2017-04-25 14:31:11 +0200	[diff] [blame]	897	struct sched_group *sg)
Lauro Ramos Venancio	8c03346	2017-04-13 10:56:07 -0300	[diff] [blame]	898	{
Peter Zijlstra	1676330	2017-04-25 14:31:11 +0200	[diff] [blame]	899	struct cpumask *mask = sched_domains_tmpmask2;
Lauro Ramos Venancio	8c03346	2017-04-13 10:56:07 -0300	[diff] [blame]	900	struct sd_data *sdd = sd->private;
				901	struct cpumask *sg_span;
Peter Zijlstra	1676330	2017-04-25 14:31:11 +0200	[diff] [blame]	902	int cpu;
				903
Peter Zijlstra	e5c14b1	2017-05-01 10:47:02 +0200	[diff] [blame]	904	build_balance_mask(sd, sg, mask);
Peter Zijlstra	ae4df9d	2017-05-01 11:03:12 +0200	[diff] [blame]	905	cpu = cpumask_first_and(sched_group_span(sg), mask);
Lauro Ramos Venancio	8c03346	2017-04-13 10:56:07 -0300	[diff] [blame]	906
				907	sg->sgc = *per_cpu_ptr(sdd->sgc, cpu);
				908	if (atomic_inc_return(&sg->sgc->ref) == 1)
Peter Zijlstra	e5c14b1	2017-05-01 10:47:02 +0200	[diff] [blame]	909	cpumask_copy(group_balance_mask(sg), mask);
Peter Zijlstra	35a566e	2017-04-28 10:54:26 +0200	[diff] [blame]	910	else
Peter Zijlstra	e5c14b1	2017-05-01 10:47:02 +0200	[diff] [blame]	911	WARN_ON_ONCE(!cpumask_equal(group_balance_mask(sg), mask));
Lauro Ramos Venancio	8c03346	2017-04-13 10:56:07 -0300	[diff] [blame]	912
				913	/*
				914	* Initialize sgc->capacity such that even if we mess up the
				915	* domains and no possible iteration will get us here, we won't
				916	* die on a /0 trap.
				917	*/
Peter Zijlstra	ae4df9d	2017-05-01 11:03:12 +0200	[diff] [blame]	918	sg_span = sched_group_span(sg);
Lauro Ramos Venancio	8c03346	2017-04-13 10:56:07 -0300	[diff] [blame]	919	sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sg_span);
				920	sg->sgc->min_capacity = SCHED_CAPACITY_SCALE;
Morten Rasmussen	e3d6d0c	2018-07-04 11:17:41 +0100	[diff] [blame]	921	sg->sgc->max_capacity = SCHED_CAPACITY_SCALE;
Lauro Ramos Venancio	8c03346	2017-04-13 10:56:07 -0300	[diff] [blame]	922	}
				923
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	924	static int
				925	build_overlap_sched_groups(struct sched_domain *sd, int cpu)
				926	{
Peter Zijlstra	91eaed0	2017-04-14 17:32:07 +0200	[diff] [blame]	927	struct sched_group first = NULL, last = NULL, *sg;
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	928	const struct cpumask *span = sched_domain_span(sd);
				929	struct cpumask *covered = sched_domains_tmpmask;
				930	struct sd_data *sdd = sd->private;
				931	struct sched_domain *sibling;
				932	int i;
				933
				934	cpumask_clear(covered);
				935
Peter Zijlstra	0372dd2	2017-04-14 17:24:02 +0200	[diff] [blame]	936	for_each_cpu_wrap(i, span, cpu) {
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	937	struct cpumask *sg_span;
				938
				939	if (cpumask_test_cpu(i, covered))
				940	continue;
				941
				942	sibling = *per_cpu_ptr(sdd->sd, i);
				943
Lauro Ramos Venancio	c20e1ea	2017-04-20 16:51:42 -0300	[diff] [blame]	944	/*
				945	* Asymmetric node setups can result in situations where the
				946	* domain tree is of unequal depth, make sure to skip domains
				947	* that already cover the entire range.
				948	*
				949	* In that case build_sched_domains() will have terminated the
				950	* iteration early and our sibling sd spans will be empty.
				951	* Domains should always include the CPU they're built on, so
				952	* check that.
				953	*/
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	954	if (!cpumask_test_cpu(i, sched_domain_span(sibling)))
				955	continue;
				956
Lauro Ramos Venancio	8c03346	2017-04-13 10:56:07 -0300	[diff] [blame]	957	sg = build_group_from_child_sched_domain(sibling, cpu);
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	958	if (!sg)
				959	goto fail;
				960
Peter Zijlstra	ae4df9d	2017-05-01 11:03:12 +0200	[diff] [blame]	961	sg_span = sched_group_span(sg);
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	962	cpumask_or(covered, covered, sg_span);
				963
Peter Zijlstra	1676330	2017-04-25 14:31:11 +0200	[diff] [blame]	964	init_overlap_sched_group(sd, sg);
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	965
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	966	if (!first)
				967	first = sg;
				968	if (last)
				969	last->next = sg;
				970	last = sg;
				971	last->next = first;
				972	}
Peter Zijlstra	91eaed0	2017-04-14 17:32:07 +0200	[diff] [blame]	973	sd->groups = first;
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	974
				975	return 0;
				976
				977	fail:
				978	free_sched_groups(first, 0);
				979
				980	return -ENOMEM;
				981	}
				982
Peter Zijlstra	35a566e	2017-04-28 10:54:26 +0200	[diff] [blame]	983
				984	/*
				985	* Package topology (also see the load-balance blurb in fair.c)
				986	*
				987	* The scheduler builds a tree structure to represent a number of important
				988	* topology features. By default (default_topology[]) these include:
				989	*
				990	* - Simultaneous multithreading (SMT)
				991	* - Multi-Core Cache (MC)
				992	* - Package (DIE)
				993	*
				994	* Where the last one more or less denotes everything up to a NUMA node.
				995	*
				996	* The tree consists of 3 primary data structures:
				997	*
				998	*   sched_domain -> sched_group -> sched_group_capacity
				999	*       ^ ^             ^ ^
				1000	*       `-'             `-'
				1001	*
Ingo Molnar	97fb7a0	2018-03-03 14:01:12 +0100	[diff] [blame]	1002	* The sched_domains are per-CPU and have a two way link (parent & child) and
Peter Zijlstra	35a566e	2017-04-28 10:54:26 +0200	[diff] [blame]	1003	* denote the ever growing mask of CPUs belonging to that level of topology.
				1004	*
				1005	* Each sched_domain has a circular (double) linked list of sched_group's, each
				1006	* denoting the domains of the level below (or individual CPUs in case of the
				1007	* first domain level). The sched_group linked by a sched_domain includes the
				1008	* CPU of that sched_domain [*].
				1009	*
				1010	* Take for instance a 2 threaded, 2 core, 2 cache cluster part:
				1011	*
				1012	*   CPU   0   1   2   3   4   5   6   7
				1013	*
				1014	*   DIE  [                             ]
				1015	*   MC   [             ] [             ]
				1016	*   SMT  [     ] [     ] [     ] [     ]
				1017	*
				1018	* - or -
				1019	*
				1020	* DIE 0-7 0-7 0-7 0-7 0-7 0-7 0-7 0-7
				1021	* MC 0-3 0-3 0-3 0-3 4-7 4-7 4-7 4-7
				1022	* SMT 0-1 0-1 2-3 2-3 4-5 4-5 6-7 6-7
				1023	*
				1024	* CPU 0 1 2 3 4 5 6 7
				1025	*
				1026	* One way to think about it is: sched_domain moves you up and down among these
				1027	* topology levels, while sched_group moves you sideways through it, at child
				1028	* domain granularity.
				1029	*
				1030	* sched_group_capacity ensures each unique sched_group has shared storage.
				1031	*
				1032	* There are two related construction problems, both require a CPU that
				1033	* uniquely identify each group (for a given domain):
				1034	*
				1035	* - The first is the balance_cpu (see should_we_balance() and the
				1036	* load-balance blurb in fair.c); for each group we only want 1 CPU to
				1037	* continue balancing at a higher domain.
				1038	*
				1039	* - The second is the sched_group_capacity; we want all identical groups
				1040	* to share a single sched_group_capacity.
				1041	*
				1042	* Since these topologies are exclusive by construction. That is, it's
				1043	* impossible for an SMT thread to belong to multiple cores, and cores to
				1044	* be part of multiple caches. There is a very clear and unique location
				1045	* for each CPU in the hierarchy.
				1046	*
				1047	* Therefore computing a unique CPU for each group is trivial (the iteration
				1048	* mask is redundant and set all 1s; all CPUs in a group will end up at _that_
				1049	* group), we can simply pick the first CPU in each group.
				1050	*
				1051	*
				1052	* [*] in other words, the first group of each domain is its child domain.
				1053	*/
				1054
Peter Zijlstra	0c0e776	2017-05-03 14:18:06 +0200	[diff] [blame]	1055	static struct sched_group get_group(int cpu, struct sd_data sdd)
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1056	{
				1057	struct sched_domain sd = per_cpu_ptr(sdd->sd, cpu);
				1058	struct sched_domain *child = sd->child;
Peter Zijlstra	0c0e776	2017-05-03 14:18:06 +0200	[diff] [blame]	1059	struct sched_group *sg;
Valentin Schneider	67d4f6f	2019-04-09 18:35:45 +0100	[diff] [blame]	1060	bool already_visited;
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1061
				1062	if (child)
				1063	cpu = cpumask_first(sched_domain_span(child));
				1064
Peter Zijlstra	0c0e776	2017-05-03 14:18:06 +0200	[diff] [blame]	1065	sg = *per_cpu_ptr(sdd->sg, cpu);
				1066	sg->sgc = *per_cpu_ptr(sdd->sgc, cpu);
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1067
Valentin Schneider	67d4f6f	2019-04-09 18:35:45 +0100	[diff] [blame]	1068	/* Increase refcounts for claim_allocations: */
				1069	already_visited = atomic_inc_return(&sg->ref) > 1;
				1070	/* sgc visits should follow a similar trend as sg */
				1071	WARN_ON(already_visited != (atomic_inc_return(&sg->sgc->ref) > 1));
				1072
				1073	/* If we have already visited that group, it's already initialized. */
				1074	if (already_visited)
				1075	return sg;
Peter Zijlstra	0c0e776	2017-05-03 14:18:06 +0200	[diff] [blame]	1076
				1077	if (child) {
Peter Zijlstra	ae4df9d	2017-05-01 11:03:12 +0200	[diff] [blame]	1078	cpumask_copy(sched_group_span(sg), sched_domain_span(child));
				1079	cpumask_copy(group_balance_mask(sg), sched_group_span(sg));
Peter Zijlstra	0c0e776	2017-05-03 14:18:06 +0200	[diff] [blame]	1080	} else {
Peter Zijlstra	ae4df9d	2017-05-01 11:03:12 +0200	[diff] [blame]	1081	cpumask_set_cpu(cpu, sched_group_span(sg));
Peter Zijlstra	e5c14b1	2017-05-01 10:47:02 +0200	[diff] [blame]	1082	cpumask_set_cpu(cpu, group_balance_mask(sg));
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1083	}
				1084
Peter Zijlstra	ae4df9d	2017-05-01 11:03:12 +0200	[diff] [blame]	1085	sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sched_group_span(sg));
Peter Zijlstra	0c0e776	2017-05-03 14:18:06 +0200	[diff] [blame]	1086	sg->sgc->min_capacity = SCHED_CAPACITY_SCALE;
Morten Rasmussen	e3d6d0c	2018-07-04 11:17:41 +0100	[diff] [blame]	1087	sg->sgc->max_capacity = SCHED_CAPACITY_SCALE;
Peter Zijlstra	0c0e776	2017-05-03 14:18:06 +0200	[diff] [blame]	1088
				1089	return sg;
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1090	}
				1091
				1092	/*
				1093	* build_sched_groups will build a circular linked list of the groups
Valentin Schneider	d874323	2019-04-09 18:35:46 +0100	[diff] [blame]	1094	* covered by the given span, will set each group's ->cpumask correctly,
				1095	* and will initialize their ->sgc.
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1096	*
				1097	* Assumes the sched_domain tree is fully constructed
				1098	*/
				1099	static int
				1100	build_sched_groups(struct sched_domain *sd, int cpu)
				1101	{
				1102	struct sched_group first = NULL, last = NULL;
				1103	struct sd_data *sdd = sd->private;
				1104	const struct cpumask *span = sched_domain_span(sd);
				1105	struct cpumask *covered;
				1106	int i;
				1107
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1108	lockdep_assert_held(&sched_domains_mutex);
				1109	covered = sched_domains_tmpmask;
				1110
				1111	cpumask_clear(covered);
				1112
Peter Zijlstra	0c0e776	2017-05-03 14:18:06 +0200	[diff] [blame]	1113	for_each_cpu_wrap(i, span, cpu) {
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1114	struct sched_group *sg;
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1115
				1116	if (cpumask_test_cpu(i, covered))
				1117	continue;
				1118
Peter Zijlstra	0c0e776	2017-05-03 14:18:06 +0200	[diff] [blame]	1119	sg = get_group(i, sdd);
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1120
Peter Zijlstra	ae4df9d	2017-05-01 11:03:12 +0200	[diff] [blame]	1121	cpumask_or(covered, covered, sched_group_span(sg));
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1122
				1123	if (!first)
				1124	first = sg;
				1125	if (last)
				1126	last->next = sg;
				1127	last = sg;
				1128	}
				1129	last->next = first;
Peter Zijlstra	0c0e776	2017-05-03 14:18:06 +0200	[diff] [blame]	1130	sd->groups = first;
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1131
				1132	return 0;
				1133	}
				1134
				1135	/*
				1136	* Initialize sched groups cpu_capacity.
				1137	*
				1138	* cpu_capacity indicates the capacity of sched group, which is used while
				1139	* distributing the load between different sched groups in a sched domain.
				1140	* Typically cpu_capacity for all the groups in a sched domain will be same
				1141	* unless there are asymmetries in the topology. If there are asymmetries,
				1142	* group having more cpu_capacity will pickup more load compared to the
				1143	* group having less cpu_capacity.
				1144	*/
				1145	static void init_sched_groups_capacity(int cpu, struct sched_domain *sd)
				1146	{
				1147	struct sched_group *sg = sd->groups;
				1148
				1149	WARN_ON(!sg);
				1150
				1151	do {
				1152	int cpu, max_cpu = -1;
				1153
Peter Zijlstra	ae4df9d	2017-05-01 11:03:12 +0200	[diff] [blame]	1154	sg->group_weight = cpumask_weight(sched_group_span(sg));
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1155
				1156	if (!(sd->flags & SD_ASYM_PACKING))
				1157	goto next;
				1158
Peter Zijlstra	ae4df9d	2017-05-01 11:03:12 +0200	[diff] [blame]	1159	for_each_cpu(cpu, sched_group_span(sg)) {
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1160	if (max_cpu < 0)
				1161	max_cpu = cpu;
				1162	else if (sched_asym_prefer(cpu, max_cpu))
				1163	max_cpu = cpu;
				1164	}
				1165	sg->asym_prefer_cpu = max_cpu;
				1166
				1167	next:
				1168	sg = sg->next;
				1169	} while (sg != sd->groups);
				1170
				1171	if (cpu != group_balance_cpu(sg))
				1172	return;
				1173
				1174	update_group_capacity(sd, cpu);
				1175	}
				1176
				1177	/*
				1178	* Initializers for schedule domains
				1179	* Non-inlined to reduce accumulated stack pressure in build_sched_domains()
				1180	*/
				1181
				1182	static int default_relax_domain_level = -1;
				1183	int sched_domain_level_max;
				1184
				1185	static int __init setup_relax_domain_level(char *str)
				1186	{
				1187	if (kstrtoint(str, 0, &default_relax_domain_level))
				1188	pr_warn("Unable to set relax_domain_level\n");
				1189
				1190	return 1;
				1191	}
				1192	__setup("relax_domain_level=", setup_relax_domain_level);
				1193
				1194	static void set_domain_attribute(struct sched_domain *sd,
				1195	struct sched_domain_attr *attr)
				1196	{
				1197	int request;
				1198
				1199	if (!attr \|\| attr->relax_domain_level < 0) {
				1200	if (default_relax_domain_level < 0)
				1201	return;
Valentin Schneider	9ae7ab2	2019-10-14 17:44:08 +0100	[diff] [blame]	1202	request = default_relax_domain_level;
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1203	} else
				1204	request = attr->relax_domain_level;
Valentin Schneider	9ae7ab2	2019-10-14 17:44:08 +0100	[diff] [blame]	1205
				1206	if (sd->level > request) {
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1207	/* Turn off idle balance on this domain: */
				1208	sd->flags &= ~(SD_BALANCE_WAKE\|SD_BALANCE_NEWIDLE);
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1209	}
				1210	}
				1211
				1212	static void __sdt_free(const struct cpumask *cpu_map);
				1213	static int __sdt_alloc(const struct cpumask *cpu_map);
				1214
				1215	static void __free_domain_allocs(struct s_data *d, enum s_alloc what,
				1216	const struct cpumask *cpu_map)
				1217	{
				1218	switch (what) {
				1219	case sa_rootdomain:
				1220	if (!atomic_read(&d->rd->refcount))
				1221	free_rootdomain(&d->rd->rcu);
Gustavo A. R. Silva	df561f66	2020-08-23 17:36:59 -0500	[diff] [blame^]	1222	fallthrough;
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1223	case sa_sd:
				1224	free_percpu(d->sd);
Gustavo A. R. Silva	df561f66	2020-08-23 17:36:59 -0500	[diff] [blame^]	1225	fallthrough;
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1226	case sa_sd_storage:
				1227	__sdt_free(cpu_map);
Gustavo A. R. Silva	df561f66	2020-08-23 17:36:59 -0500	[diff] [blame^]	1228	fallthrough;
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1229	case sa_none:
				1230	break;
				1231	}
				1232	}
				1233
				1234	static enum s_alloc
				1235	__visit_domain_allocation_hell(struct s_data d, const struct cpumask cpu_map)
				1236	{
				1237	memset(d, 0, sizeof(*d));
				1238
				1239	if (__sdt_alloc(cpu_map))
				1240	return sa_sd_storage;
				1241	d->sd = alloc_percpu(struct sched_domain *);
				1242	if (!d->sd)
				1243	return sa_sd_storage;
				1244	d->rd = alloc_rootdomain();
				1245	if (!d->rd)
				1246	return sa_sd;
Ingo Molnar	97fb7a0	2018-03-03 14:01:12 +0100	[diff] [blame]	1247
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1248	return sa_rootdomain;
				1249	}
				1250
				1251	/*
				1252	* NULL the sd_data elements we've used to build the sched_domain and
				1253	* sched_group structure so that the subsequent __free_domain_allocs()
				1254	* will not free the data we're using.
				1255	*/
				1256	static void claim_allocations(int cpu, struct sched_domain *sd)
				1257	{
				1258	struct sd_data *sdd = sd->private;
				1259
				1260	WARN_ON_ONCE(*per_cpu_ptr(sdd->sd, cpu) != sd);
				1261	*per_cpu_ptr(sdd->sd, cpu) = NULL;
				1262
				1263	if (atomic_read(&(*per_cpu_ptr(sdd->sds, cpu))->ref))
				1264	*per_cpu_ptr(sdd->sds, cpu) = NULL;
				1265
				1266	if (atomic_read(&(*per_cpu_ptr(sdd->sg, cpu))->ref))
				1267	*per_cpu_ptr(sdd->sg, cpu) = NULL;
				1268
				1269	if (atomic_read(&(*per_cpu_ptr(sdd->sgc, cpu))->ref))
				1270	*per_cpu_ptr(sdd->sgc, cpu) = NULL;
				1271	}
				1272
				1273	#ifdef CONFIG_NUMA
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1274	enum numa_topology_type sched_numa_topology_type;
Ingo Molnar	97fb7a0	2018-03-03 14:01:12 +0100	[diff] [blame]	1275
				1276	static int sched_domains_numa_levels;
				1277	static int sched_domains_curr_level;
				1278
				1279	int sched_max_numa_distance;
				1280	static int *sched_domains_numa_distance;
				1281	static struct cpumask ***sched_domains_numa_masks;
Matt Fleming	a55c745	2019-08-08 20:53:01 +0100	[diff] [blame]	1282	int __read_mostly node_reclaim_distance = RECLAIM_DISTANCE;
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1283	#endif
				1284
				1285	/*
				1286	* SD_flags allowed in topology descriptions.
				1287	*
				1288	* These flags are purely descriptive of the topology and do not prescribe
				1289	* behaviour. Behaviour is artificial and mapped in the below sd_init()
				1290	* function:
				1291	*
				1292	* SD_SHARE_CPUCAPACITY - describes SMT topologies
				1293	* SD_SHARE_PKG_RESOURCES - describes shared caches
				1294	* SD_NUMA - describes NUMA topologies
				1295	* SD_SHARE_POWERDOMAIN - describes shared power domain
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1296	*
				1297	* Odd one out, which beside describing the topology has a quirk also
				1298	* prescribes the desired behaviour that goes along with it:
				1299	*
				1300	* SD_ASYM_PACKING - describes SMT quirks
				1301	*/
				1302	#define TOPOLOGY_SD_FLAGS \
Ingo Molnar	97fb7a0	2018-03-03 14:01:12 +0100	[diff] [blame]	1303	(SD_SHARE_CPUCAPACITY \| \
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1304	SD_SHARE_PKG_RESOURCES \| \
Ingo Molnar	97fb7a0	2018-03-03 14:01:12 +0100	[diff] [blame]	1305	SD_NUMA \| \
				1306	SD_ASYM_PACKING \| \
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1307	SD_SHARE_POWERDOMAIN)
				1308
				1309	static struct sched_domain *
				1310	sd_init(struct sched_domain_topology_level *tl,
				1311	const struct cpumask *cpu_map,
Morten Rasmussen	05484e0	2018-07-20 14:32:31 +0100	[diff] [blame]	1312	struct sched_domain *child, int dflags, int cpu)
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1313	{
				1314	struct sd_data *sdd = &tl->data;
				1315	struct sched_domain sd = per_cpu_ptr(sdd->sd, cpu);
				1316	int sd_id, sd_weight, sd_flags = 0;
				1317
				1318	#ifdef CONFIG_NUMA
				1319	/*
				1320	* Ugly hack to pass state to sd_numa_mask()...
				1321	*/
				1322	sched_domains_curr_level = tl->numa_level;
				1323	#endif
				1324
				1325	sd_weight = cpumask_weight(tl->mask(cpu));
				1326
				1327	if (tl->sd_flags)
				1328	sd_flags = (*tl->sd_flags)();
				1329	if (WARN_ONCE(sd_flags & ~TOPOLOGY_SD_FLAGS,
				1330	"wrong sd_flags in topology description\n"))
Peng Liu	9b1b234	2020-06-09 23:09:36 +0800	[diff] [blame]	1331	sd_flags &= TOPOLOGY_SD_FLAGS;
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1332
Morten Rasmussen	05484e0	2018-07-20 14:32:31 +0100	[diff] [blame]	1333	/* Apply detected topology flags */
				1334	sd_flags \|= dflags;
				1335
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1336	*sd = (struct sched_domain){
				1337	.min_interval = sd_weight,
				1338	.max_interval = 2*sd_weight,
				1339	.busy_factor = 32,
				1340	.imbalance_pct = 125,
				1341
				1342	.cache_nice_tries = 0,
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1343
Valentin Schneider	36c5bdc	2020-04-15 22:05:07 +0100	[diff] [blame]	1344	.flags = 1*SD_BALANCE_NEWIDLE
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1345	\| 1*SD_BALANCE_EXEC
				1346	\| 1*SD_BALANCE_FORK
				1347	\| 0*SD_BALANCE_WAKE
				1348	\| 1*SD_WAKE_AFFINE
				1349	\| 0*SD_SHARE_CPUCAPACITY
				1350	\| 0*SD_SHARE_PKG_RESOURCES
				1351	\| 0*SD_SERIALIZE
Morten Rasmussen	9c63e84	2018-07-04 11:17:50 +0100	[diff] [blame]	1352	\| 1*SD_PREFER_SIBLING
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1353	\| 0*SD_NUMA
				1354	\| sd_flags
				1355	,
				1356
				1357	.last_balance = jiffies,
				1358	.balance_interval = sd_weight,
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1359	.max_newidle_lb_cost = 0,
				1360	.next_decay_max_lb_cost = jiffies,
				1361	.child = child,
				1362	#ifdef CONFIG_SCHED_DEBUG
				1363	.name = tl->name,
				1364	#endif
				1365	};
				1366
				1367	cpumask_and(sched_domain_span(sd), cpu_map, tl->mask(cpu));
				1368	sd_id = cpumask_first(sched_domain_span(sd));
				1369
				1370	/*
				1371	* Convert topological properties into behaviour.
				1372	*/
				1373
Morten Rasmussen	a526d46	2020-02-06 19:19:55 +0000	[diff] [blame]	1374	/* Don't attempt to spread across CPUs of different capacities. */
				1375	if ((sd->flags & SD_ASYM_CPUCAPACITY) && sd->child)
				1376	sd->child->flags &= ~SD_PREFER_SIBLING;
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1377
				1378	if (sd->flags & SD_SHARE_CPUCAPACITY) {
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1379	sd->imbalance_pct = 110;
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1380
				1381	} else if (sd->flags & SD_SHARE_PKG_RESOURCES) {
				1382	sd->imbalance_pct = 117;
				1383	sd->cache_nice_tries = 1;
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1384
				1385	#ifdef CONFIG_NUMA
				1386	} else if (sd->flags & SD_NUMA) {
				1387	sd->cache_nice_tries = 2;
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1388
Morten Rasmussen	9c63e84	2018-07-04 11:17:50 +0100	[diff] [blame]	1389	sd->flags &= ~SD_PREFER_SIBLING;
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1390	sd->flags \|= SD_SERIALIZE;
Matt Fleming	a55c745	2019-08-08 20:53:01 +0100	[diff] [blame]	1391	if (sched_domains_numa_distance[tl->numa_level] > node_reclaim_distance) {
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1392	sd->flags &= ~(SD_BALANCE_EXEC \|
				1393	SD_BALANCE_FORK \|
				1394	SD_WAKE_AFFINE);
				1395	}
				1396
				1397	#endif
				1398	} else {
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1399	sd->cache_nice_tries = 1;
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1400	}
				1401
				1402	/*
				1403	* For all levels sharing cache; connect a sched_domain_shared
				1404	* instance.
				1405	*/
				1406	if (sd->flags & SD_SHARE_PKG_RESOURCES) {
				1407	sd->shared = *per_cpu_ptr(sdd->sds, sd_id);
				1408	atomic_inc(&sd->shared->ref);
				1409	atomic_set(&sd->shared->nr_busy_cpus, sd_weight);
				1410	}
				1411
				1412	sd->private = sdd;
				1413
				1414	return sd;
				1415	}
				1416
				1417	/*
				1418	* Topology list, bottom-up.
				1419	*/
				1420	static struct sched_domain_topology_level default_topology[] = {
				1421	#ifdef CONFIG_SCHED_SMT
				1422	{ cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
				1423	#endif
				1424	#ifdef CONFIG_SCHED_MC
				1425	{ cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
				1426	#endif
				1427	{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
				1428	{ NULL, },
				1429	};
				1430
				1431	static struct sched_domain_topology_level *sched_domain_topology =
				1432	default_topology;
				1433
				1434	#define for_each_sd_topology(tl) \
				1435	for (tl = sched_domain_topology; tl->mask; tl++)
				1436
				1437	void set_sched_topology(struct sched_domain_topology_level *tl)
				1438	{
				1439	if (WARN_ON_ONCE(sched_smp_initialized))
				1440	return;
				1441
				1442	sched_domain_topology = tl;
				1443	}
				1444
				1445	#ifdef CONFIG_NUMA
				1446
				1447	static const struct cpumask *sd_numa_mask(int cpu)
				1448	{
				1449	return sched_domains_numa_masks[sched_domains_curr_level][cpu_to_node(cpu)];
				1450	}
				1451
				1452	static void sched_numa_warn(const char *str)
				1453	{
				1454	static int done = false;
				1455	int i,j;
				1456
				1457	if (done)
				1458	return;
				1459
				1460	done = true;
				1461
				1462	printk(KERN_WARNING "ERROR: %s\n\n", str);
				1463
				1464	for (i = 0; i < nr_node_ids; i++) {
				1465	printk(KERN_WARNING " ");
				1466	for (j = 0; j < nr_node_ids; j++)
				1467	printk(KERN_CONT "%02d ", node_distance(i,j));
				1468	printk(KERN_CONT "\n");
				1469	}
				1470	printk(KERN_WARNING "\n");
				1471	}
				1472
				1473	bool find_numa_distance(int distance)
				1474	{
				1475	int i;
				1476
				1477	if (distance == node_distance(0, 0))
				1478	return true;
				1479
				1480	for (i = 0; i < sched_domains_numa_levels; i++) {
				1481	if (sched_domains_numa_distance[i] == distance)
				1482	return true;
				1483	}
				1484
				1485	return false;
				1486	}
				1487
				1488	/*
				1489	* A system can have three types of NUMA topology:
				1490	* NUMA_DIRECT: all nodes are directly connected, or not a NUMA system
				1491	* NUMA_GLUELESS_MESH: some nodes reachable through intermediary nodes
				1492	* NUMA_BACKPLANE: nodes can reach other nodes through a backplane
				1493	*
				1494	* The difference between a glueless mesh topology and a backplane
				1495	* topology lies in whether communication between not directly
				1496	* connected nodes goes through intermediary nodes (where programs
				1497	* could run), or through backplane controllers. This affects
				1498	* placement of programs.
				1499	*
				1500	* The type of topology can be discerned with the following tests:
				1501	* - If the maximum distance between any nodes is 1 hop, the system
				1502	* is directly connected.
				1503	* - If for two nodes A and B, located N > 1 hops away from each other,
				1504	* there is an intermediary node C, which is < N hops away from both
				1505	* nodes A and B, the system is a glueless mesh.
				1506	*/
				1507	static void init_numa_topology_type(void)
				1508	{
				1509	int a, b, c, n;
				1510
				1511	n = sched_max_numa_distance;
				1512
Srikar Dronamraju	e5e96fa	2018-08-10 22:30:18 +0530	[diff] [blame]	1513	if (sched_domains_numa_levels <= 2) {
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1514	sched_numa_topology_type = NUMA_DIRECT;
				1515	return;
				1516	}
				1517
				1518	for_each_online_node(a) {
				1519	for_each_online_node(b) {
				1520	/* Find two nodes furthest removed from each other. */
				1521	if (node_distance(a, b) < n)
				1522	continue;
				1523
				1524	/* Is there an intermediary node between a and b? */
				1525	for_each_online_node(c) {
				1526	if (node_distance(a, c) < n &&
				1527	node_distance(b, c) < n) {
				1528	sched_numa_topology_type =
				1529	NUMA_GLUELESS_MESH;
				1530	return;
				1531	}
				1532	}
				1533
				1534	sched_numa_topology_type = NUMA_BACKPLANE;
				1535	return;
				1536	}
				1537	}
				1538	}
				1539
				1540	void sched_init_numa(void)
				1541	{
				1542	int next_distance, curr_distance = node_distance(0, 0);
				1543	struct sched_domain_topology_level *tl;
				1544	int level = 0;
				1545	int i, j, k;
				1546
Peter Zijlstra	993f0b0	2018-11-02 14:22:25 +0100	[diff] [blame]	1547	sched_domains_numa_distance = kzalloc(sizeof(int) * (nr_node_ids + 1), GFP_KERNEL);
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1548	if (!sched_domains_numa_distance)
				1549	return;
				1550
Suravee Suthikulpanit	051f3ca	2017-09-07 02:20:05 -0500	[diff] [blame]	1551	/* Includes NUMA identity node at level 0. */
				1552	sched_domains_numa_distance[level++] = curr_distance;
				1553	sched_domains_numa_levels = level;
				1554
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1555	/*
				1556	* O(nr_nodes^2) deduplicating selection sort -- in order to find the
				1557	* unique distances in the node_distance() table.
				1558	*
				1559	* Assumes node_distance(0,j) includes all distances in
				1560	* node_distance(i,j) in order to avoid cubic time.
				1561	*/
				1562	next_distance = curr_distance;
				1563	for (i = 0; i < nr_node_ids; i++) {
				1564	for (j = 0; j < nr_node_ids; j++) {
				1565	for (k = 0; k < nr_node_ids; k++) {
				1566	int distance = node_distance(i, k);
				1567
				1568	if (distance > curr_distance &&
				1569	(distance < next_distance \|\|
				1570	next_distance == curr_distance))
				1571	next_distance = distance;
				1572
				1573	/*
				1574	* While not a strong assumption it would be nice to know
				1575	* about cases where if node A is connected to B, B is not
				1576	* equally connected to A.
				1577	*/
				1578	if (sched_debug() && node_distance(k, i) != distance)
				1579	sched_numa_warn("Node-distance not symmetric");
				1580
				1581	if (sched_debug() && i && !find_numa_distance(distance))
				1582	sched_numa_warn("Node-0 not representative");
				1583	}
				1584	if (next_distance != curr_distance) {
				1585	sched_domains_numa_distance[level++] = next_distance;
				1586	sched_domains_numa_levels = level;
				1587	curr_distance = next_distance;
				1588	} else break;
				1589	}
				1590
				1591	/*
				1592	* In case of sched_debug() we verify the above assumption.
				1593	*/
				1594	if (!sched_debug())
				1595	break;
				1596	}
				1597
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1598	/*
Suravee Suthikulpanit	051f3ca	2017-09-07 02:20:05 -0500	[diff] [blame]	1599	* 'level' contains the number of unique distances
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1600	*
				1601	* The sched_domains_numa_distance[] array includes the actual distance
				1602	* numbers.
				1603	*/
				1604
				1605	/*
				1606	* Here, we should temporarily reset sched_domains_numa_levels to 0.
				1607	* If it fails to allocate memory for array sched_domains_numa_masks[][],
				1608	* the array will contain less then 'level' members. This could be
				1609	* dangerous when we use it to iterate array sched_domains_numa_masks[][]
				1610	* in other functions.
				1611	*
				1612	* We reset it to 'level' at the end of this function.
				1613	*/
				1614	sched_domains_numa_levels = 0;
				1615
				1616	sched_domains_numa_masks = kzalloc(sizeof(void ) level, GFP_KERNEL);
				1617	if (!sched_domains_numa_masks)
				1618	return;
				1619
				1620	/*
				1621	* Now for each level, construct a mask per node which contains all
				1622	* CPUs of nodes that are that many hops away from us.
				1623	*/
				1624	for (i = 0; i < level; i++) {
				1625	sched_domains_numa_masks[i] =
				1626	kzalloc(nr_node_ids * sizeof(void *), GFP_KERNEL);
				1627	if (!sched_domains_numa_masks[i])
				1628	return;
				1629
				1630	for (j = 0; j < nr_node_ids; j++) {
				1631	struct cpumask *mask = kzalloc(cpumask_size(), GFP_KERNEL);
				1632	if (!mask)
				1633	return;
				1634
				1635	sched_domains_numa_masks[i][j] = mask;
				1636
				1637	for_each_node(k) {
				1638	if (node_distance(j, k) > sched_domains_numa_distance[i])
				1639	continue;
				1640
				1641	cpumask_or(mask, mask, cpumask_of_node(k));
				1642	}
				1643	}
				1644	}
				1645
				1646	/* Compute default topology size */
				1647	for (i = 0; sched_domain_topology[i].mask; i++);
				1648
				1649	tl = kzalloc((i + level + 1) *
				1650	sizeof(struct sched_domain_topology_level), GFP_KERNEL);
				1651	if (!tl)
				1652	return;
				1653
				1654	/*
				1655	* Copy the default topology bits..
				1656	*/
				1657	for (i = 0; sched_domain_topology[i].mask; i++)
				1658	tl[i] = sched_domain_topology[i];
				1659
				1660	/*
Suravee Suthikulpanit	051f3ca	2017-09-07 02:20:05 -0500	[diff] [blame]	1661	* Add the NUMA identity distance, aka single NODE.
				1662	*/
				1663	tl[i++] = (struct sched_domain_topology_level){
				1664	.mask = sd_numa_mask,
				1665	.numa_level = 0,
				1666	SD_INIT_NAME(NODE)
				1667	};
				1668
				1669	/*
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1670	* .. and append 'j' levels of NUMA goodness.
				1671	*/
Suravee Suthikulpanit	051f3ca	2017-09-07 02:20:05 -0500	[diff] [blame]	1672	for (j = 1; j < level; i++, j++) {
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1673	tl[i] = (struct sched_domain_topology_level){
				1674	.mask = sd_numa_mask,
				1675	.sd_flags = cpu_numa_flags,
				1676	.flags = SDTL_OVERLAP,
				1677	.numa_level = j,
				1678	SD_INIT_NAME(NUMA)
				1679	};
				1680	}
				1681
				1682	sched_domain_topology = tl;
				1683
				1684	sched_domains_numa_levels = level;
				1685	sched_max_numa_distance = sched_domains_numa_distance[level - 1];
				1686
				1687	init_numa_topology_type();
				1688	}
				1689
				1690	void sched_domains_numa_masks_set(unsigned int cpu)
				1691	{
				1692	int node = cpu_to_node(cpu);
				1693	int i, j;
				1694
				1695	for (i = 0; i < sched_domains_numa_levels; i++) {
				1696	for (j = 0; j < nr_node_ids; j++) {
				1697	if (node_distance(j, node) <= sched_domains_numa_distance[i])
				1698	cpumask_set_cpu(cpu, sched_domains_numa_masks[i][j]);
				1699	}
				1700	}
				1701	}
				1702
				1703	void sched_domains_numa_masks_clear(unsigned int cpu)
				1704	{
				1705	int i, j;
				1706
				1707	for (i = 0; i < sched_domains_numa_levels; i++) {
				1708	for (j = 0; j < nr_node_ids; j++)
				1709	cpumask_clear_cpu(cpu, sched_domains_numa_masks[i][j]);
				1710	}
				1711	}
				1712
Wanpeng Li	e0e8d49	2019-06-28 16:51:41 +0800	[diff] [blame]	1713	/*
				1714	* sched_numa_find_closest() - given the NUMA topology, find the cpu
				1715	* closest to @cpu from @cpumask.
				1716	* cpumask: cpumask to find a cpu from
				1717	* cpu: cpu to be close to
				1718	*
				1719	* returns: cpu, or nr_cpu_ids when nothing found.
				1720	*/
				1721	int sched_numa_find_closest(const struct cpumask *cpus, int cpu)
				1722	{
				1723	int i, j = cpu_to_node(cpu);
				1724
				1725	for (i = 0; i < sched_domains_numa_levels; i++) {
				1726	cpu = cpumask_any_and(cpus, sched_domains_numa_masks[i][j]);
				1727	if (cpu < nr_cpu_ids)
				1728	return cpu;
				1729	}
				1730	return nr_cpu_ids;
				1731	}
				1732
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1733	#endif /* CONFIG_NUMA */
				1734
				1735	static int __sdt_alloc(const struct cpumask *cpu_map)
				1736	{
				1737	struct sched_domain_topology_level *tl;
				1738	int j;
				1739
				1740	for_each_sd_topology(tl) {
				1741	struct sd_data *sdd = &tl->data;
				1742
				1743	sdd->sd = alloc_percpu(struct sched_domain *);
				1744	if (!sdd->sd)
				1745	return -ENOMEM;
				1746
				1747	sdd->sds = alloc_percpu(struct sched_domain_shared *);
				1748	if (!sdd->sds)
				1749	return -ENOMEM;
				1750
				1751	sdd->sg = alloc_percpu(struct sched_group *);
				1752	if (!sdd->sg)
				1753	return -ENOMEM;
				1754
				1755	sdd->sgc = alloc_percpu(struct sched_group_capacity *);
				1756	if (!sdd->sgc)
				1757	return -ENOMEM;
				1758
				1759	for_each_cpu(j, cpu_map) {
				1760	struct sched_domain *sd;
				1761	struct sched_domain_shared *sds;
				1762	struct sched_group *sg;
				1763	struct sched_group_capacity *sgc;
				1764
				1765	sd = kzalloc_node(sizeof(struct sched_domain) + cpumask_size(),
				1766	GFP_KERNEL, cpu_to_node(j));
				1767	if (!sd)
				1768	return -ENOMEM;
				1769
				1770	*per_cpu_ptr(sdd->sd, j) = sd;
				1771
				1772	sds = kzalloc_node(sizeof(struct sched_domain_shared),
				1773	GFP_KERNEL, cpu_to_node(j));
				1774	if (!sds)
				1775	return -ENOMEM;
				1776
				1777	*per_cpu_ptr(sdd->sds, j) = sds;
				1778
				1779	sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(),
				1780	GFP_KERNEL, cpu_to_node(j));
				1781	if (!sg)
				1782	return -ENOMEM;
				1783
				1784	sg->next = sg;
				1785
				1786	*per_cpu_ptr(sdd->sg, j) = sg;
				1787
				1788	sgc = kzalloc_node(sizeof(struct sched_group_capacity) + cpumask_size(),
				1789	GFP_KERNEL, cpu_to_node(j));
				1790	if (!sgc)
				1791	return -ENOMEM;
				1792
Peter Zijlstra	005f874	2017-04-26 17:35:35 +0200	[diff] [blame]	1793	#ifdef CONFIG_SCHED_DEBUG
				1794	sgc->id = j;
				1795	#endif
				1796
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1797	*per_cpu_ptr(sdd->sgc, j) = sgc;
				1798	}
				1799	}
				1800
				1801	return 0;
				1802	}
				1803
				1804	static void __sdt_free(const struct cpumask *cpu_map)
				1805	{
				1806	struct sched_domain_topology_level *tl;
				1807	int j;
				1808
				1809	for_each_sd_topology(tl) {
				1810	struct sd_data *sdd = &tl->data;
				1811
				1812	for_each_cpu(j, cpu_map) {
				1813	struct sched_domain *sd;
				1814
				1815	if (sdd->sd) {
				1816	sd = *per_cpu_ptr(sdd->sd, j);
				1817	if (sd && (sd->flags & SD_OVERLAP))
				1818	free_sched_groups(sd->groups, 0);
				1819	kfree(*per_cpu_ptr(sdd->sd, j));
				1820	}
				1821
				1822	if (sdd->sds)
				1823	kfree(*per_cpu_ptr(sdd->sds, j));
				1824	if (sdd->sg)
				1825	kfree(*per_cpu_ptr(sdd->sg, j));
				1826	if (sdd->sgc)
				1827	kfree(*per_cpu_ptr(sdd->sgc, j));
				1828	}
				1829	free_percpu(sdd->sd);
				1830	sdd->sd = NULL;
				1831	free_percpu(sdd->sds);
				1832	sdd->sds = NULL;
				1833	free_percpu(sdd->sg);
				1834	sdd->sg = NULL;
				1835	free_percpu(sdd->sgc);
				1836	sdd->sgc = NULL;
				1837	}
				1838	}
				1839
Viresh Kumar	181a80d1	2017-04-27 13:58:59 +0530	[diff] [blame]	1840	static struct sched_domain build_sched_domain(struct sched_domain_topology_level tl,
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1841	const struct cpumask cpu_map, struct sched_domain_attr attr,
Morten Rasmussen	05484e0	2018-07-20 14:32:31 +0100	[diff] [blame]	1842	struct sched_domain *child, int dflags, int cpu)
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1843	{
Morten Rasmussen	05484e0	2018-07-20 14:32:31 +0100	[diff] [blame]	1844	struct sched_domain *sd = sd_init(tl, cpu_map, child, dflags, cpu);
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1845
				1846	if (child) {
				1847	sd->level = child->level + 1;
				1848	sched_domain_level_max = max(sched_domain_level_max, sd->level);
				1849	child->parent = sd;
				1850
				1851	if (!cpumask_subset(sched_domain_span(child),
				1852	sched_domain_span(sd))) {
				1853	pr_err("BUG: arch topology borken\n");
				1854	#ifdef CONFIG_SCHED_DEBUG
				1855	pr_err(" the %s domain not a subset of the %s domain\n",
				1856	child->name, sd->name);
				1857	#endif
Ingo Molnar	97fb7a0	2018-03-03 14:01:12 +0100	[diff] [blame]	1858	/* Fixup, ensure @sd has at least @child CPUs. */
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1859	cpumask_or(sched_domain_span(sd),
				1860	sched_domain_span(sd),
				1861	sched_domain_span(child));
				1862	}
				1863
				1864	}
				1865	set_domain_attribute(sd, attr);
				1866
				1867	return sd;
				1868	}
				1869
				1870	/*
Valentin Schneider	ccf7412	2020-01-15 16:09:15 +0000	[diff] [blame]	1871	* Ensure topology masks are sane, i.e. there are no conflicts (overlaps) for
				1872	* any two given CPUs at this (non-NUMA) topology level.
				1873	*/
				1874	static bool topology_span_sane(struct sched_domain_topology_level *tl,
				1875	const struct cpumask *cpu_map, int cpu)
				1876	{
				1877	int i;
				1878
				1879	/* NUMA levels are allowed to overlap */
				1880	if (tl->flags & SDTL_OVERLAP)
				1881	return true;
				1882
				1883	/*
				1884	* Non-NUMA levels cannot partially overlap - they must be either
				1885	* completely equal or completely disjoint. Otherwise we can end up
				1886	* breaking the sched_group lists - i.e. a later get_group() pass
				1887	* breaks the linking done for an earlier span.
				1888	*/
				1889	for_each_cpu(i, cpu_map) {
				1890	if (i == cpu)
				1891	continue;
				1892	/*
				1893	* We should 'and' all those masks with 'cpu_map' to exactly
				1894	* match the topology we're about to build, but that can only
				1895	* remove CPUs, which only lessens our ability to detect
				1896	* overlaps
				1897	*/
				1898	if (!cpumask_equal(tl->mask(cpu), tl->mask(i)) &&
				1899	cpumask_intersects(tl->mask(cpu), tl->mask(i)))
				1900	return false;
				1901	}
				1902
				1903	return true;
				1904	}
				1905
				1906	/*
Morten Rasmussen	05484e0	2018-07-20 14:32:31 +0100	[diff] [blame]	1907	* Find the sched_domain_topology_level where all CPU capacities are visible
				1908	* for all CPUs.
				1909	*/
				1910	static struct sched_domain_topology_level
				1911	asym_cpu_capacity_level(const struct cpumask cpu_map)
				1912	{
				1913	int i, j, asym_level = 0;
				1914	bool asym = false;
				1915	struct sched_domain_topology_level tl, asym_tl = NULL;
				1916	unsigned long cap;
				1917
				1918	/* Is there any asymmetry? */
Vincent Guittot	8ec59c0	2019-06-17 17:00:17 +0200	[diff] [blame]	1919	cap = arch_scale_cpu_capacity(cpumask_first(cpu_map));
Morten Rasmussen	05484e0	2018-07-20 14:32:31 +0100	[diff] [blame]	1920
				1921	for_each_cpu(i, cpu_map) {
Vincent Guittot	8ec59c0	2019-06-17 17:00:17 +0200	[diff] [blame]	1922	if (arch_scale_cpu_capacity(i) != cap) {
Morten Rasmussen	05484e0	2018-07-20 14:32:31 +0100	[diff] [blame]	1923	asym = true;
				1924	break;
				1925	}
				1926	}
				1927
				1928	if (!asym)
				1929	return NULL;
				1930
				1931	/*
				1932	* Examine topology from all CPU's point of views to detect the lowest
				1933	* sched_domain_topology_level where a highest capacity CPU is visible
				1934	* to everyone.
				1935	*/
				1936	for_each_cpu(i, cpu_map) {
Vincent Guittot	8ec59c0	2019-06-17 17:00:17 +0200	[diff] [blame]	1937	unsigned long max_capacity = arch_scale_cpu_capacity(i);
Morten Rasmussen	05484e0	2018-07-20 14:32:31 +0100	[diff] [blame]	1938	int tl_id = 0;
				1939
				1940	for_each_sd_topology(tl) {
				1941	if (tl_id < asym_level)
				1942	goto next_level;
				1943
				1944	for_each_cpu_and(j, tl->mask(i), cpu_map) {
				1945	unsigned long capacity;
				1946
Vincent Guittot	8ec59c0	2019-06-17 17:00:17 +0200	[diff] [blame]	1947	capacity = arch_scale_cpu_capacity(j);
Morten Rasmussen	05484e0	2018-07-20 14:32:31 +0100	[diff] [blame]	1948
				1949	if (capacity <= max_capacity)
				1950	continue;
				1951
				1952	max_capacity = capacity;
				1953	asym_level = tl_id;
				1954	asym_tl = tl;
				1955	}
				1956	next_level:
				1957	tl_id++;
				1958	}
				1959	}
				1960
				1961	return asym_tl;
				1962	}
				1963
				1964
				1965	/*
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1966	* Build sched domains for a given set of CPUs and attach the sched domains
				1967	* to the individual CPUs
				1968	*/
				1969	static int
				1970	build_sched_domains(const struct cpumask cpu_map, struct sched_domain_attr attr)
				1971	{
Valentin Schneider	cd1cb33	2019-10-23 16:37:44 +0100	[diff] [blame]	1972	enum s_alloc alloc_state = sa_none;
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1973	struct sched_domain *sd;
				1974	struct s_data d;
				1975	struct rq *rq = NULL;
				1976	int i, ret = -ENOMEM;
Morten Rasmussen	05484e0	2018-07-20 14:32:31 +0100	[diff] [blame]	1977	struct sched_domain_topology_level *tl_asym;
Morten Rasmussen	df054e8	2018-07-04 11:17:39 +0100	[diff] [blame]	1978	bool has_asym = false;
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1979
Valentin Schneider	cd1cb33	2019-10-23 16:37:44 +0100	[diff] [blame]	1980	if (WARN_ON(cpumask_empty(cpu_map)))
				1981	goto error;
				1982
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1983	alloc_state = __visit_domain_allocation_hell(&d, cpu_map);
				1984	if (alloc_state != sa_rootdomain)
				1985	goto error;
				1986
Morten Rasmussen	05484e0	2018-07-20 14:32:31 +0100	[diff] [blame]	1987	tl_asym = asym_cpu_capacity_level(cpu_map);
				1988
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1989	/* Set up domains for CPUs specified by the cpu_map: */
				1990	for_each_cpu(i, cpu_map) {
				1991	struct sched_domain_topology_level *tl;
				1992
				1993	sd = NULL;
				1994	for_each_sd_topology(tl) {
Morten Rasmussen	05484e0	2018-07-20 14:32:31 +0100	[diff] [blame]	1995	int dflags = 0;
				1996
Morten Rasmussen	df054e8	2018-07-04 11:17:39 +0100	[diff] [blame]	1997	if (tl == tl_asym) {
Morten Rasmussen	05484e0	2018-07-20 14:32:31 +0100	[diff] [blame]	1998	dflags \|= SD_ASYM_CPUCAPACITY;
Morten Rasmussen	df054e8	2018-07-04 11:17:39 +0100	[diff] [blame]	1999	has_asym = true;
				2000	}
Morten Rasmussen	05484e0	2018-07-20 14:32:31 +0100	[diff] [blame]	2001
Valentin Schneider	ccf7412	2020-01-15 16:09:15 +0000	[diff] [blame]	2002	if (WARN_ON(!topology_span_sane(tl, cpu_map, i)))
				2003	goto error;
				2004
Morten Rasmussen	05484e0	2018-07-20 14:32:31 +0100	[diff] [blame]	2005	sd = build_sched_domain(tl, cpu_map, attr, sd, dflags, i);
				2006
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	2007	if (tl == sched_domain_topology)
				2008	*per_cpu_ptr(d.sd, i) = sd;
Peter Zijlstra	af85596	2017-04-26 17:36:41 +0200	[diff] [blame]	2009	if (tl->flags & SDTL_OVERLAP)
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	2010	sd->flags \|= SD_OVERLAP;
				2011	if (cpumask_equal(cpu_map, sched_domain_span(sd)))
				2012	break;
				2013	}
				2014	}
				2015
				2016	/* Build the groups for the domains */
				2017	for_each_cpu(i, cpu_map) {
				2018	for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
				2019	sd->span_weight = cpumask_weight(sched_domain_span(sd));
				2020	if (sd->flags & SD_OVERLAP) {
				2021	if (build_overlap_sched_groups(sd, i))
				2022	goto error;
				2023	} else {
				2024	if (build_sched_groups(sd, i))
				2025	goto error;
				2026	}
				2027	}
				2028	}
				2029
				2030	/* Calculate CPU capacity for physical packages and nodes */
				2031	for (i = nr_cpumask_bits-1; i >= 0; i--) {
				2032	if (!cpumask_test_cpu(i, cpu_map))
				2033	continue;
				2034
				2035	for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
				2036	claim_allocations(i, sd);
				2037	init_sched_groups_capacity(i, sd);
				2038	}
				2039	}
				2040
				2041	/* Attach the domains */
				2042	rcu_read_lock();
				2043	for_each_cpu(i, cpu_map) {
				2044	rq = cpu_rq(i);
				2045	sd = *per_cpu_ptr(d.sd, i);
				2046
				2047	/* Use READ_ONCE()/WRITE_ONCE() to avoid load/store tearing: */
				2048	if (rq->cpu_capacity_orig > READ_ONCE(d.rd->max_cpu_capacity))
				2049	WRITE_ONCE(d.rd->max_cpu_capacity, rq->cpu_capacity_orig);
				2050
				2051	cpu_attach_domain(sd, d.rd, i);
				2052	}
				2053	rcu_read_unlock();
				2054
Morten Rasmussen	df054e8	2018-07-04 11:17:39 +0100	[diff] [blame]	2055	if (has_asym)
Valentin Schneider	e284df7	2019-10-23 16:37:45 +0100	[diff] [blame]	2056	static_branch_inc_cpuslocked(&sched_asym_cpucapacity);
Morten Rasmussen	df054e8	2018-07-04 11:17:39 +0100	[diff] [blame]	2057
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	2058	if (rq && sched_debug_enabled) {
Juri Lelli	bf5015a	2018-05-24 17:29:36 +0200	[diff] [blame]	2059	pr_info("root domain span: %*pbl (max cpu_capacity = %lu)\n",
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	2060	cpumask_pr_args(cpu_map), rq->rd->max_cpu_capacity);
				2061	}
				2062
				2063	ret = 0;
				2064	error:
				2065	__free_domain_allocs(&d, alloc_state, cpu_map);
Ingo Molnar	97fb7a0	2018-03-03 14:01:12 +0100	[diff] [blame]	2066
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	2067	return ret;
				2068	}
				2069
				2070	/* Current sched domains: */
				2071	static cpumask_var_t *doms_cur;
				2072
				2073	/* Number of sched domains in 'doms_cur': */
				2074	static int ndoms_cur;
				2075
				2076	/* Attributes of custom domains in 'doms_cur' */
				2077	static struct sched_domain_attr *dattr_cur;
				2078
				2079	/*
				2080	* Special case: If a kmalloc() of a doms_cur partition (array of
				2081	* cpumask) fails, then fallback to a single sched domain,
				2082	* as determined by the single cpumask fallback_doms.
				2083	*/
Peter Zijlstra	8d5dc51	2017-04-25 15:29:40 +0200	[diff] [blame]	2084	static cpumask_var_t fallback_doms;
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	2085
				2086	/*
				2087	* arch_update_cpu_topology lets virtualized architectures update the
				2088	* CPU core maps. It is supposed to return 1 if the topology changed
				2089	* or 0 if it stayed the same.
				2090	*/
				2091	int __weak arch_update_cpu_topology(void)
				2092	{
				2093	return 0;
				2094	}
				2095
				2096	cpumask_var_t *alloc_sched_domains(unsigned int ndoms)
				2097	{
				2098	int i;
				2099	cpumask_var_t *doms;
				2100
Kees Cook	6da2ec5	2018-06-12 13:55:00 -0700	[diff] [blame]	2101	doms = kmalloc_array(ndoms, sizeof(*doms), GFP_KERNEL);
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	2102	if (!doms)
				2103	return NULL;
				2104	for (i = 0; i < ndoms; i++) {
				2105	if (!alloc_cpumask_var(&doms[i], GFP_KERNEL)) {
				2106	free_sched_domains(doms, i);
				2107	return NULL;
				2108	}
				2109	}
				2110	return doms;
				2111	}
				2112
				2113	void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms)
				2114	{
				2115	unsigned int i;
				2116	for (i = 0; i < ndoms; i++)
				2117	free_cpumask_var(doms[i]);
				2118	kfree(doms);
				2119	}
				2120
				2121	/*
Juri Lelli	cb0c041	2018-12-19 14:34:45 +0100	[diff] [blame]	2122	* Set up scheduler domains and groups. For now this just excludes isolated
				2123	* CPUs, but could be used to exclude other special cases in the future.
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	2124	*/
Peter Zijlstra	8d5dc51	2017-04-25 15:29:40 +0200	[diff] [blame]	2125	int sched_init_domains(const struct cpumask *cpu_map)
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	2126	{
				2127	int err;
				2128
Peter Zijlstra	8d5dc51	2017-04-25 15:29:40 +0200	[diff] [blame]	2129	zalloc_cpumask_var(&sched_domains_tmpmask, GFP_KERNEL);
Peter Zijlstra	1676330	2017-04-25 14:31:11 +0200	[diff] [blame]	2130	zalloc_cpumask_var(&sched_domains_tmpmask2, GFP_KERNEL);
Peter Zijlstra	8d5dc51	2017-04-25 15:29:40 +0200	[diff] [blame]	2131	zalloc_cpumask_var(&fallback_doms, GFP_KERNEL);
				2132
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	2133	arch_update_cpu_topology();
				2134	ndoms_cur = 1;
				2135	doms_cur = alloc_sched_domains(ndoms_cur);
				2136	if (!doms_cur)
				2137	doms_cur = &fallback_doms;
Frederic Weisbecker	edb9382	2017-10-27 04:42:37 +0200	[diff] [blame]	2138	cpumask_and(doms_cur[0], cpu_map, housekeeping_cpumask(HK_FLAG_DOMAIN));
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	2139	err = build_sched_domains(doms_cur[0], NULL);
				2140	register_sched_domain_sysctl();
				2141
				2142	return err;
				2143	}
				2144
				2145	/*
				2146	* Detach sched domains from a group of CPUs specified in cpu_map
				2147	* These CPUs will now be attached to the NULL domain
				2148	*/
				2149	static void detach_destroy_domains(const struct cpumask *cpu_map)
				2150	{
Valentin Schneider	e284df7	2019-10-23 16:37:45 +0100	[diff] [blame]	2151	unsigned int cpu = cpumask_any(cpu_map);
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	2152	int i;
				2153
Valentin Schneider	e284df7	2019-10-23 16:37:45 +0100	[diff] [blame]	2154	if (rcu_access_pointer(per_cpu(sd_asym_cpucapacity, cpu)))
				2155	static_branch_dec_cpuslocked(&sched_asym_cpucapacity);
				2156
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	2157	rcu_read_lock();
				2158	for_each_cpu(i, cpu_map)
				2159	cpu_attach_domain(NULL, &def_root_domain, i);
				2160	rcu_read_unlock();
				2161	}
				2162
				2163	/* handle null as "default" */
				2164	static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur,
				2165	struct sched_domain_attr *new, int idx_new)
				2166	{
				2167	struct sched_domain_attr tmp;
				2168
				2169	/* Fast path: */
				2170	if (!new && !cur)
				2171	return 1;
				2172
				2173	tmp = SD_ATTR_INIT;
Ingo Molnar	97fb7a0	2018-03-03 14:01:12 +0100	[diff] [blame]	2174
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	2175	return !memcmp(cur ? (cur + idx_cur) : &tmp,
				2176	new ? (new + idx_new) : &tmp,
				2177	sizeof(struct sched_domain_attr));
				2178	}
				2179
				2180	/*
				2181	* Partition sched domains as specified by the 'ndoms_new'
				2182	* cpumasks in the array doms_new[] of cpumasks. This compares
				2183	* doms_new[] to the current sched domain partitioning, doms_cur[].
				2184	* It destroys each deleted domain and builds each new domain.
				2185	*
				2186	* 'doms_new' is an array of cpumask_var_t's of length 'ndoms_new'.
				2187	* The masks don't intersect (don't overlap.) We should setup one
				2188	* sched domain for each mask. CPUs not in any of the cpumasks will
				2189	* not be load balanced. If the same cpumask appears both in the
				2190	* current 'doms_cur' domains and in the new 'doms_new', we can leave
				2191	* it as it is.
				2192	*
				2193	* The passed in 'doms_new' should be allocated using
				2194	* alloc_sched_domains. This routine takes ownership of it and will
				2195	* free_sched_domains it when done with it. If the caller failed the
				2196	* alloc call, then it can pass in doms_new == NULL && ndoms_new == 1,
				2197	* and partition_sched_domains() will fallback to the single partition
				2198	* 'fallback_doms', it also forces the domains to be rebuilt.
				2199	*
				2200	* If doms_new == NULL it will be replaced with cpu_online_mask.
				2201	* ndoms_new == 0 is a special case for destroying existing domains,
				2202	* and it will not create the default domain.
				2203	*
Mathieu Poirier	c22645f	2019-07-19 15:59:53 +0200	[diff] [blame]	2204	* Call with hotplug lock and sched_domains_mutex held
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	2205	*/
Mathieu Poirier	c22645f	2019-07-19 15:59:53 +0200	[diff] [blame]	2206	void partition_sched_domains_locked(int ndoms_new, cpumask_var_t doms_new[],
				2207	struct sched_domain_attr *dattr_new)
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	2208	{
Quentin Perret	1f74de8	2018-12-03 09:56:22 +0000	[diff] [blame]	2209	bool __maybe_unused has_eas = false;
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	2210	int i, j, n;
				2211	int new_topology;
				2212
Mathieu Poirier	c22645f	2019-07-19 15:59:53 +0200	[diff] [blame]	2213	lockdep_assert_held(&sched_domains_mutex);
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	2214
				2215	/* Always unregister in case we don't destroy any domains: */
				2216	unregister_sched_domain_sysctl();
				2217
				2218	/* Let the architecture update CPU core mappings: */
				2219	new_topology = arch_update_cpu_topology();
				2220
Peter Zijlstra	09e0dd8	2017-08-08 12:16:24 +0200	[diff] [blame]	2221	if (!doms_new) {
				2222	WARN_ON_ONCE(dattr_new);
				2223	n = 0;
				2224	doms_new = alloc_sched_domains(1);
				2225	if (doms_new) {
				2226	n = 1;
Frederic Weisbecker	edb9382	2017-10-27 04:42:37 +0200	[diff] [blame]	2227	cpumask_and(doms_new[0], cpu_active_mask,
				2228	housekeeping_cpumask(HK_FLAG_DOMAIN));
Peter Zijlstra	09e0dd8	2017-08-08 12:16:24 +0200	[diff] [blame]	2229	}
				2230	} else {
				2231	n = ndoms_new;
				2232	}
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	2233
				2234	/* Destroy deleted domains: */
				2235	for (i = 0; i < ndoms_cur; i++) {
				2236	for (j = 0; j < n && !new_topology; j++) {
Quentin Perret	6aa140f	2018-12-03 09:56:18 +0000	[diff] [blame]	2237	if (cpumask_equal(doms_cur[i], doms_new[j]) &&
Mathieu Poirier	f9a25f7	2019-07-19 15:59:55 +0200	[diff] [blame]	2238	dattrs_equal(dattr_cur, i, dattr_new, j)) {
				2239	struct root_domain *rd;
				2240
				2241	/*
				2242	* This domain won't be destroyed and as such
				2243	* its dl_bw->total_bw needs to be cleared. It
				2244	* will be recomputed in function
				2245	* update_tasks_root_domain().
				2246	*/
				2247	rd = cpu_rq(cpumask_any(doms_cur[i]))->rd;
				2248	dl_clear_root_domain(rd);
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	2249	goto match1;
Mathieu Poirier	f9a25f7	2019-07-19 15:59:55 +0200	[diff] [blame]	2250	}
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	2251	}
				2252	/* No match - a current sched domain not in new doms_new[] */
				2253	detach_destroy_domains(doms_cur[i]);
				2254	match1:
				2255	;
				2256	}
				2257
				2258	n = ndoms_cur;
Peter Zijlstra	09e0dd8	2017-08-08 12:16:24 +0200	[diff] [blame]	2259	if (!doms_new) {
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	2260	n = 0;
				2261	doms_new = &fallback_doms;
Frederic Weisbecker	edb9382	2017-10-27 04:42:37 +0200	[diff] [blame]	2262	cpumask_and(doms_new[0], cpu_active_mask,
				2263	housekeeping_cpumask(HK_FLAG_DOMAIN));
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	2264	}
				2265
				2266	/* Build new domains: */
				2267	for (i = 0; i < ndoms_new; i++) {
				2268	for (j = 0; j < n && !new_topology; j++) {
Quentin Perret	6aa140f	2018-12-03 09:56:18 +0000	[diff] [blame]	2269	if (cpumask_equal(doms_new[i], doms_cur[j]) &&
				2270	dattrs_equal(dattr_new, i, dattr_cur, j))
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	2271	goto match2;
				2272	}
				2273	/* No match - add a new doms_new */
				2274	build_sched_domains(doms_new[i], dattr_new ? dattr_new + i : NULL);
				2275	match2:
				2276	;
				2277	}
				2278
Quentin Perret	531b5c9	2018-12-03 09:56:21 +0000	[diff] [blame]	2279	#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
Quentin Perret	6aa140f	2018-12-03 09:56:18 +0000	[diff] [blame]	2280	/* Build perf. domains: */
				2281	for (i = 0; i < ndoms_new; i++) {
Quentin Perret	531b5c9	2018-12-03 09:56:21 +0000	[diff] [blame]	2282	for (j = 0; j < n && !sched_energy_update; j++) {
Quentin Perret	6aa140f	2018-12-03 09:56:18 +0000	[diff] [blame]	2283	if (cpumask_equal(doms_new[i], doms_cur[j]) &&
Quentin Perret	1f74de8	2018-12-03 09:56:22 +0000	[diff] [blame]	2284	cpu_rq(cpumask_first(doms_cur[j]))->rd->pd) {
				2285	has_eas = true;
Quentin Perret	6aa140f	2018-12-03 09:56:18 +0000	[diff] [blame]	2286	goto match3;
Quentin Perret	1f74de8	2018-12-03 09:56:22 +0000	[diff] [blame]	2287	}
Quentin Perret	6aa140f	2018-12-03 09:56:18 +0000	[diff] [blame]	2288	}
				2289	/* No match - add perf. domains for a new rd */
Quentin Perret	1f74de8	2018-12-03 09:56:22 +0000	[diff] [blame]	2290	has_eas \|= build_perf_domains(doms_new[i]);
Quentin Perret	6aa140f	2018-12-03 09:56:18 +0000	[diff] [blame]	2291	match3:
				2292	;
				2293	}
Quentin Perret	1f74de8	2018-12-03 09:56:22 +0000	[diff] [blame]	2294	sched_energy_set(has_eas);
Quentin Perret	6aa140f	2018-12-03 09:56:18 +0000	[diff] [blame]	2295	#endif
				2296
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	2297	/* Remember the new sched domains: */
				2298	if (doms_cur != &fallback_doms)
				2299	free_sched_domains(doms_cur, ndoms_cur);
				2300
				2301	kfree(dattr_cur);
				2302	doms_cur = doms_new;
				2303	dattr_cur = dattr_new;
				2304	ndoms_cur = ndoms_new;
				2305
				2306	register_sched_domain_sysctl();
Mathieu Poirier	c22645f	2019-07-19 15:59:53 +0200	[diff] [blame]	2307	}
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	2308
Mathieu Poirier	c22645f	2019-07-19 15:59:53 +0200	[diff] [blame]	2309	/*
				2310	* Call with hotplug lock held
				2311	*/
				2312	void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
				2313	struct sched_domain_attr *dattr_new)
				2314	{
				2315	mutex_lock(&sched_domains_mutex);
				2316	partition_sched_domains_locked(ndoms_new, doms_new, dattr_new);
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	2317	mutex_unlock(&sched_domains_mutex);
				2318	}