Blame - kernel/sched/topology.c - SHIFTPHONES/mainline/linux

blob: f36ed96f3197500568c08ee83abda91ecde26e3e [file] [log] [blame]

Greg Kroah-Hartman	b244131	2017-11-01 15:07:57 +0100	[diff] [blame]	1	// SPDX-License-Identifier: GPL-2.0
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	2	/*
				3	* Scheduler topology setup/handling methods
				4	*/
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	5	#include "sched.h"
				6
				7	DEFINE_MUTEX(sched_domains_mutex);
				8
				9	/* Protected by sched_domains_mutex: */
zhong jiang	ace8031	2018-08-03 20:37:32 +0800	[diff] [blame]	10	static cpumask_var_t sched_domains_tmpmask;
				11	static cpumask_var_t sched_domains_tmpmask2;
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	12
				13	#ifdef CONFIG_SCHED_DEBUG
				14
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	15	static int __init sched_debug_setup(char *str)
				16	{
Peter Zijlstra	9469eb0	2017-09-07 17:03:53 +0200	[diff] [blame]	17	sched_debug_enabled = true;
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	18
				19	return 0;
				20	}
				21	early_param("sched_debug", sched_debug_setup);
				22
				23	static inline bool sched_debug(void)
				24	{
				25	return sched_debug_enabled;
				26	}
				27
				28	static int sched_domain_debug_one(struct sched_domain *sd, int cpu, int level,
				29	struct cpumask *groupmask)
				30	{
				31	struct sched_group *group = sd->groups;
Valentin Schneider	65c5e25	2020-08-17 12:29:51 +0100	[diff] [blame]	32	unsigned long flags = sd->flags;
				33	unsigned int idx;
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	34
				35	cpumask_clear(groupmask);
				36
Peter Zijlstra	005f874	2017-04-26 17:35:35 +0200	[diff] [blame]	37	printk(KERN_DEBUG "%*s domain-%d: ", level, "", level);
Peter Zijlstra	005f874	2017-04-26 17:35:35 +0200	[diff] [blame]	38	printk(KERN_CONT "span=%*pbl level=%s\n",
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	39	cpumask_pr_args(sched_domain_span(sd)), sd->name);
				40
				41	if (!cpumask_test_cpu(cpu, sched_domain_span(sd))) {
Ingo Molnar	97fb7a0	2018-03-03 14:01:12 +0100	[diff] [blame]	42	printk(KERN_ERR "ERROR: domain->span does not contain CPU%d\n", cpu);
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	43	}
Yi Wang	6cd0c58	2018-07-23 12:19:07 +0800	[diff] [blame]	44	if (group && !cpumask_test_cpu(cpu, sched_group_span(group))) {
Ingo Molnar	97fb7a0	2018-03-03 14:01:12 +0100	[diff] [blame]	45	printk(KERN_ERR "ERROR: domain->groups does not contain CPU%d\n", cpu);
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	46	}
				47
Valentin Schneider	65c5e25	2020-08-17 12:29:51 +0100	[diff] [blame]	48	for_each_set_bit(idx, &flags, __SD_FLAG_CNT) {
				49	unsigned int flag = BIT(idx);
				50	unsigned int meta_flags = sd_flag_debug[idx].meta_flags;
				51
				52	if ((meta_flags & SDF_SHARED_CHILD) && sd->child &&
				53	!(sd->child->flags & flag))
				54	printk(KERN_ERR "ERROR: flag %s set here but not in child\n",
				55	sd_flag_debug[idx].name);
				56
				57	if ((meta_flags & SDF_SHARED_PARENT) && sd->parent &&
				58	!(sd->parent->flags & flag))
				59	printk(KERN_ERR "ERROR: flag %s set here but not in parent\n",
				60	sd_flag_debug[idx].name);
				61	}
				62
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	63	printk(KERN_DEBUG "%*s groups:", level + 1, "");
				64	do {
				65	if (!group) {
				66	printk("\n");
				67	printk(KERN_ERR "ERROR: group is NULL\n");
				68	break;
				69	}
				70
Peter Zijlstra	ae4df9d	2017-05-01 11:03:12 +0200	[diff] [blame]	71	if (!cpumask_weight(sched_group_span(group))) {
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	72	printk(KERN_CONT "\n");
				73	printk(KERN_ERR "ERROR: empty group\n");
				74	break;
				75	}
				76
				77	if (!(sd->flags & SD_OVERLAP) &&
Peter Zijlstra	ae4df9d	2017-05-01 11:03:12 +0200	[diff] [blame]	78	cpumask_intersects(groupmask, sched_group_span(group))) {
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	79	printk(KERN_CONT "\n");
				80	printk(KERN_ERR "ERROR: repeated CPUs\n");
				81	break;
				82	}
				83
Peter Zijlstra	ae4df9d	2017-05-01 11:03:12 +0200	[diff] [blame]	84	cpumask_or(groupmask, groupmask, sched_group_span(group));
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	85
Peter Zijlstra	005f874	2017-04-26 17:35:35 +0200	[diff] [blame]	86	printk(KERN_CONT " %d:{ span=%*pbl",
				87	group->sgc->id,
Peter Zijlstra	ae4df9d	2017-05-01 11:03:12 +0200	[diff] [blame]	88	cpumask_pr_args(sched_group_span(group)));
Peter Zijlstra	b0151c2	2017-04-14 17:29:16 +0200	[diff] [blame]	89
Peter Zijlstra	af21812	2017-05-01 08:51:05 +0200	[diff] [blame]	90	if ((sd->flags & SD_OVERLAP) &&
Peter Zijlstra	ae4df9d	2017-05-01 11:03:12 +0200	[diff] [blame]	91	!cpumask_equal(group_balance_mask(group), sched_group_span(group))) {
Peter Zijlstra	005f874	2017-04-26 17:35:35 +0200	[diff] [blame]	92	printk(KERN_CONT " mask=%*pbl",
Peter Zijlstra	e5c14b1	2017-05-01 10:47:02 +0200	[diff] [blame]	93	cpumask_pr_args(group_balance_mask(group)));
Peter Zijlstra	b0151c2	2017-04-14 17:29:16 +0200	[diff] [blame]	94	}
				95
Peter Zijlstra	005f874	2017-04-26 17:35:35 +0200	[diff] [blame]	96	if (group->sgc->capacity != SCHED_CAPACITY_SCALE)
				97	printk(KERN_CONT " cap=%lu", group->sgc->capacity);
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	98
Peter Zijlstra	a420b06	2017-04-14 18:20:48 +0200	[diff] [blame]	99	if (group == sd->groups && sd->child &&
				100	!cpumask_equal(sched_domain_span(sd->child),
Peter Zijlstra	ae4df9d	2017-05-01 11:03:12 +0200	[diff] [blame]	101	sched_group_span(group))) {
Peter Zijlstra	a420b06	2017-04-14 18:20:48 +0200	[diff] [blame]	102	printk(KERN_ERR "ERROR: domain->groups does not match domain->child\n");
				103	}
				104
Peter Zijlstra	005f874	2017-04-26 17:35:35 +0200	[diff] [blame]	105	printk(KERN_CONT " }");
				106
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	107	group = group->next;
Peter Zijlstra	b0151c2	2017-04-14 17:29:16 +0200	[diff] [blame]	108
				109	if (group != sd->groups)
				110	printk(KERN_CONT ",");
				111
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	112	} while (group != sd->groups);
				113	printk(KERN_CONT "\n");
				114
				115	if (!cpumask_equal(sched_domain_span(sd), groupmask))
				116	printk(KERN_ERR "ERROR: groups don't span domain->span\n");
				117
				118	if (sd->parent &&
				119	!cpumask_subset(groupmask, sched_domain_span(sd->parent)))
Ingo Molnar	97fb7a0	2018-03-03 14:01:12 +0100	[diff] [blame]	120	printk(KERN_ERR "ERROR: parent span is not a superset of domain->span\n");
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	121	return 0;
				122	}
				123
				124	static void sched_domain_debug(struct sched_domain *sd, int cpu)
				125	{
				126	int level = 0;
				127
				128	if (!sched_debug_enabled)
				129	return;
				130
				131	if (!sd) {
				132	printk(KERN_DEBUG "CPU%d attaching NULL sched-domain.\n", cpu);
				133	return;
				134	}
				135
Peter Zijlstra	005f874	2017-04-26 17:35:35 +0200	[diff] [blame]	136	printk(KERN_DEBUG "CPU%d attaching sched-domain(s):\n", cpu);
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	137
				138	for (;;) {
				139	if (sched_domain_debug_one(sd, cpu, level, sched_domains_tmpmask))
				140	break;
				141	level++;
				142	sd = sd->parent;
				143	if (!sd)
				144	break;
				145	}
				146	}
				147	#else /* !CONFIG_SCHED_DEBUG */
				148
				149	# define sched_debug_enabled 0
				150	# define sched_domain_debug(sd, cpu) do { } while (0)
				151	static inline bool sched_debug(void)
				152	{
				153	return false;
				154	}
				155	#endif /* CONFIG_SCHED_DEBUG */
				156
				157	static int sd_degenerate(struct sched_domain *sd)
				158	{
				159	if (cpumask_weight(sched_domain_span(sd)) == 1)
				160	return 1;
				161
				162	/* Following flags need at least 2 groups */
Valentin Schneider	6f34981	2020-08-17 12:29:54 +0100	[diff] [blame]	163	if ((sd->flags & SD_DEGENERATE_GROUPS_MASK) &&
				164	(sd->groups != sd->groups->next))
				165	return 0;
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	166
				167	/* Following flags don't use groups */
				168	if (sd->flags & (SD_WAKE_AFFINE))
				169	return 0;
				170
				171	return 1;
				172	}
				173
				174	static int
				175	sd_parent_degenerate(struct sched_domain sd, struct sched_domain parent)
				176	{
				177	unsigned long cflags = sd->flags, pflags = parent->flags;
				178
				179	if (sd_degenerate(parent))
				180	return 1;
				181
				182	if (!cpumask_equal(sched_domain_span(sd), sched_domain_span(parent)))
				183	return 0;
				184
				185	/* Flags needing groups don't count if only 1 group in parent */
Valentin Schneider	ab65afb	2020-08-17 12:29:55 +0100	[diff] [blame]	186	if (parent->groups == parent->groups->next)
Valentin Schneider	6f34981	2020-08-17 12:29:54 +0100	[diff] [blame]	187	pflags &= ~(SD_DEGENERATE_GROUPS_MASK \| SD_PREFER_SIBLING);
Valentin Schneider	ab65afb	2020-08-17 12:29:55 +0100	[diff] [blame]	188
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	189	if (~cflags & pflags)
				190	return 0;
				191
				192	return 1;
				193	}
				194
Quentin Perret	531b5c9	2018-12-03 09:56:21 +0000	[diff] [blame]	195	#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
Peter Zijlstra	f8a696f	2018-12-05 11:23:56 +0100	[diff] [blame]	196	DEFINE_STATIC_KEY_FALSE(sched_energy_present);
Quentin Perret	8d5d0cf	2018-12-03 09:56:23 +0000	[diff] [blame]	197	unsigned int sysctl_sched_energy_aware = 1;
Quentin Perret	531b5c9	2018-12-03 09:56:21 +0000	[diff] [blame]	198	DEFINE_MUTEX(sched_energy_mutex);
				199	bool sched_energy_update;
				200
Quentin Perret	8d5d0cf	2018-12-03 09:56:23 +0000	[diff] [blame]	201	#ifdef CONFIG_PROC_SYSCTL
				202	int sched_energy_aware_handler(struct ctl_table *table, int write,
Christoph Hellwig	3292739	2020-04-24 08:43:38 +0200	[diff] [blame]	203	void buffer, size_t lenp, loff_t *ppos)
Quentin Perret	8d5d0cf	2018-12-03 09:56:23 +0000	[diff] [blame]	204	{
				205	int ret, state;
				206
				207	if (write && !capable(CAP_SYS_ADMIN))
				208	return -EPERM;
				209
				210	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
				211	if (!ret && write) {
				212	state = static_branch_unlikely(&sched_energy_present);
				213	if (state != sysctl_sched_energy_aware) {
				214	mutex_lock(&sched_energy_mutex);
				215	sched_energy_update = 1;
				216	rebuild_sched_domains();
				217	sched_energy_update = 0;
				218	mutex_unlock(&sched_energy_mutex);
				219	}
				220	}
				221
				222	return ret;
				223	}
				224	#endif
				225
Quentin Perret	6aa140f	2018-12-03 09:56:18 +0000	[diff] [blame]	226	static void free_pd(struct perf_domain *pd)
				227	{
				228	struct perf_domain *tmp;
				229
				230	while (pd) {
				231	tmp = pd->next;
				232	kfree(pd);
				233	pd = tmp;
				234	}
				235	}
				236
				237	static struct perf_domain find_pd(struct perf_domain pd, int cpu)
				238	{
				239	while (pd) {
				240	if (cpumask_test_cpu(cpu, perf_domain_span(pd)))
				241	return pd;
				242	pd = pd->next;
				243	}
				244
				245	return NULL;
				246	}
				247
				248	static struct perf_domain *pd_init(int cpu)
				249	{
				250	struct em_perf_domain *obj = em_cpu_get(cpu);
				251	struct perf_domain *pd;
				252
				253	if (!obj) {
				254	if (sched_debug())
				255	pr_info("%s: no EM found for CPU%d\n", __func__, cpu);
				256	return NULL;
				257	}
				258
				259	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
				260	if (!pd)
				261	return NULL;
				262	pd->em_pd = obj;
				263
				264	return pd;
				265	}
				266
				267	static void perf_domain_debug(const struct cpumask *cpu_map,
				268	struct perf_domain *pd)
				269	{
				270	if (!sched_debug() \|\| !pd)
				271	return;
				272
				273	printk(KERN_DEBUG "root_domain %*pbl:", cpumask_pr_args(cpu_map));
				274
				275	while (pd) {
Lukasz Luba	521b512	2020-05-27 10:58:47 +0100	[diff] [blame]	276	printk(KERN_CONT " pd%d:{ cpus=%*pbl nr_pstate=%d }",
Quentin Perret	6aa140f	2018-12-03 09:56:18 +0000	[diff] [blame]	277	cpumask_first(perf_domain_span(pd)),
				278	cpumask_pr_args(perf_domain_span(pd)),
Lukasz Luba	521b512	2020-05-27 10:58:47 +0100	[diff] [blame]	279	em_pd_nr_perf_states(pd->em_pd));
Quentin Perret	6aa140f	2018-12-03 09:56:18 +0000	[diff] [blame]	280	pd = pd->next;
				281	}
				282
				283	printk(KERN_CONT "\n");
				284	}
				285
				286	static void destroy_perf_domain_rcu(struct rcu_head *rp)
				287	{
				288	struct perf_domain *pd;
				289
				290	pd = container_of(rp, struct perf_domain, rcu);
				291	free_pd(pd);
				292	}
				293
Quentin Perret	1f74de8	2018-12-03 09:56:22 +0000	[diff] [blame]	294	static void sched_energy_set(bool has_eas)
				295	{
				296	if (!has_eas && static_branch_unlikely(&sched_energy_present)) {
				297	if (sched_debug())
				298	pr_info("%s: stopping EAS\n", __func__);
				299	static_branch_disable_cpuslocked(&sched_energy_present);
				300	} else if (has_eas && !static_branch_unlikely(&sched_energy_present)) {
				301	if (sched_debug())
				302	pr_info("%s: starting EAS\n", __func__);
				303	static_branch_enable_cpuslocked(&sched_energy_present);
				304	}
				305	}
				306
Quentin Perret	b68a4c0	2018-12-03 09:56:20 +0000	[diff] [blame]	307	/*
				308	* EAS can be used on a root domain if it meets all the following conditions:
				309	* 1. an Energy Model (EM) is available;
				310	* 2. the SD_ASYM_CPUCAPACITY flag is set in the sched_domain hierarchy.
Valentin Schneider	38502ab	2020-02-27 19:14:32 +0000	[diff] [blame]	311	* 3. no SMT is detected.
				312	* 4. the EM complexity is low enough to keep scheduling overheads low;
				313	* 5. schedutil is driving the frequency of all CPUs of the rd;
Quentin Perret	b68a4c0	2018-12-03 09:56:20 +0000	[diff] [blame]	314	*
				315	* The complexity of the Energy Model is defined as:
				316	*
Lukasz Luba	521b512	2020-05-27 10:58:47 +0100	[diff] [blame]	317	* C = nr_pd * (nr_cpus + nr_ps)
Quentin Perret	b68a4c0	2018-12-03 09:56:20 +0000	[diff] [blame]	318	*
				319	* with parameters defined as:
				320	* - nr_pd: the number of performance domains
				321	* - nr_cpus: the number of CPUs
Lukasz Luba	521b512	2020-05-27 10:58:47 +0100	[diff] [blame]	322	* - nr_ps: the sum of the number of performance states of all performance
Quentin Perret	b68a4c0	2018-12-03 09:56:20 +0000	[diff] [blame]	323	* domains (for example, on a system with 2 performance domains,
Lukasz Luba	521b512	2020-05-27 10:58:47 +0100	[diff] [blame]	324	* with 10 performance states each, nr_ps = 2 * 10 = 20).
Quentin Perret	b68a4c0	2018-12-03 09:56:20 +0000	[diff] [blame]	325	*
				326	* It is generally not a good idea to use such a model in the wake-up path on
				327	* very complex platforms because of the associated scheduling overheads. The
				328	* arbitrary constraint below prevents that. It makes EAS usable up to 16 CPUs
Lukasz Luba	521b512	2020-05-27 10:58:47 +0100	[diff] [blame]	329	* with per-CPU DVFS and less than 8 performance states each, for example.
Quentin Perret	b68a4c0	2018-12-03 09:56:20 +0000	[diff] [blame]	330	*/
				331	#define EM_MAX_COMPLEXITY 2048
				332
Quentin Perret	531b5c9	2018-12-03 09:56:21 +0000	[diff] [blame]	333	extern struct cpufreq_governor schedutil_gov;
Quentin Perret	1f74de8	2018-12-03 09:56:22 +0000	[diff] [blame]	334	static bool build_perf_domains(const struct cpumask *cpu_map)
Quentin Perret	6aa140f	2018-12-03 09:56:18 +0000	[diff] [blame]	335	{
Lukasz Luba	521b512	2020-05-27 10:58:47 +0100	[diff] [blame]	336	int i, nr_pd = 0, nr_ps = 0, nr_cpus = cpumask_weight(cpu_map);
Quentin Perret	6aa140f	2018-12-03 09:56:18 +0000	[diff] [blame]	337	struct perf_domain pd = NULL, tmp;
				338	int cpu = cpumask_first(cpu_map);
				339	struct root_domain *rd = cpu_rq(cpu)->rd;
Quentin Perret	531b5c9	2018-12-03 09:56:21 +0000	[diff] [blame]	340	struct cpufreq_policy *policy;
				341	struct cpufreq_governor *gov;
Quentin Perret	b68a4c0	2018-12-03 09:56:20 +0000	[diff] [blame]	342
Quentin Perret	8d5d0cf	2018-12-03 09:56:23 +0000	[diff] [blame]	343	if (!sysctl_sched_energy_aware)
				344	goto free;
				345
Quentin Perret	b68a4c0	2018-12-03 09:56:20 +0000	[diff] [blame]	346	/* EAS is enabled for asymmetric CPU capacity topologies. */
				347	if (!per_cpu(sd_asym_cpucapacity, cpu)) {
				348	if (sched_debug()) {
				349	pr_info("rd %*pbl: CPUs do not have asymmetric capacities\n",
				350	cpumask_pr_args(cpu_map));
				351	}
				352	goto free;
				353	}
Quentin Perret	6aa140f	2018-12-03 09:56:18 +0000	[diff] [blame]	354
Valentin Schneider	38502ab	2020-02-27 19:14:32 +0000	[diff] [blame]	355	/* EAS definitely does not handle SMT */
				356	if (sched_smt_active()) {
				357	pr_warn("rd %*pbl: Disabling EAS, SMT is not supported\n",
				358	cpumask_pr_args(cpu_map));
				359	goto free;
				360	}
				361
Quentin Perret	6aa140f	2018-12-03 09:56:18 +0000	[diff] [blame]	362	for_each_cpu(i, cpu_map) {
				363	/* Skip already covered CPUs. */
				364	if (find_pd(pd, i))
				365	continue;
				366
Quentin Perret	531b5c9	2018-12-03 09:56:21 +0000	[diff] [blame]	367	/* Do not attempt EAS if schedutil is not being used. */
				368	policy = cpufreq_cpu_get(i);
				369	if (!policy)
				370	goto free;
				371	gov = policy->governor;
				372	cpufreq_cpu_put(policy);
				373	if (gov != &schedutil_gov) {
				374	if (rd->pd)
				375	pr_warn("rd %*pbl: Disabling EAS, schedutil is mandatory\n",
				376	cpumask_pr_args(cpu_map));
				377	goto free;
				378	}
				379
Quentin Perret	6aa140f	2018-12-03 09:56:18 +0000	[diff] [blame]	380	/* Create the new pd and add it to the local list. */
				381	tmp = pd_init(i);
				382	if (!tmp)
				383	goto free;
				384	tmp->next = pd;
				385	pd = tmp;
Quentin Perret	b68a4c0	2018-12-03 09:56:20 +0000	[diff] [blame]	386
				387	/*
Lukasz Luba	521b512	2020-05-27 10:58:47 +0100	[diff] [blame]	388	* Count performance domains and performance states for the
Quentin Perret	b68a4c0	2018-12-03 09:56:20 +0000	[diff] [blame]	389	* complexity check.
				390	*/
				391	nr_pd++;
Lukasz Luba	521b512	2020-05-27 10:58:47 +0100	[diff] [blame]	392	nr_ps += em_pd_nr_perf_states(pd->em_pd);
Quentin Perret	b68a4c0	2018-12-03 09:56:20 +0000	[diff] [blame]	393	}
				394
				395	/* Bail out if the Energy Model complexity is too high. */
Lukasz Luba	521b512	2020-05-27 10:58:47 +0100	[diff] [blame]	396	if (nr_pd * (nr_ps + nr_cpus) > EM_MAX_COMPLEXITY) {
Quentin Perret	b68a4c0	2018-12-03 09:56:20 +0000	[diff] [blame]	397	WARN(1, "rd %*pbl: Failed to start EAS, EM complexity is too high\n",
				398	cpumask_pr_args(cpu_map));
				399	goto free;
Quentin Perret	6aa140f	2018-12-03 09:56:18 +0000	[diff] [blame]	400	}
				401
				402	perf_domain_debug(cpu_map, pd);
				403
				404	/* Attach the new list of performance domains to the root domain. */
				405	tmp = rd->pd;
				406	rcu_assign_pointer(rd->pd, pd);
				407	if (tmp)
				408	call_rcu(&tmp->rcu, destroy_perf_domain_rcu);
				409
Quentin Perret	1f74de8	2018-12-03 09:56:22 +0000	[diff] [blame]	410	return !!pd;
Quentin Perret	6aa140f	2018-12-03 09:56:18 +0000	[diff] [blame]	411
				412	free:
				413	free_pd(pd);
				414	tmp = rd->pd;
				415	rcu_assign_pointer(rd->pd, NULL);
				416	if (tmp)
				417	call_rcu(&tmp->rcu, destroy_perf_domain_rcu);
Quentin Perret	1f74de8	2018-12-03 09:56:22 +0000	[diff] [blame]	418
				419	return false;
Quentin Perret	6aa140f	2018-12-03 09:56:18 +0000	[diff] [blame]	420	}
				421	#else
				422	static void free_pd(struct perf_domain *pd) { }
Quentin Perret	531b5c9	2018-12-03 09:56:21 +0000	[diff] [blame]	423	#endif /* CONFIG_ENERGY_MODEL && CONFIG_CPU_FREQ_GOV_SCHEDUTIL*/
Quentin Perret	6aa140f	2018-12-03 09:56:18 +0000	[diff] [blame]	424
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	425	static void free_rootdomain(struct rcu_head *rcu)
				426	{
				427	struct root_domain *rd = container_of(rcu, struct root_domain, rcu);
				428
				429	cpupri_cleanup(&rd->cpupri);
				430	cpudl_cleanup(&rd->cpudl);
				431	free_cpumask_var(rd->dlo_mask);
				432	free_cpumask_var(rd->rto_mask);
				433	free_cpumask_var(rd->online);
				434	free_cpumask_var(rd->span);
Quentin Perret	6aa140f	2018-12-03 09:56:18 +0000	[diff] [blame]	435	free_pd(rd->pd);
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	436	kfree(rd);
				437	}
				438
				439	void rq_attach_root(struct rq rq, struct root_domain rd)
				440	{
				441	struct root_domain *old_rd = NULL;
				442	unsigned long flags;
				443
				444	raw_spin_lock_irqsave(&rq->lock, flags);
				445
				446	if (rq->rd) {
				447	old_rd = rq->rd;
				448
				449	if (cpumask_test_cpu(rq->cpu, old_rd->online))
				450	set_rq_offline(rq);
				451
				452	cpumask_clear_cpu(rq->cpu, old_rd->span);
				453
				454	/*
				455	* If we dont want to free the old_rd yet then
				456	* set old_rd to NULL to skip the freeing later
				457	* in this function:
				458	*/
				459	if (!atomic_dec_and_test(&old_rd->refcount))
				460	old_rd = NULL;
				461	}
				462
				463	atomic_inc(&rd->refcount);
				464	rq->rd = rd;
				465
				466	cpumask_set_cpu(rq->cpu, rd->span);
				467	if (cpumask_test_cpu(rq->cpu, cpu_active_mask))
				468	set_rq_online(rq);
				469
				470	raw_spin_unlock_irqrestore(&rq->lock, flags);
				471
				472	if (old_rd)
Paul E. McKenney	337e9b0	2018-11-06 19:10:53 -0800	[diff] [blame]	473	call_rcu(&old_rd->rcu, free_rootdomain);
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	474	}
				475
Steven Rostedt (VMware)	364f566	2018-01-23 20:45:38 -0500	[diff] [blame]	476	void sched_get_rd(struct root_domain *rd)
				477	{
				478	atomic_inc(&rd->refcount);
				479	}
				480
				481	void sched_put_rd(struct root_domain *rd)
				482	{
				483	if (!atomic_dec_and_test(&rd->refcount))
				484	return;
				485
Paul E. McKenney	337e9b0	2018-11-06 19:10:53 -0800	[diff] [blame]	486	call_rcu(&rd->rcu, free_rootdomain);
Steven Rostedt (VMware)	364f566	2018-01-23 20:45:38 -0500	[diff] [blame]	487	}
				488
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	489	static int init_rootdomain(struct root_domain *rd)
				490	{
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	491	if (!zalloc_cpumask_var(&rd->span, GFP_KERNEL))
				492	goto out;
				493	if (!zalloc_cpumask_var(&rd->online, GFP_KERNEL))
				494	goto free_span;
				495	if (!zalloc_cpumask_var(&rd->dlo_mask, GFP_KERNEL))
				496	goto free_online;
				497	if (!zalloc_cpumask_var(&rd->rto_mask, GFP_KERNEL))
				498	goto free_dlo_mask;
				499
Steven Rostedt (Red Hat)	4bdced5	2017-10-06 14:05:04 -0400	[diff] [blame]	500	#ifdef HAVE_RT_PUSH_IPI
				501	rd->rto_cpu = -1;
				502	raw_spin_lock_init(&rd->rto_lock);
				503	init_irq_work(&rd->rto_push_work, rto_push_irq_work_func);
				504	#endif
				505
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	506	init_dl_bw(&rd->dl_bw);
				507	if (cpudl_init(&rd->cpudl) != 0)
				508	goto free_rto_mask;
				509
				510	if (cpupri_init(&rd->cpupri) != 0)
				511	goto free_cpudl;
				512	return 0;
				513
				514	free_cpudl:
				515	cpudl_cleanup(&rd->cpudl);
				516	free_rto_mask:
				517	free_cpumask_var(rd->rto_mask);
				518	free_dlo_mask:
				519	free_cpumask_var(rd->dlo_mask);
				520	free_online:
				521	free_cpumask_var(rd->online);
				522	free_span:
				523	free_cpumask_var(rd->span);
				524	out:
				525	return -ENOMEM;
				526	}
				527
				528	/*
				529	* By default the system creates a single root-domain with all CPUs as
				530	* members (mimicking the global state we have today).
				531	*/
				532	struct root_domain def_root_domain;
				533
				534	void init_defrootdomain(void)
				535	{
				536	init_rootdomain(&def_root_domain);
				537
				538	atomic_set(&def_root_domain.refcount, 1);
				539	}
				540
				541	static struct root_domain *alloc_rootdomain(void)
				542	{
				543	struct root_domain *rd;
				544
Viresh Kumar	4d13a06	2017-04-13 14:45:48 +0530	[diff] [blame]	545	rd = kzalloc(sizeof(*rd), GFP_KERNEL);
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	546	if (!rd)
				547	return NULL;
				548
				549	if (init_rootdomain(rd) != 0) {
				550	kfree(rd);
				551	return NULL;
				552	}
				553
				554	return rd;
				555	}
				556
				557	static void free_sched_groups(struct sched_group *sg, int free_sgc)
				558	{
				559	struct sched_group tmp, first;
				560
				561	if (!sg)
				562	return;
				563
				564	first = sg;
				565	do {
				566	tmp = sg->next;
				567
				568	if (free_sgc && atomic_dec_and_test(&sg->sgc->ref))
				569	kfree(sg->sgc);
				570
Shu Wang	213c5a4	2017-08-10 15:52:16 +0800	[diff] [blame]	571	if (atomic_dec_and_test(&sg->ref))
				572	kfree(sg);
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	573	sg = tmp;
				574	} while (sg != first);
				575	}
				576
				577	static void destroy_sched_domain(struct sched_domain *sd)
				578	{
				579	/*
Peter Zijlstra	a090c4f	2017-08-21 15:42:52 +0200	[diff] [blame]	580	* A normal sched domain may have multiple group references, an
				581	* overlapping domain, having private groups, only one. Iterate,
				582	* dropping group/capacity references, freeing where none remain.
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	583	*/
Shu Wang	213c5a4	2017-08-10 15:52:16 +0800	[diff] [blame]	584	free_sched_groups(sd->groups, 1);
				585
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	586	if (sd->shared && atomic_dec_and_test(&sd->shared->ref))
				587	kfree(sd->shared);
				588	kfree(sd);
				589	}
				590
				591	static void destroy_sched_domains_rcu(struct rcu_head *rcu)
				592	{
				593	struct sched_domain *sd = container_of(rcu, struct sched_domain, rcu);
				594
				595	while (sd) {
				596	struct sched_domain *parent = sd->parent;
				597	destroy_sched_domain(sd);
				598	sd = parent;
				599	}
				600	}
				601
				602	static void destroy_sched_domains(struct sched_domain *sd)
				603	{
				604	if (sd)
				605	call_rcu(&sd->rcu, destroy_sched_domains_rcu);
				606	}
				607
				608	/*
				609	* Keep a special pointer to the highest sched_domain that has
				610	* SD_SHARE_PKG_RESOURCES set (Last Level Cache Domain); this
				611	* allows us to avoid some pointer chasing in select_idle_sibling().
				612	*
				613	* Also keep a unique ID per domain (we use the first CPU number in
				614	* the cpumask of the domain), this allows us to quickly tell if
				615	* two CPUs are in the same cache domain, see cpus_share_cache().
				616	*/
Joel Fernandes (Google)	994aeb7	2019-03-20 20:34:24 -0400	[diff] [blame]	617	DEFINE_PER_CPU(struct sched_domain __rcu *, sd_llc);
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	618	DEFINE_PER_CPU(int, sd_llc_size);
				619	DEFINE_PER_CPU(int, sd_llc_id);
Joel Fernandes (Google)	994aeb7	2019-03-20 20:34:24 -0400	[diff] [blame]	620	DEFINE_PER_CPU(struct sched_domain_shared __rcu *, sd_llc_shared);
				621	DEFINE_PER_CPU(struct sched_domain __rcu *, sd_numa);
				622	DEFINE_PER_CPU(struct sched_domain __rcu *, sd_asym_packing);
				623	DEFINE_PER_CPU(struct sched_domain __rcu *, sd_asym_cpucapacity);
Morten Rasmussen	df054e8	2018-07-04 11:17:39 +0100	[diff] [blame]	624	DEFINE_STATIC_KEY_FALSE(sched_asym_cpucapacity);
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	625
				626	static void update_top_cache_domain(int cpu)
				627	{
				628	struct sched_domain_shared *sds = NULL;
				629	struct sched_domain *sd;
				630	int id = cpu;
				631	int size = 1;
				632
				633	sd = highest_flag_domain(cpu, SD_SHARE_PKG_RESOURCES);
				634	if (sd) {
				635	id = cpumask_first(sched_domain_span(sd));
				636	size = cpumask_weight(sched_domain_span(sd));
				637	sds = sd->shared;
				638	}
				639
				640	rcu_assign_pointer(per_cpu(sd_llc, cpu), sd);
				641	per_cpu(sd_llc_size, cpu) = size;
				642	per_cpu(sd_llc_id, cpu) = id;
				643	rcu_assign_pointer(per_cpu(sd_llc_shared, cpu), sds);
				644
				645	sd = lowest_flag_domain(cpu, SD_NUMA);
				646	rcu_assign_pointer(per_cpu(sd_numa, cpu), sd);
				647
				648	sd = highest_flag_domain(cpu, SD_ASYM_PACKING);
Quentin Perret	011b27b	2018-12-03 09:56:19 +0000	[diff] [blame]	649	rcu_assign_pointer(per_cpu(sd_asym_packing, cpu), sd);
				650
				651	sd = lowest_flag_domain(cpu, SD_ASYM_CPUCAPACITY);
				652	rcu_assign_pointer(per_cpu(sd_asym_cpucapacity, cpu), sd);
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	653	}
				654
				655	/*
				656	* Attach the domain 'sd' to 'cpu' as its base domain. Callers must
				657	* hold the hotplug lock.
				658	*/
				659	static void
				660	cpu_attach_domain(struct sched_domain sd, struct root_domain rd, int cpu)
				661	{
				662	struct rq *rq = cpu_rq(cpu);
				663	struct sched_domain *tmp;
				664
				665	/* Remove the sched domains which do not contribute to scheduling. */
				666	for (tmp = sd; tmp; ) {
				667	struct sched_domain *parent = tmp->parent;
				668	if (!parent)
				669	break;
				670
				671	if (sd_parent_degenerate(tmp, parent)) {
				672	tmp->parent = parent->parent;
				673	if (parent->parent)
				674	parent->parent->child = tmp;
				675	/*
				676	* Transfer SD_PREFER_SIBLING down in case of a
				677	* degenerate parent; the spans match for this
				678	* so the property transfers.
				679	*/
				680	if (parent->flags & SD_PREFER_SIBLING)
				681	tmp->flags \|= SD_PREFER_SIBLING;
				682	destroy_sched_domain(parent);
				683	} else
				684	tmp = tmp->parent;
				685	}
				686
				687	if (sd && sd_degenerate(sd)) {
				688	tmp = sd;
				689	sd = sd->parent;
				690	destroy_sched_domain(tmp);
				691	if (sd)
				692	sd->child = NULL;
				693	}
				694
				695	sched_domain_debug(sd, cpu);
				696
				697	rq_attach_root(rq, rd);
				698	tmp = rq->sd;
				699	rcu_assign_pointer(rq->sd, sd);
Peter Zijlstra	bbdacdf	2017-08-10 17:10:26 +0200	[diff] [blame]	700	dirty_sched_domain_sysctl(cpu);
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	701	destroy_sched_domains(tmp);
				702
				703	update_top_cache_domain(cpu);
				704	}
				705
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	706	struct s_data {
Luc Van Oostenryck	99687cd	2019-01-18 15:49:36 +0100	[diff] [blame]	707	struct sched_domain * __percpu *sd;
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	708	struct root_domain *rd;
				709	};
				710
				711	enum s_alloc {
				712	sa_rootdomain,
				713	sa_sd,
				714	sa_sd_storage,
				715	sa_none,
				716	};
				717
				718	/*
Peter Zijlstra	35a566e	2017-04-28 10:54:26 +0200	[diff] [blame]	719	* Return the canonical balance CPU for this group, this is the first CPU
Peter Zijlstra	e5c14b1	2017-05-01 10:47:02 +0200	[diff] [blame]	720	* of this group that's also in the balance mask.
Peter Zijlstra	35a566e	2017-04-28 10:54:26 +0200	[diff] [blame]	721	*
Peter Zijlstra	e5c14b1	2017-05-01 10:47:02 +0200	[diff] [blame]	722	* The balance mask are all those CPUs that could actually end up at this
				723	* group. See build_balance_mask().
Peter Zijlstra	35a566e	2017-04-28 10:54:26 +0200	[diff] [blame]	724	*
				725	* Also see should_we_balance().
				726	*/
				727	int group_balance_cpu(struct sched_group *sg)
				728	{
Peter Zijlstra	e5c14b1	2017-05-01 10:47:02 +0200	[diff] [blame]	729	return cpumask_first(group_balance_mask(sg));
Peter Zijlstra	35a566e	2017-04-28 10:54:26 +0200	[diff] [blame]	730	}
				731
				732
				733	/*
				734	* NUMA topology (first read the regular topology blurb below)
				735	*
				736	* Given a node-distance table, for example:
				737	*
				738	* node 0 1 2 3
				739	* 0: 10 20 30 20
				740	* 1: 20 10 20 30
				741	* 2: 30 20 10 20
				742	* 3: 20 30 20 10
				743	*
				744	* which represents a 4 node ring topology like:
				745	*
				746	*   0 ----- 1
				747	*   |       |
				748	*   |       |
				749	*   |       |
				750	*   3 ----- 2
				751	*
				752	* We want to construct domains and groups to represent this. The way we go
				753	* about doing this is to build the domains on 'hops'. For each NUMA level we
				754	* construct the mask of all nodes reachable in @level hops.
				755	*
				756	* For the above NUMA topology that gives 3 levels:
				757	*
				758	* NUMA-2 0-3 0-3 0-3 0-3
				759	* groups: {0-1,3},{1-3} {0-2},{0,2-3} {1-3},{0-1,3} {0,2-3},{0-2}
				760	*
				761	* NUMA-1 0-1,3 0-2 1-3 0,2-3
				762	* groups: {0},{1},{3} {0},{1},{2} {1},{2},{3} {0},{2},{3}
				763	*
				764	* NUMA-0 0 1 2 3
				765	*
				766	*
				767	* As can be seen; things don't nicely line up as with the regular topology.
				768	* When we iterate a domain in child domain chunks some nodes can be
				769	* represented multiple times -- hence the "overlap" naming for this part of
				770	* the topology.
				771	*
				772	* In order to minimize this overlap, we only build enough groups to cover the
				773	* domain. For instance Node-0 NUMA-2 would only get groups: 0-1,3 and 1-3.
				774	*
				775	* Because:
				776	*
				777	* - the first group of each domain is its child domain; this
				778	* gets us the first 0-1,3
				779	* - the only uncovered node is 2, whose child domain is 1-3.
				780	*
				781	* However, because of the overlap, computing a unique CPU for each group is
				782	* more complicated. Consider for instance the groups of NODE-1 NUMA-2, both
				783	* groups include the CPUs of Node-0, while those CPUs would not in fact ever
				784	* end up at those groups (they would end up in group: 0-1,3).
				785	*
Peter Zijlstra	e5c14b1	2017-05-01 10:47:02 +0200	[diff] [blame]	786	* To correct this we have to introduce the group balance mask. This mask
Peter Zijlstra	35a566e	2017-04-28 10:54:26 +0200	[diff] [blame]	787	* will contain those CPUs in the group that can reach this group given the
				788	* (child) domain tree.
				789	*
				790	* With this we can once again compute balance_cpu and sched_group_capacity
				791	* relations.
				792	*
				793	* XXX include words on how balance_cpu is unique and therefore can be
				794	* used for sched_group_capacity links.
				795	*
				796	*
				797	* Another 'interesting' topology is:
				798	*
				799	* node 0 1 2 3
				800	* 0: 10 20 20 30
				801	* 1: 20 10 20 20
				802	* 2: 20 20 10 20
				803	* 3: 30 20 20 10
				804	*
				805	* Which looks a little like:
				806	*
				807	*   0 ----- 1
				808	*   |     / |
				809	*   |   /   |
				810	*   | /     |
				811	*   2 ----- 3
				812	*
				813	* This topology is asymmetric, nodes 1,2 are fully connected, but nodes 0,3
				814	* are not.
				815	*
				816	* This leads to a few particularly weird cases where the sched_domain's are
Ingo Molnar	97fb7a0	2018-03-03 14:01:12 +0100	[diff] [blame]	817	* not of the same number for each CPU. Consider:
Peter Zijlstra	35a566e	2017-04-28 10:54:26 +0200	[diff] [blame]	818	*
				819	* NUMA-2 0-3 0-3
				820	* groups: {0-2},{1-3} {1-3},{0-2}
				821	*
				822	* NUMA-1 0-2 0-3 0-3 1-3
				823	*
				824	* NUMA-0 0 1 2 3
				825	*
				826	*/
				827
				828
				829	/*
Peter Zijlstra	e5c14b1	2017-05-01 10:47:02 +0200	[diff] [blame]	830	* Build the balance mask; it contains only those CPUs that can arrive at this
				831	* group and should be considered to continue balancing.
Peter Zijlstra	35a566e	2017-04-28 10:54:26 +0200	[diff] [blame]	832	*
				833	* We do this during the group creation pass, therefore the group information
				834	* isn't complete yet, however since each group represents a (child) domain we
				835	* can fully construct this using the sched_domain bits (which are already
				836	* complete).
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	837	*/
Peter Zijlstra	1676330	2017-04-25 14:31:11 +0200	[diff] [blame]	838	static void
Peter Zijlstra	e5c14b1	2017-05-01 10:47:02 +0200	[diff] [blame]	839	build_balance_mask(struct sched_domain sd, struct sched_group sg, struct cpumask *mask)
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	840	{
Peter Zijlstra	ae4df9d	2017-05-01 11:03:12 +0200	[diff] [blame]	841	const struct cpumask *sg_span = sched_group_span(sg);
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	842	struct sd_data *sdd = sd->private;
				843	struct sched_domain *sibling;
				844	int i;
				845
Peter Zijlstra	1676330	2017-04-25 14:31:11 +0200	[diff] [blame]	846	cpumask_clear(mask);
				847
Lauro Ramos Venancio	f32d782	2017-04-20 16:51:40 -0300	[diff] [blame]	848	for_each_cpu(i, sg_span) {
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	849	sibling = *per_cpu_ptr(sdd->sd, i);
Peter Zijlstra	73bb059	2017-04-25 14:00:49 +0200	[diff] [blame]	850
				851	/*
				852	* Can happen in the asymmetric case, where these siblings are
				853	* unused. The mask will not be empty because those CPUs that
				854	* do have the top domain _should_ span the domain.
				855	*/
				856	if (!sibling->child)
				857	continue;
				858
				859	/* If we would not end up here, we can't continue from here */
				860	if (!cpumask_equal(sg_span, sched_domain_span(sibling->child)))
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	861	continue;
				862
Peter Zijlstra	1676330	2017-04-25 14:31:11 +0200	[diff] [blame]	863	cpumask_set_cpu(i, mask);
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	864	}
Peter Zijlstra	73bb059	2017-04-25 14:00:49 +0200	[diff] [blame]	865
				866	/* We must not have empty masks here */
Peter Zijlstra	1676330	2017-04-25 14:31:11 +0200	[diff] [blame]	867	WARN_ON_ONCE(cpumask_empty(mask));
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	868	}
				869
				870	/*
Peter Zijlstra	35a566e	2017-04-28 10:54:26 +0200	[diff] [blame]	871	* XXX: This creates per-node group entries; since the load-balancer will
				872	* immediately access remote memory to construct this group's load-balance
				873	* statistics having the groups node local is of dubious benefit.
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	874	*/
Lauro Ramos Venancio	8c03346	2017-04-13 10:56:07 -0300	[diff] [blame]	875	static struct sched_group *
				876	build_group_from_child_sched_domain(struct sched_domain *sd, int cpu)
				877	{
				878	struct sched_group *sg;
				879	struct cpumask *sg_span;
				880
				881	sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(),
				882	GFP_KERNEL, cpu_to_node(cpu));
				883
				884	if (!sg)
				885	return NULL;
				886
Peter Zijlstra	ae4df9d	2017-05-01 11:03:12 +0200	[diff] [blame]	887	sg_span = sched_group_span(sg);
Lauro Ramos Venancio	8c03346	2017-04-13 10:56:07 -0300	[diff] [blame]	888	if (sd->child)
				889	cpumask_copy(sg_span, sched_domain_span(sd->child));
				890	else
				891	cpumask_copy(sg_span, sched_domain_span(sd));
				892
Shu Wang	213c5a4	2017-08-10 15:52:16 +0800	[diff] [blame]	893	atomic_inc(&sg->ref);
Lauro Ramos Venancio	8c03346	2017-04-13 10:56:07 -0300	[diff] [blame]	894	return sg;
				895	}
				896
				897	static void init_overlap_sched_group(struct sched_domain *sd,
Peter Zijlstra	1676330	2017-04-25 14:31:11 +0200	[diff] [blame]	898	struct sched_group *sg)
Lauro Ramos Venancio	8c03346	2017-04-13 10:56:07 -0300	[diff] [blame]	899	{
Peter Zijlstra	1676330	2017-04-25 14:31:11 +0200	[diff] [blame]	900	struct cpumask *mask = sched_domains_tmpmask2;
Lauro Ramos Venancio	8c03346	2017-04-13 10:56:07 -0300	[diff] [blame]	901	struct sd_data *sdd = sd->private;
				902	struct cpumask *sg_span;
Peter Zijlstra	1676330	2017-04-25 14:31:11 +0200	[diff] [blame]	903	int cpu;
				904
Peter Zijlstra	e5c14b1	2017-05-01 10:47:02 +0200	[diff] [blame]	905	build_balance_mask(sd, sg, mask);
Peter Zijlstra	ae4df9d	2017-05-01 11:03:12 +0200	[diff] [blame]	906	cpu = cpumask_first_and(sched_group_span(sg), mask);
Lauro Ramos Venancio	8c03346	2017-04-13 10:56:07 -0300	[diff] [blame]	907
				908	sg->sgc = *per_cpu_ptr(sdd->sgc, cpu);
				909	if (atomic_inc_return(&sg->sgc->ref) == 1)
Peter Zijlstra	e5c14b1	2017-05-01 10:47:02 +0200	[diff] [blame]	910	cpumask_copy(group_balance_mask(sg), mask);
Peter Zijlstra	35a566e	2017-04-28 10:54:26 +0200	[diff] [blame]	911	else
Peter Zijlstra	e5c14b1	2017-05-01 10:47:02 +0200	[diff] [blame]	912	WARN_ON_ONCE(!cpumask_equal(group_balance_mask(sg), mask));
Lauro Ramos Venancio	8c03346	2017-04-13 10:56:07 -0300	[diff] [blame]	913
				914	/*
				915	* Initialize sgc->capacity such that even if we mess up the
				916	* domains and no possible iteration will get us here, we won't
				917	* die on a /0 trap.
				918	*/
Peter Zijlstra	ae4df9d	2017-05-01 11:03:12 +0200	[diff] [blame]	919	sg_span = sched_group_span(sg);
Lauro Ramos Venancio	8c03346	2017-04-13 10:56:07 -0300	[diff] [blame]	920	sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sg_span);
				921	sg->sgc->min_capacity = SCHED_CAPACITY_SCALE;
Morten Rasmussen	e3d6d0c	2018-07-04 11:17:41 +0100	[diff] [blame]	922	sg->sgc->max_capacity = SCHED_CAPACITY_SCALE;
Lauro Ramos Venancio	8c03346	2017-04-13 10:56:07 -0300	[diff] [blame]	923	}
				924
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	925	static int
				926	build_overlap_sched_groups(struct sched_domain *sd, int cpu)
				927	{
Peter Zijlstra	91eaed0	2017-04-14 17:32:07 +0200	[diff] [blame]	928	struct sched_group first = NULL, last = NULL, *sg;
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	929	const struct cpumask *span = sched_domain_span(sd);
				930	struct cpumask *covered = sched_domains_tmpmask;
				931	struct sd_data *sdd = sd->private;
				932	struct sched_domain *sibling;
				933	int i;
				934
				935	cpumask_clear(covered);
				936
Peter Zijlstra	0372dd2	2017-04-14 17:24:02 +0200	[diff] [blame]	937	for_each_cpu_wrap(i, span, cpu) {
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	938	struct cpumask *sg_span;
				939
				940	if (cpumask_test_cpu(i, covered))
				941	continue;
				942
				943	sibling = *per_cpu_ptr(sdd->sd, i);
				944
Lauro Ramos Venancio	c20e1ea	2017-04-20 16:51:42 -0300	[diff] [blame]	945	/*
				946	* Asymmetric node setups can result in situations where the
				947	* domain tree is of unequal depth, make sure to skip domains
				948	* that already cover the entire range.
				949	*
				950	* In that case build_sched_domains() will have terminated the
				951	* iteration early and our sibling sd spans will be empty.
				952	* Domains should always include the CPU they're built on, so
				953	* check that.
				954	*/
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	955	if (!cpumask_test_cpu(i, sched_domain_span(sibling)))
				956	continue;
				957
Lauro Ramos Venancio	8c03346	2017-04-13 10:56:07 -0300	[diff] [blame]	958	sg = build_group_from_child_sched_domain(sibling, cpu);
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	959	if (!sg)
				960	goto fail;
				961
Peter Zijlstra	ae4df9d	2017-05-01 11:03:12 +0200	[diff] [blame]	962	sg_span = sched_group_span(sg);
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	963	cpumask_or(covered, covered, sg_span);
				964
Peter Zijlstra	1676330	2017-04-25 14:31:11 +0200	[diff] [blame]	965	init_overlap_sched_group(sd, sg);
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	966
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	967	if (!first)
				968	first = sg;
				969	if (last)
				970	last->next = sg;
				971	last = sg;
				972	last->next = first;
				973	}
Peter Zijlstra	91eaed0	2017-04-14 17:32:07 +0200	[diff] [blame]	974	sd->groups = first;
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	975
				976	return 0;
				977
				978	fail:
				979	free_sched_groups(first, 0);
				980
				981	return -ENOMEM;
				982	}
				983
Peter Zijlstra	35a566e	2017-04-28 10:54:26 +0200	[diff] [blame]	984
				985	/*
				986	* Package topology (also see the load-balance blurb in fair.c)
				987	*
				988	* The scheduler builds a tree structure to represent a number of important
				989	* topology features. By default (default_topology[]) these include:
				990	*
				991	* - Simultaneous multithreading (SMT)
				992	* - Multi-Core Cache (MC)
				993	* - Package (DIE)
				994	*
				995	* Where the last one more or less denotes everything up to a NUMA node.
				996	*
				997	* The tree consists of 3 primary data structures:
				998	*
				999	* sched_domain -> sched_group -> sched_group_capacity
				1000	* ^ ^ ^ ^
				1001	* `-' `-'
				1002	*
Ingo Molnar	97fb7a0	2018-03-03 14:01:12 +0100	[diff] [blame]	1003	* The sched_domains are per-CPU and have a two way link (parent & child) and
Peter Zijlstra	35a566e	2017-04-28 10:54:26 +0200	[diff] [blame]	1004	* denote the ever growing mask of CPUs belonging to that level of topology.
				1005	*
				1006	* Each sched_domain has a circular (double) linked list of sched_group's, each
				1007	* denoting the domains of the level below (or individual CPUs in case of the
				1008	* first domain level). The sched_group linked by a sched_domain includes the
				1009	* CPU of that sched_domain [*].
				1010	*
				1011	* Take for instance a 2 threaded, 2 core, 2 cache cluster part:
				1012	*
				1013	* CPU 0 1 2 3 4 5 6 7
				1014	*
				1015	* DIE [ ]
				1016	* MC [ ] [ ]
				1017	* SMT [ ] [ ] [ ] [ ]
				1018	*
				1019	* - or -
				1020	*
				1021	* DIE 0-7 0-7 0-7 0-7 0-7 0-7 0-7 0-7
				1022	* MC 0-3 0-3 0-3 0-3 4-7 4-7 4-7 4-7
				1023	* SMT 0-1 0-1 2-3 2-3 4-5 4-5 6-7 6-7
				1024	*
				1025	* CPU 0 1 2 3 4 5 6 7
				1026	*
				1027	* One way to think about it is: sched_domain moves you up and down among these
				1028	* topology levels, while sched_group moves you sideways through it, at child
				1029	* domain granularity.
				1030	*
				1031	* sched_group_capacity ensures each unique sched_group has shared storage.
				1032	*
				1033	* There are two related construction problems, both require a CPU that
				1034	* uniquely identifies each group (for a given domain):
				1035	*
				1036	* - The first is the balance_cpu (see should_we_balance() and the
				1037	* load-balance blurb in fair.c); for each group we only want 1 CPU to
				1038	* continue balancing at a higher domain.
				1039	*
				1040	* - The second is the sched_group_capacity; we want all identical groups
				1041	* to share a single sched_group_capacity.
				1042	*
				1043	* These topologies are exclusive by construction. That is, it's
				1044	* impossible for an SMT thread to belong to multiple cores, and cores to
				1045	* be part of multiple caches. There is a very clear and unique location
				1046	* for each CPU in the hierarchy.
				1047	*
				1048	* Therefore computing a unique CPU for each group is trivial (the iteration
				1049	* mask is redundant and set all 1s; all CPUs in a group will end up at _that_
				1050	* group), we can simply pick the first CPU in each group.
				1051	*
				1052	*
				1053	* [*] in other words, the first group of each domain is its child domain.
				1054	*/
				1055
Peter Zijlstra	0c0e776	2017-05-03 14:18:06 +0200	[diff] [blame]	1056	static struct sched_group get_group(int cpu, struct sd_data sdd)
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1057	{
				1058	struct sched_domain sd = per_cpu_ptr(sdd->sd, cpu);
				1059	struct sched_domain *child = sd->child;
Peter Zijlstra	0c0e776	2017-05-03 14:18:06 +0200	[diff] [blame]	1060	struct sched_group *sg;
Valentin Schneider	67d4f6f	2019-04-09 18:35:45 +0100	[diff] [blame]	1061	bool already_visited;
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1062
				1063	if (child)
				1064	cpu = cpumask_first(sched_domain_span(child));
				1065
Peter Zijlstra	0c0e776	2017-05-03 14:18:06 +0200	[diff] [blame]	1066	sg = *per_cpu_ptr(sdd->sg, cpu);
				1067	sg->sgc = *per_cpu_ptr(sdd->sgc, cpu);
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1068
Valentin Schneider	67d4f6f	2019-04-09 18:35:45 +0100	[diff] [blame]	1069	/* Increase refcounts for claim_allocations: */
				1070	already_visited = atomic_inc_return(&sg->ref) > 1;
				1071	/* sgc visits should follow a similar trend as sg */
				1072	WARN_ON(already_visited != (atomic_inc_return(&sg->sgc->ref) > 1));
				1073
				1074	/* If we have already visited that group, it's already initialized. */
				1075	if (already_visited)
				1076	return sg;
Peter Zijlstra	0c0e776	2017-05-03 14:18:06 +0200	[diff] [blame]	1077
				1078	if (child) {
Peter Zijlstra	ae4df9d	2017-05-01 11:03:12 +0200	[diff] [blame]	1079	cpumask_copy(sched_group_span(sg), sched_domain_span(child));
				1080	cpumask_copy(group_balance_mask(sg), sched_group_span(sg));
Peter Zijlstra	0c0e776	2017-05-03 14:18:06 +0200	[diff] [blame]	1081	} else {
Peter Zijlstra	ae4df9d	2017-05-01 11:03:12 +0200	[diff] [blame]	1082	cpumask_set_cpu(cpu, sched_group_span(sg));
Peter Zijlstra	e5c14b1	2017-05-01 10:47:02 +0200	[diff] [blame]	1083	cpumask_set_cpu(cpu, group_balance_mask(sg));
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1084	}
				1085
Peter Zijlstra	ae4df9d	2017-05-01 11:03:12 +0200	[diff] [blame]	1086	sg->sgc->capacity = SCHED_CAPACITY_SCALE * cpumask_weight(sched_group_span(sg));
Peter Zijlstra	0c0e776	2017-05-03 14:18:06 +0200	[diff] [blame]	1087	sg->sgc->min_capacity = SCHED_CAPACITY_SCALE;
Morten Rasmussen	e3d6d0c	2018-07-04 11:17:41 +0100	[diff] [blame]	1088	sg->sgc->max_capacity = SCHED_CAPACITY_SCALE;
Peter Zijlstra	0c0e776	2017-05-03 14:18:06 +0200	[diff] [blame]	1089
				1090	return sg;
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1091	}
				1092
				1093	/*
				1094	* build_sched_groups will build a circular linked list of the groups
Valentin Schneider	d874323	2019-04-09 18:35:46 +0100	[diff] [blame]	1095	* covered by the given span, will set each group's ->cpumask correctly,
				1096	* and will initialize their ->sgc.
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1097	*
				1098	* Assumes the sched_domain tree is fully constructed
				1099	*/
				1100	static int
				1101	build_sched_groups(struct sched_domain *sd, int cpu)
				1102	{
				1103	struct sched_group first = NULL, last = NULL;
				1104	struct sd_data *sdd = sd->private;
				1105	const struct cpumask *span = sched_domain_span(sd);
				1106	struct cpumask *covered;
				1107	int i;
				1108
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1109	lockdep_assert_held(&sched_domains_mutex);
				1110	covered = sched_domains_tmpmask;
				1111
				1112	cpumask_clear(covered);
				1113
Peter Zijlstra	0c0e776	2017-05-03 14:18:06 +0200	[diff] [blame]	1114	for_each_cpu_wrap(i, span, cpu) {
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1115	struct sched_group *sg;
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1116
				1117	if (cpumask_test_cpu(i, covered))
				1118	continue;
				1119
Peter Zijlstra	0c0e776	2017-05-03 14:18:06 +0200	[diff] [blame]	1120	sg = get_group(i, sdd);
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1121
Peter Zijlstra	ae4df9d	2017-05-01 11:03:12 +0200	[diff] [blame]	1122	cpumask_or(covered, covered, sched_group_span(sg));
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1123
				1124	if (!first)
				1125	first = sg;
				1126	if (last)
				1127	last->next = sg;
				1128	last = sg;
				1129	}
				1130	last->next = first;
Peter Zijlstra	0c0e776	2017-05-03 14:18:06 +0200	[diff] [blame]	1131	sd->groups = first;
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1132
				1133	return 0;
				1134	}
				1135
				1136	/*
				1137	* Initialize sched groups cpu_capacity.
				1138	*
				1139	* cpu_capacity indicates the capacity of sched group, which is used while
				1140	* distributing the load between different sched groups in a sched domain.
				1141	* Typically cpu_capacity for all the groups in a sched domain will be same
				1142	* unless there are asymmetries in the topology. If there are asymmetries,
				1143	* group having more cpu_capacity will pickup more load compared to the
				1144	* group having less cpu_capacity.
				1145	*/
				1146	static void init_sched_groups_capacity(int cpu, struct sched_domain *sd)
				1147	{
				1148	struct sched_group *sg = sd->groups;
				1149
				1150	WARN_ON(!sg);
				1151
				1152	do {
				1153	int cpu, max_cpu = -1;
				1154
Peter Zijlstra	ae4df9d	2017-05-01 11:03:12 +0200	[diff] [blame]	1155	sg->group_weight = cpumask_weight(sched_group_span(sg));
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1156
				1157	if (!(sd->flags & SD_ASYM_PACKING))
				1158	goto next;
				1159
Peter Zijlstra	ae4df9d	2017-05-01 11:03:12 +0200	[diff] [blame]	1160	for_each_cpu(cpu, sched_group_span(sg)) {
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1161	if (max_cpu < 0)
				1162	max_cpu = cpu;
				1163	else if (sched_asym_prefer(cpu, max_cpu))
				1164	max_cpu = cpu;
				1165	}
				1166	sg->asym_prefer_cpu = max_cpu;
				1167
				1168	next:
				1169	sg = sg->next;
				1170	} while (sg != sd->groups);
				1171
				1172	if (cpu != group_balance_cpu(sg))
				1173	return;
				1174
				1175	update_group_capacity(sd, cpu);
				1176	}
				1177
				1178	/*
				1179	* Initializers for schedule domains
				1180	* Non-inlined to reduce accumulated stack pressure in build_sched_domains()
				1181	*/
				1182
				1183	static int default_relax_domain_level = -1;
				1184	int sched_domain_level_max;
				1185
				1186	static int __init setup_relax_domain_level(char *str)
				1187	{
				1188	if (kstrtoint(str, 0, &default_relax_domain_level))
				1189	pr_warn("Unable to set relax_domain_level\n");
				1190
				1191	return 1;
				1192	}
				1193	__setup("relax_domain_level=", setup_relax_domain_level);
				1194
				1195	static void set_domain_attribute(struct sched_domain *sd,
				1196	struct sched_domain_attr *attr)
				1197	{
				1198	int request;
				1199
				1200	if (!attr \|\| attr->relax_domain_level < 0) {
				1201	if (default_relax_domain_level < 0)
				1202	return;
Valentin Schneider	9ae7ab2	2019-10-14 17:44:08 +0100	[diff] [blame]	1203	request = default_relax_domain_level;
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1204	} else
				1205	request = attr->relax_domain_level;
Valentin Schneider	9ae7ab2	2019-10-14 17:44:08 +0100	[diff] [blame]	1206
				1207	if (sd->level > request) {
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1208	/* Turn off idle balance on this domain: */
				1209	sd->flags &= ~(SD_BALANCE_WAKE\|SD_BALANCE_NEWIDLE);
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1210	}
				1211	}
				1212
				1213	static void __sdt_free(const struct cpumask *cpu_map);
				1214	static int __sdt_alloc(const struct cpumask *cpu_map);
				1215
				1216	static void __free_domain_allocs(struct s_data *d, enum s_alloc what,
				1217	const struct cpumask *cpu_map)
				1218	{
				1219	switch (what) {
				1220	case sa_rootdomain:
				1221	if (!atomic_read(&d->rd->refcount))
				1222	free_rootdomain(&d->rd->rcu);
				1223	/* Fall through */
				1224	case sa_sd:
				1225	free_percpu(d->sd);
				1226	/* Fall through */
				1227	case sa_sd_storage:
				1228	__sdt_free(cpu_map);
				1229	/* Fall through */
				1230	case sa_none:
				1231	break;
				1232	}
				1233	}
				1234
				1235	static enum s_alloc
				1236	__visit_domain_allocation_hell(struct s_data d, const struct cpumask cpu_map)
				1237	{
				1238	memset(d, 0, sizeof(*d));
				1239
				1240	if (__sdt_alloc(cpu_map))
				1241	return sa_sd_storage;
				1242	d->sd = alloc_percpu(struct sched_domain *);
				1243	if (!d->sd)
				1244	return sa_sd_storage;
				1245	d->rd = alloc_rootdomain();
				1246	if (!d->rd)
				1247	return sa_sd;
Ingo Molnar	97fb7a0	2018-03-03 14:01:12 +0100	[diff] [blame]	1248
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1249	return sa_rootdomain;
				1250	}
				1251
				1252	/*
				1253	* NULL the sd_data elements we've used to build the sched_domain and
				1254	* sched_group structure so that the subsequent __free_domain_allocs()
				1255	* will not free the data we're using.
				1256	*/
				1257	static void claim_allocations(int cpu, struct sched_domain *sd)
				1258	{
				1259	struct sd_data *sdd = sd->private;
				1260
				1261	WARN_ON_ONCE(*per_cpu_ptr(sdd->sd, cpu) != sd);
				1262	*per_cpu_ptr(sdd->sd, cpu) = NULL;
				1263
				1264	if (atomic_read(&(*per_cpu_ptr(sdd->sds, cpu))->ref))
				1265	*per_cpu_ptr(sdd->sds, cpu) = NULL;
				1266
				1267	if (atomic_read(&(*per_cpu_ptr(sdd->sg, cpu))->ref))
				1268	*per_cpu_ptr(sdd->sg, cpu) = NULL;
				1269
				1270	if (atomic_read(&(*per_cpu_ptr(sdd->sgc, cpu))->ref))
				1271	*per_cpu_ptr(sdd->sgc, cpu) = NULL;
				1272	}
				1273
				1274	#ifdef CONFIG_NUMA
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1275	enum numa_topology_type sched_numa_topology_type;
Ingo Molnar	97fb7a0	2018-03-03 14:01:12 +0100	[diff] [blame]	1276
				1277	static int sched_domains_numa_levels;
				1278	static int sched_domains_curr_level;
				1279
				1280	int sched_max_numa_distance;
				1281	static int *sched_domains_numa_distance;
				1282	static struct cpumask ***sched_domains_numa_masks;
Matt Fleming	a55c745	2019-08-08 20:53:01 +0100	[diff] [blame]	1283	int __read_mostly node_reclaim_distance = RECLAIM_DISTANCE;
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1284	#endif
				1285
				1286	/*
				1287	* SD_flags allowed in topology descriptions.
				1288	*
				1289	* These flags are purely descriptive of the topology and do not prescribe
				1290	* behaviour. Behaviour is artificial and mapped in the below sd_init()
				1291	* function:
				1292	*
				1293	* SD_SHARE_CPUCAPACITY - describes SMT topologies
				1294	* SD_SHARE_PKG_RESOURCES - describes shared caches
				1295	* SD_NUMA - describes NUMA topologies
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1296	*
				1297	* Odd one out, which beside describing the topology has a quirk also
				1298	* prescribes the desired behaviour that goes along with it:
				1299	*
				1300	* SD_ASYM_PACKING - describes SMT quirks
				1301	*/
				1302	#define TOPOLOGY_SD_FLAGS \
Ingo Molnar	97fb7a0	2018-03-03 14:01:12 +0100	[diff] [blame]	1303	(SD_SHARE_CPUCAPACITY \| \
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1304	SD_SHARE_PKG_RESOURCES \| \
Ingo Molnar	97fb7a0	2018-03-03 14:01:12 +0100	[diff] [blame]	1305	SD_NUMA \| \
Valentin Schneider	cfe7ddc	2020-08-17 12:29:47 +0100	[diff] [blame]	1306	SD_ASYM_PACKING)
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1307
				1308	static struct sched_domain *
				1309	sd_init(struct sched_domain_topology_level *tl,
				1310	const struct cpumask *cpu_map,
Morten Rasmussen	05484e0	2018-07-20 14:32:31 +0100	[diff] [blame]	1311	struct sched_domain *child, int dflags, int cpu)
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1312	{
				1313	struct sd_data *sdd = &tl->data;
				1314	struct sched_domain sd = per_cpu_ptr(sdd->sd, cpu);
				1315	int sd_id, sd_weight, sd_flags = 0;
				1316
				1317	#ifdef CONFIG_NUMA
				1318	/*
				1319	* Ugly hack to pass state to sd_numa_mask()...
				1320	*/
				1321	sched_domains_curr_level = tl->numa_level;
				1322	#endif
				1323
				1324	sd_weight = cpumask_weight(tl->mask(cpu));
				1325
				1326	if (tl->sd_flags)
				1327	sd_flags = (*tl->sd_flags)();
				1328	if (WARN_ONCE(sd_flags & ~TOPOLOGY_SD_FLAGS,
				1329	"wrong sd_flags in topology description\n"))
Peng Liu	9b1b234	2020-06-09 23:09:36 +0800	[diff] [blame]	1330	sd_flags &= TOPOLOGY_SD_FLAGS;
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1331
Morten Rasmussen	05484e0	2018-07-20 14:32:31 +0100	[diff] [blame]	1332	/* Apply detected topology flags */
				1333	sd_flags \|= dflags;
				1334
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1335	*sd = (struct sched_domain){
				1336	.min_interval = sd_weight,
				1337	.max_interval = 2*sd_weight,
				1338	.busy_factor = 32,
				1339	.imbalance_pct = 125,
				1340
				1341	.cache_nice_tries = 0,
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1342
Valentin Schneider	36c5bdc	2020-04-15 22:05:07 +0100	[diff] [blame]	1343	.flags = 1*SD_BALANCE_NEWIDLE
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1344	\| 1*SD_BALANCE_EXEC
				1345	\| 1*SD_BALANCE_FORK
				1346	\| 0*SD_BALANCE_WAKE
				1347	\| 1*SD_WAKE_AFFINE
				1348	\| 0*SD_SHARE_CPUCAPACITY
				1349	\| 0*SD_SHARE_PKG_RESOURCES
				1350	\| 0*SD_SERIALIZE
Morten Rasmussen	9c63e84	2018-07-04 11:17:50 +0100	[diff] [blame]	1351	\| 1*SD_PREFER_SIBLING
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1352	\| 0*SD_NUMA
				1353	\| sd_flags
				1354	,
				1355
				1356	.last_balance = jiffies,
				1357	.balance_interval = sd_weight,
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1358	.max_newidle_lb_cost = 0,
				1359	.next_decay_max_lb_cost = jiffies,
				1360	.child = child,
				1361	#ifdef CONFIG_SCHED_DEBUG
				1362	.name = tl->name,
				1363	#endif
				1364	};
				1365
				1366	cpumask_and(sched_domain_span(sd), cpu_map, tl->mask(cpu));
				1367	sd_id = cpumask_first(sched_domain_span(sd));
				1368
				1369	/*
				1370	* Convert topological properties into behaviour.
				1371	*/
				1372
Morten Rasmussen	a526d46	2020-02-06 19:19:55 +0000	[diff] [blame]	1373	/* Don't attempt to spread across CPUs of different capacities. */
				1374	if ((sd->flags & SD_ASYM_CPUCAPACITY) && sd->child)
				1375	sd->child->flags &= ~SD_PREFER_SIBLING;
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1376
				1377	if (sd->flags & SD_SHARE_CPUCAPACITY) {
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1378	sd->imbalance_pct = 110;
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1379
				1380	} else if (sd->flags & SD_SHARE_PKG_RESOURCES) {
				1381	sd->imbalance_pct = 117;
				1382	sd->cache_nice_tries = 1;
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1383
				1384	#ifdef CONFIG_NUMA
				1385	} else if (sd->flags & SD_NUMA) {
				1386	sd->cache_nice_tries = 2;
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1387
Morten Rasmussen	9c63e84	2018-07-04 11:17:50 +0100	[diff] [blame]	1388	sd->flags &= ~SD_PREFER_SIBLING;
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1389	sd->flags \|= SD_SERIALIZE;
Matt Fleming	a55c745	2019-08-08 20:53:01 +0100	[diff] [blame]	1390	if (sched_domains_numa_distance[tl->numa_level] > node_reclaim_distance) {
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1391	sd->flags &= ~(SD_BALANCE_EXEC \|
				1392	SD_BALANCE_FORK \|
				1393	SD_WAKE_AFFINE);
				1394	}
				1395
				1396	#endif
				1397	} else {
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1398	sd->cache_nice_tries = 1;
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1399	}
				1400
				1401	/*
				1402	* For all levels sharing cache; connect a sched_domain_shared
				1403	* instance.
				1404	*/
				1405	if (sd->flags & SD_SHARE_PKG_RESOURCES) {
				1406	sd->shared = *per_cpu_ptr(sdd->sds, sd_id);
				1407	atomic_inc(&sd->shared->ref);
				1408	atomic_set(&sd->shared->nr_busy_cpus, sd_weight);
				1409	}
				1410
				1411	sd->private = sdd;
				1412
				1413	return sd;
				1414	}
				1415
				1416	/*
				1417	* Topology list, bottom-up.
				1418	*/
				1419	static struct sched_domain_topology_level default_topology[] = {
				1420	#ifdef CONFIG_SCHED_SMT
				1421	{ cpu_smt_mask, cpu_smt_flags, SD_INIT_NAME(SMT) },
				1422	#endif
				1423	#ifdef CONFIG_SCHED_MC
				1424	{ cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) },
				1425	#endif
				1426	{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
				1427	{ NULL, },
				1428	};
				1429
				1430	static struct sched_domain_topology_level *sched_domain_topology =
				1431	default_topology;
				1432
				1433	#define for_each_sd_topology(tl) \
				1434	for (tl = sched_domain_topology; tl->mask; tl++)
				1435
				1436	void set_sched_topology(struct sched_domain_topology_level *tl)
				1437	{
				1438	if (WARN_ON_ONCE(sched_smp_initialized))
				1439	return;
				1440
				1441	sched_domain_topology = tl;
				1442	}
				1443
				1444	#ifdef CONFIG_NUMA
				1445
				1446	static const struct cpumask *sd_numa_mask(int cpu)
				1447	{
				1448	return sched_domains_numa_masks[sched_domains_curr_level][cpu_to_node(cpu)];
				1449	}
				1450
				1451	static void sched_numa_warn(const char *str)
				1452	{
				1453	static int done = false;
				1454	int i,j;
				1455
				1456	if (done)
				1457	return;
				1458
				1459	done = true;
				1460
				1461	printk(KERN_WARNING "ERROR: %s\n\n", str);
				1462
				1463	for (i = 0; i < nr_node_ids; i++) {
				1464	printk(KERN_WARNING " ");
				1465	for (j = 0; j < nr_node_ids; j++)
				1466	printk(KERN_CONT "%02d ", node_distance(i,j));
				1467	printk(KERN_CONT "\n");
				1468	}
				1469	printk(KERN_WARNING "\n");
				1470	}
				1471
				1472	bool find_numa_distance(int distance)
				1473	{
				1474	int i;
				1475
				1476	if (distance == node_distance(0, 0))
				1477	return true;
				1478
				1479	for (i = 0; i < sched_domains_numa_levels; i++) {
				1480	if (sched_domains_numa_distance[i] == distance)
				1481	return true;
				1482	}
				1483
				1484	return false;
				1485	}
				1486
				1487	/*
				1488	* A system can have three types of NUMA topology:
				1489	* NUMA_DIRECT: all nodes are directly connected, or not a NUMA system
				1490	* NUMA_GLUELESS_MESH: some nodes reachable through intermediary nodes
				1491	* NUMA_BACKPLANE: nodes can reach other nodes through a backplane
				1492	*
				1493	* The difference between a glueless mesh topology and a backplane
				1494	* topology lies in whether communication between not directly
				1495	* connected nodes goes through intermediary nodes (where programs
				1496	* could run), or through backplane controllers. This affects
				1497	* placement of programs.
				1498	*
				1499	* The type of topology can be discerned with the following tests:
				1500	* - If the maximum distance between any nodes is 1 hop, the system
				1501	* is directly connected.
				1502	* - If for two nodes A and B, located N > 1 hops away from each other,
				1503	* there is an intermediary node C, which is < N hops away from both
				1504	* nodes A and B, the system is a glueless mesh.
				1505	*/
				1506	static void init_numa_topology_type(void)
				1507	{
				1508	int a, b, c, n;
				1509
				1510	n = sched_max_numa_distance;
				1511
Srikar Dronamraju	e5e96fa	2018-08-10 22:30:18 +0530	[diff] [blame]	1512	if (sched_domains_numa_levels <= 2) {
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1513	sched_numa_topology_type = NUMA_DIRECT;
				1514	return;
				1515	}
				1516
				1517	for_each_online_node(a) {
				1518	for_each_online_node(b) {
				1519	/* Find two nodes furthest removed from each other. */
				1520	if (node_distance(a, b) < n)
				1521	continue;
				1522
				1523	/* Is there an intermediary node between a and b? */
				1524	for_each_online_node(c) {
				1525	if (node_distance(a, c) < n &&
				1526	node_distance(b, c) < n) {
				1527	sched_numa_topology_type =
				1528	NUMA_GLUELESS_MESH;
				1529	return;
				1530	}
				1531	}
				1532
				1533	sched_numa_topology_type = NUMA_BACKPLANE;
				1534	return;
				1535	}
				1536	}
				1537	}
				1538
				1539	void sched_init_numa(void)
				1540	{
				1541	int next_distance, curr_distance = node_distance(0, 0);
				1542	struct sched_domain_topology_level *tl;
				1543	int level = 0;
				1544	int i, j, k;
				1545
Peter Zijlstra	993f0b0	2018-11-02 14:22:25 +0100	[diff] [blame]	1546	sched_domains_numa_distance = kzalloc(sizeof(int) * (nr_node_ids + 1), GFP_KERNEL);
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1547	if (!sched_domains_numa_distance)
				1548	return;
				1549
Suravee Suthikulpanit	051f3ca	2017-09-07 02:20:05 -0500	[diff] [blame]	1550	/* Includes NUMA identity node at level 0. */
				1551	sched_domains_numa_distance[level++] = curr_distance;
				1552	sched_domains_numa_levels = level;
				1553
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1554	/*
				1555	* O(nr_nodes^2) deduplicating selection sort -- in order to find the
				1556	* unique distances in the node_distance() table.
				1557	*
				1558	* Assumes node_distance(0,j) includes all distances in
				1559	* node_distance(i,j) in order to avoid cubic time.
				1560	*/
				1561	next_distance = curr_distance;
				1562	for (i = 0; i < nr_node_ids; i++) {
				1563	for (j = 0; j < nr_node_ids; j++) {
				1564	for (k = 0; k < nr_node_ids; k++) {
				1565	int distance = node_distance(i, k);
				1566
				1567	if (distance > curr_distance &&
				1568	(distance < next_distance \|\|
				1569	next_distance == curr_distance))
				1570	next_distance = distance;
				1571
				1572	/*
				1573	* While not a strong assumption it would be nice to know
				1574	* about cases where if node A is connected to B, B is not
				1575	* equally connected to A.
				1576	*/
				1577	if (sched_debug() && node_distance(k, i) != distance)
				1578	sched_numa_warn("Node-distance not symmetric");
				1579
				1580	if (sched_debug() && i && !find_numa_distance(distance))
				1581	sched_numa_warn("Node-0 not representative");
				1582	}
				1583	if (next_distance != curr_distance) {
				1584	sched_domains_numa_distance[level++] = next_distance;
				1585	sched_domains_numa_levels = level;
				1586	curr_distance = next_distance;
				1587	} else break;
				1588	}
				1589
				1590	/*
				1591	* In case of sched_debug() we verify the above assumption.
				1592	*/
				1593	if (!sched_debug())
				1594	break;
				1595	}
				1596
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1597	/*
Suravee Suthikulpanit	051f3ca	2017-09-07 02:20:05 -0500	[diff] [blame]	1598	* 'level' contains the number of unique distances
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1599	*
				1600	* The sched_domains_numa_distance[] array includes the actual distance
				1601	* numbers.
				1602	*/
				1603
				1604	/*
				1605	* Here, we should temporarily reset sched_domains_numa_levels to 0.
				1606	* If it fails to allocate memory for array sched_domains_numa_masks[][],
				1607	* the array will contain less then 'level' members. This could be
				1608	* dangerous when we use it to iterate array sched_domains_numa_masks[][]
				1609	* in other functions.
				1610	*
				1611	* We reset it to 'level' at the end of this function.
				1612	*/
				1613	sched_domains_numa_levels = 0;
				1614
				1615	sched_domains_numa_masks = kzalloc(sizeof(void ) level, GFP_KERNEL);
				1616	if (!sched_domains_numa_masks)
				1617	return;
				1618
				1619	/*
				1620	* Now for each level, construct a mask per node which contains all
				1621	* CPUs of nodes that are that many hops away from us.
				1622	*/
				1623	for (i = 0; i < level; i++) {
				1624	sched_domains_numa_masks[i] =
				1625	kzalloc(nr_node_ids * sizeof(void *), GFP_KERNEL);
				1626	if (!sched_domains_numa_masks[i])
				1627	return;
				1628
				1629	for (j = 0; j < nr_node_ids; j++) {
				1630	struct cpumask *mask = kzalloc(cpumask_size(), GFP_KERNEL);
				1631	if (!mask)
				1632	return;
				1633
				1634	sched_domains_numa_masks[i][j] = mask;
				1635
				1636	for_each_node(k) {
				1637	if (node_distance(j, k) > sched_domains_numa_distance[i])
				1638	continue;
				1639
				1640	cpumask_or(mask, mask, cpumask_of_node(k));
				1641	}
				1642	}
				1643	}
				1644
				1645	/* Compute default topology size */
				1646	for (i = 0; sched_domain_topology[i].mask; i++);
				1647
				1648	tl = kzalloc((i + level + 1) *
				1649	sizeof(struct sched_domain_topology_level), GFP_KERNEL);
				1650	if (!tl)
				1651	return;
				1652
				1653	/*
				1654	* Copy the default topology bits..
				1655	*/
				1656	for (i = 0; sched_domain_topology[i].mask; i++)
				1657	tl[i] = sched_domain_topology[i];
				1658
				1659	/*
Suravee Suthikulpanit	051f3ca	2017-09-07 02:20:05 -0500	[diff] [blame]	1660	* Add the NUMA identity distance, aka single NODE.
				1661	*/
				1662	tl[i++] = (struct sched_domain_topology_level){
				1663	.mask = sd_numa_mask,
				1664	.numa_level = 0,
				1665	SD_INIT_NAME(NODE)
				1666	};
				1667
				1668	/*
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1669	* .. and append 'j' levels of NUMA goodness.
				1670	*/
Suravee Suthikulpanit	051f3ca	2017-09-07 02:20:05 -0500	[diff] [blame]	1671	for (j = 1; j < level; i++, j++) {
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1672	tl[i] = (struct sched_domain_topology_level){
				1673	.mask = sd_numa_mask,
				1674	.sd_flags = cpu_numa_flags,
				1675	.flags = SDTL_OVERLAP,
				1676	.numa_level = j,
				1677	SD_INIT_NAME(NUMA)
				1678	};
				1679	}
				1680
				1681	sched_domain_topology = tl;
				1682
				1683	sched_domains_numa_levels = level;
				1684	sched_max_numa_distance = sched_domains_numa_distance[level - 1];
				1685
				1686	init_numa_topology_type();
				1687	}
				1688
				1689	void sched_domains_numa_masks_set(unsigned int cpu)
				1690	{
				1691	int node = cpu_to_node(cpu);
				1692	int i, j;
				1693
				1694	for (i = 0; i < sched_domains_numa_levels; i++) {
				1695	for (j = 0; j < nr_node_ids; j++) {
				1696	if (node_distance(j, node) <= sched_domains_numa_distance[i])
				1697	cpumask_set_cpu(cpu, sched_domains_numa_masks[i][j]);
				1698	}
				1699	}
				1700	}
				1701
				1702	void sched_domains_numa_masks_clear(unsigned int cpu)
				1703	{
				1704	int i, j;
				1705
				1706	for (i = 0; i < sched_domains_numa_levels; i++) {
				1707	for (j = 0; j < nr_node_ids; j++)
				1708	cpumask_clear_cpu(cpu, sched_domains_numa_masks[i][j]);
				1709	}
				1710	}
				1711
Wanpeng Li	e0e8d49	2019-06-28 16:51:41 +0800	[diff] [blame]	1712	/*
				1713	* sched_numa_find_closest() - given the NUMA topology, find the cpu
				1714	* closest to @cpu from @cpumask.
				1715	* cpumask: cpumask to find a cpu from
				1716	* cpu: cpu to be close to
				1717	*
				1718	* returns: cpu, or nr_cpu_ids when nothing found.
				1719	*/
				1720	int sched_numa_find_closest(const struct cpumask *cpus, int cpu)
				1721	{
				1722	int i, j = cpu_to_node(cpu);
				1723
				1724	for (i = 0; i < sched_domains_numa_levels; i++) {
				1725	cpu = cpumask_any_and(cpus, sched_domains_numa_masks[i][j]);
				1726	if (cpu < nr_cpu_ids)
				1727	return cpu;
				1728	}
				1729	return nr_cpu_ids;
				1730	}
				1731
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1732	#endif /* CONFIG_NUMA */
				1733
				1734	static int __sdt_alloc(const struct cpumask *cpu_map)
				1735	{
				1736	struct sched_domain_topology_level *tl;
				1737	int j;
				1738
				1739	for_each_sd_topology(tl) {
				1740	struct sd_data *sdd = &tl->data;
				1741
				1742	sdd->sd = alloc_percpu(struct sched_domain *);
				1743	if (!sdd->sd)
				1744	return -ENOMEM;
				1745
				1746	sdd->sds = alloc_percpu(struct sched_domain_shared *);
				1747	if (!sdd->sds)
				1748	return -ENOMEM;
				1749
				1750	sdd->sg = alloc_percpu(struct sched_group *);
				1751	if (!sdd->sg)
				1752	return -ENOMEM;
				1753
				1754	sdd->sgc = alloc_percpu(struct sched_group_capacity *);
				1755	if (!sdd->sgc)
				1756	return -ENOMEM;
				1757
				1758	for_each_cpu(j, cpu_map) {
				1759	struct sched_domain *sd;
				1760	struct sched_domain_shared *sds;
				1761	struct sched_group *sg;
				1762	struct sched_group_capacity *sgc;
				1763
				1764	sd = kzalloc_node(sizeof(struct sched_domain) + cpumask_size(),
				1765	GFP_KERNEL, cpu_to_node(j));
				1766	if (!sd)
				1767	return -ENOMEM;
				1768
				1769	*per_cpu_ptr(sdd->sd, j) = sd;
				1770
				1771	sds = kzalloc_node(sizeof(struct sched_domain_shared),
				1772	GFP_KERNEL, cpu_to_node(j));
				1773	if (!sds)
				1774	return -ENOMEM;
				1775
				1776	*per_cpu_ptr(sdd->sds, j) = sds;
				1777
				1778	sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(),
				1779	GFP_KERNEL, cpu_to_node(j));
				1780	if (!sg)
				1781	return -ENOMEM;
				1782
				1783	sg->next = sg;
				1784
				1785	*per_cpu_ptr(sdd->sg, j) = sg;
				1786
				1787	sgc = kzalloc_node(sizeof(struct sched_group_capacity) + cpumask_size(),
				1788	GFP_KERNEL, cpu_to_node(j));
				1789	if (!sgc)
				1790	return -ENOMEM;
				1791
Peter Zijlstra	005f874	2017-04-26 17:35:35 +0200	[diff] [blame]	1792	#ifdef CONFIG_SCHED_DEBUG
				1793	sgc->id = j;
				1794	#endif
				1795
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1796	*per_cpu_ptr(sdd->sgc, j) = sgc;
				1797	}
				1798	}
				1799
				1800	return 0;
				1801	}
				1802
				1803	static void __sdt_free(const struct cpumask *cpu_map)
				1804	{
				1805	struct sched_domain_topology_level *tl;
				1806	int j;
				1807
				1808	for_each_sd_topology(tl) {
				1809	struct sd_data *sdd = &tl->data;
				1810
				1811	for_each_cpu(j, cpu_map) {
				1812	struct sched_domain *sd;
				1813
				1814	if (sdd->sd) {
				1815	sd = *per_cpu_ptr(sdd->sd, j);
				1816	if (sd && (sd->flags & SD_OVERLAP))
				1817	free_sched_groups(sd->groups, 0);
				1818	kfree(*per_cpu_ptr(sdd->sd, j));
				1819	}
				1820
				1821	if (sdd->sds)
				1822	kfree(*per_cpu_ptr(sdd->sds, j));
				1823	if (sdd->sg)
				1824	kfree(*per_cpu_ptr(sdd->sg, j));
				1825	if (sdd->sgc)
				1826	kfree(*per_cpu_ptr(sdd->sgc, j));
				1827	}
				1828	free_percpu(sdd->sd);
				1829	sdd->sd = NULL;
				1830	free_percpu(sdd->sds);
				1831	sdd->sds = NULL;
				1832	free_percpu(sdd->sg);
				1833	sdd->sg = NULL;
				1834	free_percpu(sdd->sgc);
				1835	sdd->sgc = NULL;
				1836	}
				1837	}
				1838
Viresh Kumar	181a80d1	2017-04-27 13:58:59 +0530	[diff] [blame]	1839	static struct sched_domain build_sched_domain(struct sched_domain_topology_level tl,
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1840	const struct cpumask cpu_map, struct sched_domain_attr attr,
Morten Rasmussen	05484e0	2018-07-20 14:32:31 +0100	[diff] [blame]	1841	struct sched_domain *child, int dflags, int cpu)
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1842	{
Morten Rasmussen	05484e0	2018-07-20 14:32:31 +0100	[diff] [blame]	1843	struct sched_domain *sd = sd_init(tl, cpu_map, child, dflags, cpu);
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1844
				1845	if (child) {
				1846	sd->level = child->level + 1;
				1847	sched_domain_level_max = max(sched_domain_level_max, sd->level);
				1848	child->parent = sd;
				1849
				1850	if (!cpumask_subset(sched_domain_span(child),
				1851	sched_domain_span(sd))) {
				1852	pr_err("BUG: arch topology borken\n");
				1853	#ifdef CONFIG_SCHED_DEBUG
				1854	pr_err(" the %s domain not a subset of the %s domain\n",
				1855	child->name, sd->name);
				1856	#endif
Ingo Molnar	97fb7a0	2018-03-03 14:01:12 +0100	[diff] [blame]	1857	/* Fixup, ensure @sd has at least @child CPUs. */
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1858	cpumask_or(sched_domain_span(sd),
				1859	sched_domain_span(sd),
				1860	sched_domain_span(child));
				1861	}
				1862
				1863	}
				1864	set_domain_attribute(sd, attr);
				1865
				1866	return sd;
				1867	}
				1868
				1869	/*
Valentin Schneider	ccf7412	2020-01-15 16:09:15 +0000	[diff] [blame]	1870	* Ensure topology masks are sane, i.e. there are no conflicts (overlaps) for
				1871	* any two given CPUs at this (non-NUMA) topology level.
				1872	*/
				1873	static bool topology_span_sane(struct sched_domain_topology_level *tl,
				1874	const struct cpumask *cpu_map, int cpu)
				1875	{
				1876	int i;
				1877
				1878	/* NUMA levels are allowed to overlap */
				1879	if (tl->flags & SDTL_OVERLAP)
				1880	return true;
				1881
				1882	/*
				1883	* Non-NUMA levels cannot partially overlap - they must be either
				1884	* completely equal or completely disjoint. Otherwise we can end up
				1885	* breaking the sched_group lists - i.e. a later get_group() pass
				1886	* breaks the linking done for an earlier span.
				1887	*/
				1888	for_each_cpu(i, cpu_map) {
				1889	if (i == cpu)
				1890	continue;
				1891	/*
				1892	* We should 'and' all those masks with 'cpu_map' to exactly
				1893	* match the topology we're about to build, but that can only
				1894	* remove CPUs, which only lessens our ability to detect
				1895	* overlaps
				1896	*/
				1897	if (!cpumask_equal(tl->mask(cpu), tl->mask(i)) &&
				1898	cpumask_intersects(tl->mask(cpu), tl->mask(i)))
				1899	return false;
				1900	}
				1901
				1902	return true;
				1903	}
				1904
				1905	/*
Morten Rasmussen	05484e0	2018-07-20 14:32:31 +0100	[diff] [blame]	1906	* Find the sched_domain_topology_level where all CPU capacities are visible
				1907	* for all CPUs.
				1908	*/
				1909	static struct sched_domain_topology_level
				1910	asym_cpu_capacity_level(const struct cpumask cpu_map)
				1911	{
				1912	int i, j, asym_level = 0;
				1913	bool asym = false;
				1914	struct sched_domain_topology_level tl, asym_tl = NULL;
				1915	unsigned long cap;
				1916
				1917	/* Is there any asymmetry? */
Vincent Guittot	8ec59c0	2019-06-17 17:00:17 +0200	[diff] [blame]	1918	cap = arch_scale_cpu_capacity(cpumask_first(cpu_map));
Morten Rasmussen	05484e0	2018-07-20 14:32:31 +0100	[diff] [blame]	1919
				1920	for_each_cpu(i, cpu_map) {
Vincent Guittot	8ec59c0	2019-06-17 17:00:17 +0200	[diff] [blame]	1921	if (arch_scale_cpu_capacity(i) != cap) {
Morten Rasmussen	05484e0	2018-07-20 14:32:31 +0100	[diff] [blame]	1922	asym = true;
				1923	break;
				1924	}
				1925	}
				1926
				1927	if (!asym)
				1928	return NULL;
				1929
				1930	/*
				1931	* Examine topology from all CPU's point of views to detect the lowest
				1932	* sched_domain_topology_level where a highest capacity CPU is visible
				1933	* to everyone.
				1934	*/
				1935	for_each_cpu(i, cpu_map) {
Vincent Guittot	8ec59c0	2019-06-17 17:00:17 +0200	[diff] [blame]	1936	unsigned long max_capacity = arch_scale_cpu_capacity(i);
Morten Rasmussen	05484e0	2018-07-20 14:32:31 +0100	[diff] [blame]	1937	int tl_id = 0;
				1938
				1939	for_each_sd_topology(tl) {
				1940	if (tl_id < asym_level)
				1941	goto next_level;
				1942
				1943	for_each_cpu_and(j, tl->mask(i), cpu_map) {
				1944	unsigned long capacity;
				1945
Vincent Guittot	8ec59c0	2019-06-17 17:00:17 +0200	[diff] [blame]	1946	capacity = arch_scale_cpu_capacity(j);
Morten Rasmussen	05484e0	2018-07-20 14:32:31 +0100	[diff] [blame]	1947
				1948	if (capacity <= max_capacity)
				1949	continue;
				1950
				1951	max_capacity = capacity;
				1952	asym_level = tl_id;
				1953	asym_tl = tl;
				1954	}
				1955	next_level:
				1956	tl_id++;
				1957	}
				1958	}
				1959
				1960	return asym_tl;
				1961	}
				1962
				1963
				1964	/*
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1965	* Build sched domains for a given set of CPUs and attach the sched domains
				1966	* to the individual CPUs
				1967	*/
				1968	static int
				1969	build_sched_domains(const struct cpumask cpu_map, struct sched_domain_attr attr)
				1970	{
Valentin Schneider	cd1cb33	2019-10-23 16:37:44 +0100	[diff] [blame]	1971	enum s_alloc alloc_state = sa_none;
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1972	struct sched_domain *sd;
				1973	struct s_data d;
				1974	struct rq *rq = NULL;
				1975	int i, ret = -ENOMEM;
Morten Rasmussen	05484e0	2018-07-20 14:32:31 +0100	[diff] [blame]	1976	struct sched_domain_topology_level *tl_asym;
Morten Rasmussen	df054e8	2018-07-04 11:17:39 +0100	[diff] [blame]	1977	bool has_asym = false;
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1978
Valentin Schneider	cd1cb33	2019-10-23 16:37:44 +0100	[diff] [blame]	1979	if (WARN_ON(cpumask_empty(cpu_map)))
				1980	goto error;
				1981
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1982	alloc_state = __visit_domain_allocation_hell(&d, cpu_map);
				1983	if (alloc_state != sa_rootdomain)
				1984	goto error;
				1985
Morten Rasmussen	05484e0	2018-07-20 14:32:31 +0100	[diff] [blame]	1986	tl_asym = asym_cpu_capacity_level(cpu_map);
				1987
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1988	/* Set up domains for CPUs specified by the cpu_map: */
				1989	for_each_cpu(i, cpu_map) {
				1990	struct sched_domain_topology_level *tl;
Valentin Schneider	c200191	2020-08-17 12:29:56 +0100	[diff] [blame^]	1991	int dflags = 0;
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	1992
				1993	sd = NULL;
				1994	for_each_sd_topology(tl) {
Morten Rasmussen	df054e8	2018-07-04 11:17:39 +0100	[diff] [blame]	1995	if (tl == tl_asym) {
Morten Rasmussen	05484e0	2018-07-20 14:32:31 +0100	[diff] [blame]	1996	dflags \|= SD_ASYM_CPUCAPACITY;
Morten Rasmussen	df054e8	2018-07-04 11:17:39 +0100	[diff] [blame]	1997	has_asym = true;
				1998	}
Morten Rasmussen	05484e0	2018-07-20 14:32:31 +0100	[diff] [blame]	1999
Valentin Schneider	ccf7412	2020-01-15 16:09:15 +0000	[diff] [blame]	2000	if (WARN_ON(!topology_span_sane(tl, cpu_map, i)))
				2001	goto error;
				2002
Morten Rasmussen	05484e0	2018-07-20 14:32:31 +0100	[diff] [blame]	2003	sd = build_sched_domain(tl, cpu_map, attr, sd, dflags, i);
				2004
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	2005	if (tl == sched_domain_topology)
				2006	*per_cpu_ptr(d.sd, i) = sd;
Peter Zijlstra	af85596	2017-04-26 17:36:41 +0200	[diff] [blame]	2007	if (tl->flags & SDTL_OVERLAP)
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	2008	sd->flags \|= SD_OVERLAP;
				2009	if (cpumask_equal(cpu_map, sched_domain_span(sd)))
				2010	break;
				2011	}
				2012	}
				2013
				2014	/* Build the groups for the domains */
				2015	for_each_cpu(i, cpu_map) {
				2016	for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
				2017	sd->span_weight = cpumask_weight(sched_domain_span(sd));
				2018	if (sd->flags & SD_OVERLAP) {
				2019	if (build_overlap_sched_groups(sd, i))
				2020	goto error;
				2021	} else {
				2022	if (build_sched_groups(sd, i))
				2023	goto error;
				2024	}
				2025	}
				2026	}
				2027
				2028	/* Calculate CPU capacity for physical packages and nodes */
				2029	for (i = nr_cpumask_bits-1; i >= 0; i--) {
				2030	if (!cpumask_test_cpu(i, cpu_map))
				2031	continue;
				2032
				2033	for (sd = *per_cpu_ptr(d.sd, i); sd; sd = sd->parent) {
				2034	claim_allocations(i, sd);
				2035	init_sched_groups_capacity(i, sd);
				2036	}
				2037	}
				2038
				2039	/* Attach the domains */
				2040	rcu_read_lock();
				2041	for_each_cpu(i, cpu_map) {
				2042	rq = cpu_rq(i);
				2043	sd = *per_cpu_ptr(d.sd, i);
				2044
				2045	/* Use READ_ONCE()/WRITE_ONCE() to avoid load/store tearing: */
				2046	if (rq->cpu_capacity_orig > READ_ONCE(d.rd->max_cpu_capacity))
				2047	WRITE_ONCE(d.rd->max_cpu_capacity, rq->cpu_capacity_orig);
				2048
				2049	cpu_attach_domain(sd, d.rd, i);
				2050	}
				2051	rcu_read_unlock();
				2052
Morten Rasmussen	df054e8	2018-07-04 11:17:39 +0100	[diff] [blame]	2053	if (has_asym)
Valentin Schneider	e284df7	2019-10-23 16:37:45 +0100	[diff] [blame]	2054	static_branch_inc_cpuslocked(&sched_asym_cpucapacity);
Morten Rasmussen	df054e8	2018-07-04 11:17:39 +0100	[diff] [blame]	2055
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	2056	if (rq && sched_debug_enabled) {
Juri Lelli	bf5015a	2018-05-24 17:29:36 +0200	[diff] [blame]	2057	pr_info("root domain span: %*pbl (max cpu_capacity = %lu)\n",
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	2058	cpumask_pr_args(cpu_map), rq->rd->max_cpu_capacity);
				2059	}
				2060
				2061	ret = 0;
				2062	error:
				2063	__free_domain_allocs(&d, alloc_state, cpu_map);
Ingo Molnar	97fb7a0	2018-03-03 14:01:12 +0100	[diff] [blame]	2064
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	2065	return ret;
				2066	}
				2067
				2068	/* Current sched domains: */
				2069	static cpumask_var_t *doms_cur;
				2070
				2071	/* Number of sched domains in 'doms_cur': */
				2072	static int ndoms_cur;
				2073
				2074	/* Attribues of custom domains in 'doms_cur' */
				2075	static struct sched_domain_attr *dattr_cur;
				2076
				2077	/*
				2078	* Special case: If a kmalloc() of a doms_cur partition (array of
				2079	* cpumask) fails, then fallback to a single sched domain,
				2080	* as determined by the single cpumask fallback_doms.
				2081	*/
Peter Zijlstra	8d5dc51	2017-04-25 15:29:40 +0200	[diff] [blame]	2082	static cpumask_var_t fallback_doms;
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	2083
				2084	/*
				2085	* arch_update_cpu_topology lets virtualized architectures update the
				2086	* CPU core maps. It is supposed to return 1 if the topology changed
				2087	* or 0 if it stayed the same.
				2088	*/
				2089	int __weak arch_update_cpu_topology(void)
				2090	{
				2091	return 0;
				2092	}
				2093
				2094	cpumask_var_t *alloc_sched_domains(unsigned int ndoms)
				2095	{
				2096	int i;
				2097	cpumask_var_t *doms;
				2098
Kees Cook	6da2ec5	2018-06-12 13:55:00 -0700	[diff] [blame]	2099	doms = kmalloc_array(ndoms, sizeof(*doms), GFP_KERNEL);
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	2100	if (!doms)
				2101	return NULL;
				2102	for (i = 0; i < ndoms; i++) {
				2103	if (!alloc_cpumask_var(&doms[i], GFP_KERNEL)) {
				2104	free_sched_domains(doms, i);
				2105	return NULL;
				2106	}
				2107	}
				2108	return doms;
				2109	}
				2110
				2111	void free_sched_domains(cpumask_var_t doms[], unsigned int ndoms)
				2112	{
				2113	unsigned int i;
				2114	for (i = 0; i < ndoms; i++)
				2115	free_cpumask_var(doms[i]);
				2116	kfree(doms);
				2117	}
				2118
				2119	/*
Juri Lelli	cb0c041	2018-12-19 14:34:45 +0100	[diff] [blame]	2120	* Set up scheduler domains and groups. For now this just excludes isolated
				2121	* CPUs, but could be used to exclude other special cases in the future.
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	2122	*/
Peter Zijlstra	8d5dc51	2017-04-25 15:29:40 +0200	[diff] [blame]	2123	int sched_init_domains(const struct cpumask *cpu_map)
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	2124	{
				2125	int err;
				2126
Peter Zijlstra	8d5dc51	2017-04-25 15:29:40 +0200	[diff] [blame]	2127	zalloc_cpumask_var(&sched_domains_tmpmask, GFP_KERNEL);
Peter Zijlstra	1676330	2017-04-25 14:31:11 +0200	[diff] [blame]	2128	zalloc_cpumask_var(&sched_domains_tmpmask2, GFP_KERNEL);
Peter Zijlstra	8d5dc51	2017-04-25 15:29:40 +0200	[diff] [blame]	2129	zalloc_cpumask_var(&fallback_doms, GFP_KERNEL);
				2130
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	2131	arch_update_cpu_topology();
				2132	ndoms_cur = 1;
				2133	doms_cur = alloc_sched_domains(ndoms_cur);
				2134	if (!doms_cur)
				2135	doms_cur = &fallback_doms;
Frederic Weisbecker	edb9382	2017-10-27 04:42:37 +0200	[diff] [blame]	2136	cpumask_and(doms_cur[0], cpu_map, housekeeping_cpumask(HK_FLAG_DOMAIN));
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	2137	err = build_sched_domains(doms_cur[0], NULL);
				2138	register_sched_domain_sysctl();
				2139
				2140	return err;
				2141	}
				2142
				2143	/*
				2144	* Detach sched domains from a group of CPUs specified in cpu_map
				2145	* These CPUs will now be attached to the NULL domain
				2146	*/
				2147	static void detach_destroy_domains(const struct cpumask *cpu_map)
				2148	{
Valentin Schneider	e284df7	2019-10-23 16:37:45 +0100	[diff] [blame]	2149	unsigned int cpu = cpumask_any(cpu_map);
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	2150	int i;
				2151
Valentin Schneider	e284df7	2019-10-23 16:37:45 +0100	[diff] [blame]	2152	if (rcu_access_pointer(per_cpu(sd_asym_cpucapacity, cpu)))
				2153	static_branch_dec_cpuslocked(&sched_asym_cpucapacity);
				2154
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	2155	rcu_read_lock();
				2156	for_each_cpu(i, cpu_map)
				2157	cpu_attach_domain(NULL, &def_root_domain, i);
				2158	rcu_read_unlock();
				2159	}
				2160
				2161	/* handle null as "default" */
				2162	static int dattrs_equal(struct sched_domain_attr *cur, int idx_cur,
				2163	struct sched_domain_attr *new, int idx_new)
				2164	{
				2165	struct sched_domain_attr tmp;
				2166
				2167	/* Fast path: */
				2168	if (!new && !cur)
				2169	return 1;
				2170
				2171	tmp = SD_ATTR_INIT;
Ingo Molnar	97fb7a0	2018-03-03 14:01:12 +0100	[diff] [blame]	2172
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	2173	return !memcmp(cur ? (cur + idx_cur) : &tmp,
				2174	new ? (new + idx_new) : &tmp,
				2175	sizeof(struct sched_domain_attr));
				2176	}
				2177
				2178	/*
				2179	* Partition sched domains as specified by the 'ndoms_new'
				2180	* cpumasks in the array doms_new[] of cpumasks. This compares
				2181	* doms_new[] to the current sched domain partitioning, doms_cur[].
				2182	* It destroys each deleted domain and builds each new domain.
				2183	*
				2184	* 'doms_new' is an array of cpumask_var_t's of length 'ndoms_new'.
				2185	* The masks don't intersect (don't overlap.) We should setup one
				2186	* sched domain for each mask. CPUs not in any of the cpumasks will
				2187	* not be load balanced. If the same cpumask appears both in the
				2188	* current 'doms_cur' domains and in the new 'doms_new', we can leave
				2189	* it as it is.
				2190	*
				2191	* The passed in 'doms_new' should be allocated using
				2192	* alloc_sched_domains. This routine takes ownership of it and will
				2193	* free_sched_domains it when done with it. If the caller failed the
				2194	* alloc call, then it can pass in doms_new == NULL && ndoms_new == 1,
				2195	* and partition_sched_domains() will fallback to the single partition
				2196	* 'fallback_doms', it also forces the domains to be rebuilt.
				2197	*
				2198	* If doms_new == NULL it will be replaced with cpu_online_mask.
				2199	* ndoms_new == 0 is a special case for destroying existing domains,
				2200	* and it will not create the default domain.
				2201	*
Mathieu Poirier	c22645f	2019-07-19 15:59:53 +0200	[diff] [blame]	2202	* Call with hotplug lock and sched_domains_mutex held
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	2203	*/
Mathieu Poirier	c22645f	2019-07-19 15:59:53 +0200	[diff] [blame]	2204	void partition_sched_domains_locked(int ndoms_new, cpumask_var_t doms_new[],
				2205	struct sched_domain_attr *dattr_new)
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	2206	{
Quentin Perret	1f74de8	2018-12-03 09:56:22 +0000	[diff] [blame]	2207	bool __maybe_unused has_eas = false;
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	2208	int i, j, n;
				2209	int new_topology;
				2210
Mathieu Poirier	c22645f	2019-07-19 15:59:53 +0200	[diff] [blame]	2211	lockdep_assert_held(&sched_domains_mutex);
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	2212
				2213	/* Always unregister in case we don't destroy any domains: */
				2214	unregister_sched_domain_sysctl();
				2215
				2216	/* Let the architecture update CPU core mappings: */
				2217	new_topology = arch_update_cpu_topology();
				2218
Peter Zijlstra	09e0dd8	2017-08-08 12:16:24 +0200	[diff] [blame]	2219	if (!doms_new) {
				2220	WARN_ON_ONCE(dattr_new);
				2221	n = 0;
				2222	doms_new = alloc_sched_domains(1);
				2223	if (doms_new) {
				2224	n = 1;
Frederic Weisbecker	edb9382	2017-10-27 04:42:37 +0200	[diff] [blame]	2225	cpumask_and(doms_new[0], cpu_active_mask,
				2226	housekeeping_cpumask(HK_FLAG_DOMAIN));
Peter Zijlstra	09e0dd8	2017-08-08 12:16:24 +0200	[diff] [blame]	2227	}
				2228	} else {
				2229	n = ndoms_new;
				2230	}
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	2231
				2232	/* Destroy deleted domains: */
				2233	for (i = 0; i < ndoms_cur; i++) {
				2234	for (j = 0; j < n && !new_topology; j++) {
Quentin Perret	6aa140f	2018-12-03 09:56:18 +0000	[diff] [blame]	2235	if (cpumask_equal(doms_cur[i], doms_new[j]) &&
Mathieu Poirier	f9a25f7	2019-07-19 15:59:55 +0200	[diff] [blame]	2236	dattrs_equal(dattr_cur, i, dattr_new, j)) {
				2237	struct root_domain *rd;
				2238
				2239	/*
				2240	* This domain won't be destroyed and as such
				2241	* its dl_bw->total_bw needs to be cleared. It
				2242	* will be recomputed in function
				2243	* update_tasks_root_domain().
				2244	*/
				2245	rd = cpu_rq(cpumask_any(doms_cur[i]))->rd;
				2246	dl_clear_root_domain(rd);
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	2247	goto match1;
Mathieu Poirier	f9a25f7	2019-07-19 15:59:55 +0200	[diff] [blame]	2248	}
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	2249	}
				2250	/* No match - a current sched domain not in new doms_new[] */
				2251	detach_destroy_domains(doms_cur[i]);
				2252	match1:
				2253	;
				2254	}
				2255
				2256	n = ndoms_cur;
Peter Zijlstra	09e0dd8	2017-08-08 12:16:24 +0200	[diff] [blame]	2257	if (!doms_new) {
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	2258	n = 0;
				2259	doms_new = &fallback_doms;
Frederic Weisbecker	edb9382	2017-10-27 04:42:37 +0200	[diff] [blame]	2260	cpumask_and(doms_new[0], cpu_active_mask,
				2261	housekeeping_cpumask(HK_FLAG_DOMAIN));
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	2262	}
				2263
				2264	/* Build new domains: */
				2265	for (i = 0; i < ndoms_new; i++) {
				2266	for (j = 0; j < n && !new_topology; j++) {
Quentin Perret	6aa140f	2018-12-03 09:56:18 +0000	[diff] [blame]	2267	if (cpumask_equal(doms_new[i], doms_cur[j]) &&
				2268	dattrs_equal(dattr_new, i, dattr_cur, j))
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	2269	goto match2;
				2270	}
				2271	/* No match - add a new doms_new */
				2272	build_sched_domains(doms_new[i], dattr_new ? dattr_new + i : NULL);
				2273	match2:
				2274	;
				2275	}
				2276
Quentin Perret	531b5c9	2018-12-03 09:56:21 +0000	[diff] [blame]	2277	#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
Quentin Perret	6aa140f	2018-12-03 09:56:18 +0000	[diff] [blame]	2278	/* Build perf. domains: */
				2279	for (i = 0; i < ndoms_new; i++) {
Quentin Perret	531b5c9	2018-12-03 09:56:21 +0000	[diff] [blame]	2280	for (j = 0; j < n && !sched_energy_update; j++) {
Quentin Perret	6aa140f	2018-12-03 09:56:18 +0000	[diff] [blame]	2281	if (cpumask_equal(doms_new[i], doms_cur[j]) &&
Quentin Perret	1f74de8	2018-12-03 09:56:22 +0000	[diff] [blame]	2282	cpu_rq(cpumask_first(doms_cur[j]))->rd->pd) {
				2283	has_eas = true;
Quentin Perret	6aa140f	2018-12-03 09:56:18 +0000	[diff] [blame]	2284	goto match3;
Quentin Perret	1f74de8	2018-12-03 09:56:22 +0000	[diff] [blame]	2285	}
Quentin Perret	6aa140f	2018-12-03 09:56:18 +0000	[diff] [blame]	2286	}
				2287	/* No match - add perf. domains for a new rd */
Quentin Perret	1f74de8	2018-12-03 09:56:22 +0000	[diff] [blame]	2288	has_eas \|= build_perf_domains(doms_new[i]);
Quentin Perret	6aa140f	2018-12-03 09:56:18 +0000	[diff] [blame]	2289	match3:
				2290	;
				2291	}
Quentin Perret	1f74de8	2018-12-03 09:56:22 +0000	[diff] [blame]	2292	sched_energy_set(has_eas);
Quentin Perret	6aa140f	2018-12-03 09:56:18 +0000	[diff] [blame]	2293	#endif
				2294
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	2295	/* Remember the new sched domains: */
				2296	if (doms_cur != &fallback_doms)
				2297	free_sched_domains(doms_cur, ndoms_cur);
				2298
				2299	kfree(dattr_cur);
				2300	doms_cur = doms_new;
				2301	dattr_cur = dattr_new;
				2302	ndoms_cur = ndoms_new;
				2303
				2304	register_sched_domain_sysctl();
Mathieu Poirier	c22645f	2019-07-19 15:59:53 +0200	[diff] [blame]	2305	}
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	2306
Mathieu Poirier	c22645f	2019-07-19 15:59:53 +0200	[diff] [blame]	2307	/*
				2308	* Call with hotplug lock held
					*
					* Locking wrapper around partition_sched_domains_locked(): takes
					* sched_domains_mutex, forwards ndoms_new, doms_new and dattr_new
					* unchanged, then drops the mutex again. The arguments are not
					* inspected here; see partition_sched_domains_locked() for their
					* meaning. Returns nothing.
				2309	*/
				2310	void partition_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
				2311	struct sched_domain_attr *dattr_new)
				2312	{
					/* Hold sched_domains_mutex across the entire repartitioning call. */
				2313	mutex_lock(&sched_domains_mutex);
				2314	partition_sched_domains_locked(ndoms_new, doms_new, dattr_new);
Ingo Molnar	f2cb136	2017-02-01 13:10:18 +0100	[diff] [blame]	2315	mutex_unlock(&sched_domains_mutex);
				2316	}