Blame - drivers/infiniband/hw/hfi1/affinity.c - SHIFTPHONES/mainline/linux

blob: 14d7eeb09be6545f5f21144f0f11ea41c53203a4 [file] [log] [blame]

Mitko Haralanov	957558c	2016-02-03 14:33:40 -0800	[diff] [blame]	1	/*
Jubin John	05d6ac1	2016-02-14 20:22:17 -0800	[diff] [blame]	2	* Copyright(c) 2015, 2016 Intel Corporation.
Mitko Haralanov	957558c	2016-02-03 14:33:40 -0800	[diff] [blame]	3	*
				4	* This file is provided under a dual BSD/GPLv2 license. When using or
				5	* redistributing this file, you may do so under either license.
				6	*
				7	* GPL LICENSE SUMMARY
				8	*
Mitko Haralanov	957558c	2016-02-03 14:33:40 -0800	[diff] [blame]	9	* This program is free software; you can redistribute it and/or modify
				10	* it under the terms of version 2 of the GNU General Public License as
				11	* published by the Free Software Foundation.
				12	*
				13	* This program is distributed in the hope that it will be useful, but
				14	* WITHOUT ANY WARRANTY; without even the implied warranty of
				15	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
				16	* General Public License for more details.
				17	*
				18	* BSD LICENSE
				19	*
Mitko Haralanov	957558c	2016-02-03 14:33:40 -0800	[diff] [blame]	20	* Redistribution and use in source and binary forms, with or without
				21	* modification, are permitted provided that the following conditions
				22	* are met:
				23	*
				24	* - Redistributions of source code must retain the above copyright
				25	* notice, this list of conditions and the following disclaimer.
				26	* - Redistributions in binary form must reproduce the above copyright
				27	* notice, this list of conditions and the following disclaimer in
				28	* the documentation and/or other materials provided with the
				29	* distribution.
				30	* - Neither the name of Intel Corporation nor the names of its
				31	* contributors may be used to endorse or promote products derived
				32	* from this software without specific prior written permission.
				33	*
				34	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
				35	* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
				36	* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
				37	* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
				38	* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
				39	* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
				40	* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
				41	* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
				42	* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
				43	* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
				44	* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
				45	*
				46	*/
				47	#include <linux/topology.h>
				48	#include <linux/cpumask.h>
				49	#include <linux/module.h>
				50
				51	#include "hfi.h"
				52	#include "affinity.h"
				53	#include "sdma.h"
				54	#include "trace.h"
				55
/*
 * Human-readable names for the IRQ types. The table is looked up with an
 * enum irq_type value, so its order must follow that enum.
 */
static const char * const irq_type_names[] = {
	"SDMA", "RCVCTXT", "GENERAL", "OTHER",
};
				63
				64	static inline void init_cpu_mask_set(struct cpu_mask_set *set)
				65	{
				66	cpumask_clear(&set->mask);
				67	cpumask_clear(&set->used);
				68	set->gen = 0;
				69	}
				70
Jubin John	0852d24	2016-04-12 11:30:08 -0700	[diff] [blame]	71	/* Initialize non-HT cpu cores mask */
				72	int init_real_cpu_mask(struct hfi1_devdata *dd)
				73	{
				74	struct hfi1_affinity *info;
				75	int possible, curr_cpu, i, ht;
				76
				77	info = kzalloc(sizeof(*info), GFP_KERNEL);
				78	if (!info)
				79	return -ENOMEM;
				80
				81	cpumask_clear(&info->real_cpu_mask);
				82
				83	/* Start with cpu online mask as the real cpu mask */
				84	cpumask_copy(&info->real_cpu_mask, cpu_online_mask);
				85
				86	/*
				87	* Remove HT cores from the real cpu mask. Do this in two steps below.
				88	*/
				89	possible = cpumask_weight(&info->real_cpu_mask);
				90	ht = cpumask_weight(topology_sibling_cpumask(
				91	cpumask_first(&info->real_cpu_mask)));
				92	/*
				93	* Step 1. Skip over the first N HT siblings and use them as the
				94	* "real" cores. Assumes that HT cores are not enumerated in
				95	* succession (except in the single core case).
				96	*/
				97	curr_cpu = cpumask_first(&info->real_cpu_mask);
				98	for (i = 0; i < possible / ht; i++)
				99	curr_cpu = cpumask_next(curr_cpu, &info->real_cpu_mask);
				100	/*
				101	* Step 2. Remove the remaining HT siblings. Use cpumask_next() to
				102	* skip any gaps.
				103	*/
				104	for (; i < possible; i++) {
				105	cpumask_clear_cpu(curr_cpu, &info->real_cpu_mask);
				106	curr_cpu = cpumask_next(curr_cpu, &info->real_cpu_mask);
				107	}
				108
				109	dd->affinity = info;
				110	return 0;
				111	}
				112
Mitko Haralanov	957558c	2016-02-03 14:33:40 -0800	[diff] [blame]	113	/*
				114	* Interrupt affinity.
				115	*
				116	* non-rcv avail gets a default mask that
				117	* starts as possible cpus with threads reset
				118	* and each rcv avail reset.
				119	*
				120	* rcv avail gets node relative 1 wrapping back
				121	* to the node relative 1 as necessary.
				122	*
				123	*/
Jubin John	0852d24	2016-04-12 11:30:08 -0700	[diff] [blame]	124	void hfi1_dev_affinity_init(struct hfi1_devdata *dd)
Mitko Haralanov	957558c	2016-02-03 14:33:40 -0800	[diff] [blame]	125	{
				126	int node = pcibus_to_node(dd->pcidev->bus);
Jubin John	0852d24	2016-04-12 11:30:08 -0700	[diff] [blame]	127	struct hfi1_affinity *info = dd->affinity;
Mitko Haralanov	957558c	2016-02-03 14:33:40 -0800	[diff] [blame]	128	const struct cpumask *local_mask;
Jubin John	0852d24	2016-04-12 11:30:08 -0700	[diff] [blame]	129	int curr_cpu, possible, i;
Mitko Haralanov	957558c	2016-02-03 14:33:40 -0800	[diff] [blame]	130
				131	if (node < 0)
				132	node = numa_node_id();
				133	dd->node = node;
				134
Mitko Haralanov	957558c	2016-02-03 14:33:40 -0800	[diff] [blame]	135	spin_lock_init(&info->lock);
				136
				137	init_cpu_mask_set(&info->def_intr);
				138	init_cpu_mask_set(&info->rcv_intr);
				139	init_cpu_mask_set(&info->proc);
				140
				141	local_mask = cpumask_of_node(dd->node);
				142	if (cpumask_first(local_mask) >= nr_cpu_ids)
				143	local_mask = topology_core_cpumask(0);
Jubin John	0852d24	2016-04-12 11:30:08 -0700	[diff] [blame]	144	/* Use the "real" cpu mask of this node as the default */
				145	cpumask_and(&info->def_intr.mask, &info->real_cpu_mask, local_mask);
Mitko Haralanov	957558c	2016-02-03 14:33:40 -0800	[diff] [blame]	146
				147	/* fill in the receive list */
				148	possible = cpumask_weight(&info->def_intr.mask);
				149	curr_cpu = cpumask_first(&info->def_intr.mask);
				150	if (possible == 1) {
				151	/* only one CPU, everyone will use it */
				152	cpumask_set_cpu(curr_cpu, &info->rcv_intr.mask);
				153	} else {
				154	/*
				155	* Retain the first CPU in the default list for the control
				156	* context.
				157	*/
				158	curr_cpu = cpumask_next(curr_cpu, &info->def_intr.mask);
				159	/*
				160	* Remove the remaining kernel receive queues from
				161	* the default list and add them to the receive list.
				162	*/
				163	for (i = 0; i < dd->n_krcv_queues - 1; i++) {
				164	cpumask_clear_cpu(curr_cpu, &info->def_intr.mask);
				165	cpumask_set_cpu(curr_cpu, &info->rcv_intr.mask);
				166	curr_cpu = cpumask_next(curr_cpu, &info->def_intr.mask);
				167	if (curr_cpu >= nr_cpu_ids)
				168	break;
				169	}
				170	}
				171
				172	cpumask_copy(&info->proc.mask, cpu_online_mask);
Mitko Haralanov	957558c	2016-02-03 14:33:40 -0800	[diff] [blame]	173	}
				174
				175	void hfi1_dev_affinity_free(struct hfi1_devdata *dd)
				176	{
				177	kfree(dd->affinity);
				178	}
				179
				180	int hfi1_get_irq_affinity(struct hfi1_devdata dd, struct hfi1_msix_entry msix)
				181	{
				182	int ret;
				183	cpumask_var_t diff;
				184	struct cpu_mask_set *set;
				185	struct sdma_engine *sde = NULL;
				186	struct hfi1_ctxtdata *rcd = NULL;
				187	char extra[64];
				188	int cpu = -1;
				189
				190	extra[0] = '\0';
				191	cpumask_clear(&msix->mask);
				192
				193	ret = zalloc_cpumask_var(&diff, GFP_KERNEL);
				194	if (!ret)
				195	return -ENOMEM;
				196
				197	switch (msix->type) {
				198	case IRQ_SDMA:
				199	sde = (struct sdma_engine *)msix->arg;
				200	scnprintf(extra, 64, "engine %u", sde->this_idx);
				201	/* fall through */
				202	case IRQ_GENERAL:
				203	set = &dd->affinity->def_intr;
				204	break;
				205	case IRQ_RCVCTXT:
				206	rcd = (struct hfi1_ctxtdata *)msix->arg;
				207	if (rcd->ctxt == HFI1_CTRL_CTXT) {
				208	set = &dd->affinity->def_intr;
				209	cpu = cpumask_first(&set->mask);
				210	} else {
				211	set = &dd->affinity->rcv_intr;
				212	}
				213	scnprintf(extra, 64, "ctxt %u", rcd->ctxt);
				214	break;
				215	default:
				216	dd_dev_err(dd, "Invalid IRQ type %d\n", msix->type);
				217	return -EINVAL;
				218	}
				219
				220	/*
				221	* The control receive context is placed on a particular CPU, which
				222	* is set above. Skip accounting for it. Everything else finds its
				223	* CPU here.
				224	*/
				225	if (cpu == -1) {
				226	spin_lock(&dd->affinity->lock);
				227	if (cpumask_equal(&set->mask, &set->used)) {
				228	/*
				229	* We've used up all the CPUs, bump up the generation
				230	* and reset the 'used' map
				231	*/
				232	set->gen++;
				233	cpumask_clear(&set->used);
				234	}
				235	cpumask_andnot(diff, &set->mask, &set->used);
				236	cpu = cpumask_first(diff);
				237	cpumask_set_cpu(cpu, &set->used);
				238	spin_unlock(&dd->affinity->lock);
				239	}
				240
				241	switch (msix->type) {
				242	case IRQ_SDMA:
				243	sde->cpu = cpu;
				244	break;
				245	case IRQ_GENERAL:
				246	case IRQ_RCVCTXT:
				247	case IRQ_OTHER:
				248	break;
				249	}
				250
				251	cpumask_set_cpu(cpu, &msix->mask);
				252	dd_dev_info(dd, "IRQ vector: %u, type %s %s -> cpu: %d\n",
				253	msix->msix.vector, irq_type_names[msix->type],
				254	extra, cpu);
				255	irq_set_affinity_hint(msix->msix.vector, &msix->mask);
				256
				257	free_cpumask_var(diff);
				258	return 0;
				259	}
				260
				261	void hfi1_put_irq_affinity(struct hfi1_devdata *dd,
				262	struct hfi1_msix_entry *msix)
				263	{
				264	struct cpu_mask_set *set = NULL;
				265	struct hfi1_ctxtdata *rcd;
				266
				267	switch (msix->type) {
				268	case IRQ_SDMA:
				269	case IRQ_GENERAL:
				270	set = &dd->affinity->def_intr;
				271	break;
				272	case IRQ_RCVCTXT:
				273	rcd = (struct hfi1_ctxtdata *)msix->arg;
				274	/* only do accounting for non control contexts */
				275	if (rcd->ctxt != HFI1_CTRL_CTXT)
				276	set = &dd->affinity->rcv_intr;
				277	break;
				278	default:
				279	return;
				280	}
				281
				282	if (set) {
				283	spin_lock(&dd->affinity->lock);
				284	cpumask_andnot(&set->used, &set->used, &msix->mask);
				285	if (cpumask_empty(&set->used) && set->gen) {
				286	set->gen--;
				287	cpumask_copy(&set->used, &set->mask);
				288	}
				289	spin_unlock(&dd->affinity->lock);
				290	}
				291
				292	irq_set_affinity_hint(msix->msix.vector, NULL);
				293	cpumask_clear(&msix->mask);
				294	}
				295
				296	int hfi1_get_proc_affinity(struct hfi1_devdata *dd, int node)
				297	{
				298	int cpu = -1, ret;
				299	cpumask_var_t diff, mask, intrs;
				300	const struct cpumask *node_mask,
				301	*proc_mask = tsk_cpus_allowed(current);
				302	struct cpu_mask_set *set = &dd->affinity->proc;
Mitko Haralanov	957558c	2016-02-03 14:33:40 -0800	[diff] [blame]	303
				304	/*
				305	* check whether process/context affinity has already
				306	* been set
				307	*/
				308	if (cpumask_weight(proc_mask) == 1) {
Leon Romanovsky	f242d93a	2016-05-31 10:54:36 +0300	[diff] [blame^]	309	hfi1_cdbg(PROC, "PID %u %s affinity set to CPU %*pbl",
				310	current->pid, current->comm,
				311	cpumask_pr_args(proc_mask));
Mitko Haralanov	957558c	2016-02-03 14:33:40 -0800	[diff] [blame]	312	/*
				313	* Mark the pre-set CPU as used. This is atomic so we don't
				314	* need the lock
				315	*/
				316	cpu = cpumask_first(proc_mask);
				317	cpumask_set_cpu(cpu, &set->used);
				318	goto done;
				319	} else if (cpumask_weight(proc_mask) < cpumask_weight(&set->mask)) {
Leon Romanovsky	f242d93a	2016-05-31 10:54:36 +0300	[diff] [blame^]	320	hfi1_cdbg(PROC, "PID %u %s affinity set to CPU set(s) %*pbl",
				321	current->pid, current->comm,
				322	cpumask_pr_args(proc_mask));
Mitko Haralanov	957558c	2016-02-03 14:33:40 -0800	[diff] [blame]	323	goto done;
				324	}
				325
				326	/*
				327	* The process does not have a preset CPU affinity so find one to
				328	* recommend. We prefer CPUs on the same NUMA as the device.
				329	*/
				330
				331	ret = zalloc_cpumask_var(&diff, GFP_KERNEL);
				332	if (!ret)
				333	goto done;
				334	ret = zalloc_cpumask_var(&mask, GFP_KERNEL);
				335	if (!ret)
				336	goto free_diff;
				337	ret = zalloc_cpumask_var(&intrs, GFP_KERNEL);
				338	if (!ret)
				339	goto free_mask;
				340
				341	spin_lock(&dd->affinity->lock);
				342	/*
				343	* If we've used all available CPUs, clear the mask and start
				344	* overloading.
				345	*/
				346	if (cpumask_equal(&set->mask, &set->used)) {
				347	set->gen++;
				348	cpumask_clear(&set->used);
				349	}
				350
				351	/* CPUs used by interrupt handlers */
				352	cpumask_copy(intrs, (dd->affinity->def_intr.gen ?
				353	&dd->affinity->def_intr.mask :
				354	&dd->affinity->def_intr.used));
				355	cpumask_or(intrs, intrs, (dd->affinity->rcv_intr.gen ?
				356	&dd->affinity->rcv_intr.mask :
				357	&dd->affinity->rcv_intr.used));
Leon Romanovsky	f242d93a	2016-05-31 10:54:36 +0300	[diff] [blame^]	358	hfi1_cdbg(PROC, "CPUs used by interrupts: %*pbl",
				359	cpumask_pr_args(intrs));
Mitko Haralanov	957558c	2016-02-03 14:33:40 -0800	[diff] [blame]	360
				361	/*
				362	* If we don't have a NUMA node requested, preference is towards
				363	* device NUMA node
				364	*/
				365	if (node == -1)
				366	node = dd->node;
				367	node_mask = cpumask_of_node(node);
Leon Romanovsky	f242d93a	2016-05-31 10:54:36 +0300	[diff] [blame^]	368	hfi1_cdbg(PROC, "device on NUMA %u, CPUs %*pbl", node,
				369	cpumask_pr_args(node_mask));
Mitko Haralanov	957558c	2016-02-03 14:33:40 -0800	[diff] [blame]	370
				371	/* diff will hold all unused cpus */
				372	cpumask_andnot(diff, &set->mask, &set->used);
Leon Romanovsky	f242d93a	2016-05-31 10:54:36 +0300	[diff] [blame^]	373	hfi1_cdbg(PROC, "unused CPUs (all) %*pbl", cpumask_pr_args(diff));
Mitko Haralanov	957558c	2016-02-03 14:33:40 -0800	[diff] [blame]	374
				375	/* get cpumask of available CPUs on preferred NUMA */
				376	cpumask_and(mask, diff, node_mask);
Leon Romanovsky	f242d93a	2016-05-31 10:54:36 +0300	[diff] [blame^]	377	hfi1_cdbg(PROC, "available cpus on NUMA %*pbl", cpumask_pr_args(mask));
Mitko Haralanov	957558c	2016-02-03 14:33:40 -0800	[diff] [blame]	378
				379	/*
				380	* At first, we don't want to place processes on the same
				381	* CPUs as interrupt handlers.
				382	*/
				383	cpumask_andnot(diff, mask, intrs);
				384	if (!cpumask_empty(diff))
				385	cpumask_copy(mask, diff);
				386
				387	/*
				388	* if we don't have a cpu on the preferred NUMA, get
				389	* the list of the remaining available CPUs
				390	*/
				391	if (cpumask_empty(mask)) {
				392	cpumask_andnot(diff, &set->mask, &set->used);
				393	cpumask_andnot(mask, diff, node_mask);
				394	}
Leon Romanovsky	f242d93a	2016-05-31 10:54:36 +0300	[diff] [blame^]	395	hfi1_cdbg(PROC, "possible CPUs for process %*pbl",
				396	cpumask_pr_args(mask));
Mitko Haralanov	957558c	2016-02-03 14:33:40 -0800	[diff] [blame]	397
				398	cpu = cpumask_first(mask);
				399	if (cpu >= nr_cpu_ids) /* empty */
				400	cpu = -1;
				401	else
				402	cpumask_set_cpu(cpu, &set->used);
				403	spin_unlock(&dd->affinity->lock);
				404
				405	free_cpumask_var(intrs);
				406	free_mask:
				407	free_cpumask_var(mask);
				408	free_diff:
				409	free_cpumask_var(diff);
				410	done:
				411	return cpu;
				412	}
				413
				414	void hfi1_put_proc_affinity(struct hfi1_devdata *dd, int cpu)
				415	{
				416	struct cpu_mask_set *set = &dd->affinity->proc;
				417
				418	if (cpu < 0)
				419	return;
				420	spin_lock(&dd->affinity->lock);
				421	cpumask_clear_cpu(cpu, &set->used);
				422	if (cpumask_empty(&set->used) && set->gen) {
				423	set->gen--;
				424	cpumask_copy(&set->used, &set->mask);
				425	}
				426	spin_unlock(&dd->affinity->lock);
				427	}
				428