Thomas Gleixnerd2912cb2019-06-04 10:11:33 +02001// SPDX-License-Identifier: GPL-2.0-only
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10002/*
3 * Copyright 2017 Benjamin Herrenschmidt, IBM Corporation.
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10004 */
5
6#define pr_fmt(fmt) "xive-kvm: " fmt
7
8#include <linux/kernel.h>
9#include <linux/kvm_host.h>
10#include <linux/err.h>
11#include <linux/gfp.h>
12#include <linux/spinlock.h>
13#include <linux/delay.h>
14#include <linux/percpu.h>
15#include <linux/cpumask.h>
Al Viro5bb866d2017-12-04 14:43:20 -050016#include <linux/uaccess.h>
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +100017#include <asm/kvm_book3s.h>
18#include <asm/kvm_ppc.h>
19#include <asm/hvcall.h>
20#include <asm/xics.h>
21#include <asm/xive.h>
22#include <asm/xive-regs.h>
23#include <asm/debug.h>
Paolo Bonzini4415b332017-05-09 11:50:01 +020024#include <asm/debugfs.h>
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +100025#include <asm/time.h>
26#include <asm/opal.h>
27
28#include <linux/debugfs.h>
29#include <linux/seq_file.h>
30
31#include "book3s_xive.h"
32
33
34/*
35 * Virtual mode variants of the hcalls for use on radix/radix
36 * with AIL. They require the VCPU's VP to be "pushed"
37 *
Finn Thain3cc97be2018-08-23 17:00:52 -070038 * We still instantiate them here because we use some of the
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +100039 * generated utility functions as well in this file.
40 */
41#define XIVE_RUNTIME_CHECKS
42#define X_PFX xive_vm_
43#define X_STATIC static
44#define X_STAT_PFX stat_vm_
45#define __x_tima xive_tima
46#define __x_eoi_page(xd) ((void __iomem *)((xd)->eoi_mmio))
47#define __x_trig_page(xd) ((void __iomem *)((xd)->trig_mmio))
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +100048#define __x_writeb __raw_writeb
49#define __x_readw __raw_readw
50#define __x_readq __raw_readq
51#define __x_writeq __raw_writeq
52
53#include "book3s_xive_template.c"
54
55/*
56 * We leave a gap of a couple of interrupts in the queue to
57 * account for the IPI and an additional safety guard.
58 */
59#define XIVE_Q_GAP 2
60
61/*
Paul Mackerras95a64322018-10-08 16:30:55 +110062 * Push a vcpu's context to the XIVE on guest entry.
63 * This assumes we are in virtual mode (MMU on)
64 */
65void kvmppc_xive_push_vcpu(struct kvm_vcpu *vcpu)
66{
67 void __iomem *tima = local_paca->kvm_hstate.xive_tima_virt;
68 u64 pq;
69
Paul Mackerras8d4ba9c2019-08-13 20:01:00 +100070 /*
71 * Nothing to do if the platform doesn't have a XIVE
72 * or this vCPU doesn't have its own XIVE context
73 * (e.g. because it's not using an in-kernel interrupt controller).
74 */
75 if (!tima || !vcpu->arch.xive_cam_word)
Paul Mackerras95a64322018-10-08 16:30:55 +110076 return;
Paul Mackerras8d4ba9c2019-08-13 20:01:00 +100077
Paul Mackerras95a64322018-10-08 16:30:55 +110078 eieio();
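	/*
	 * Load the OS CAM line: write the saved OS context (word 0/1)
	 * first, then the CAM word; its valid bit (TM_QW1W2_VO, set when
	 * the vcpu was connected) is what makes the pushed context live.
	 */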
79 __raw_writeq(vcpu->arch.xive_saved_state.w01, tima + TM_QW1_OS);
80 __raw_writel(vcpu->arch.xive_cam_word, tima + TM_QW1_OS + TM_WORD2);
81 vcpu->arch.xive_pushed = 1;
82 eieio();
83
84 /*
85 * We clear the irq_pending flag. There is a small chance of a
86 * race vs. the escalation interrupt happening on another
87 * processor setting it again, but the only consequence is to
88 * cause a spurious wakeup on the next H_CEDE, which is not an
89 * issue.
90 */
91 vcpu->arch.irq_pending = 0;
92
93 /*
94 * In single escalation mode, if the escalation interrupt is
95 * on, we mask it.
96 */
97 if (vcpu->arch.xive_esc_on) {
98 pq = __raw_readq((void __iomem *)(vcpu->arch.xive_esc_vaddr +
99 XIVE_ESB_SET_PQ_01));
100 mb();
101
102 /*
103 * We have a possible subtle race here: The escalation
104 * interrupt might have fired and be on its way to the
105 * host queue while we mask it, and if we unmask it
106 * early enough (re-cede right away), there is a
107 * theoretical possibility that it fires again, thus
108 * landing in the target queue more than once which is
109 * a big no-no.
110 *
111 * Fortunately, solving this is rather easy. If the
112 * above load setting PQ to 01 returns a previous
113 * value where P is set, then we know the escalation
114 * interrupt is somewhere on its way to the host. In
115 * that case we simply don't clear the xive_esc_on
116 * flag below. It will be eventually cleared by the
117 * handler for the escalation interrupt.
118 *
119 * Then, when doing a cede, we check that flag again
120 * before re-enabling the escalation interrupt, and if
121 * set, we abort the cede.
122 */
123 if (!(pq & XIVE_ESB_VAL_P))
124 /* Now P is 0, we can clear the flag */
125 vcpu->arch.xive_esc_on = 0;
126 }
127}
128EXPORT_SYMBOL_GPL(kvmppc_xive_push_vcpu);
129
130/*
Nicholas Piggin023c3c92021-05-28 19:07:28 +1000131 * Pull a vcpu's context from the XIVE on guest exit.
132 * This assumes we are in virtual mode (MMU on)
133 */
134void kvmppc_xive_pull_vcpu(struct kvm_vcpu *vcpu)
135{
136 void __iomem *tima = local_paca->kvm_hstate.xive_tima_virt;
137
138 if (!vcpu->arch.xive_pushed)
139 return;
140
141 /*
142 * Should not have been pushed if there is no tima
143 */
144 if (WARN_ON(!tima))
145 return;
146
147 eieio();
148 /* First load to pull the context, we ignore the value */
149 __raw_readl(tima + TM_SPC_PULL_OS_CTX);
150 /* Second load to recover the context state (Words 0 and 1) */
151 vcpu->arch.xive_saved_state.w01 = __raw_readq(tima + TM_QW1_OS);
152
153 /* Fixup some of the state for the next load */
154 vcpu->arch.xive_saved_state.lsmfb = 0;
155 vcpu->arch.xive_saved_state.ack = 0xff;
156 vcpu->arch.xive_pushed = 0;
157 eieio();
158}
159EXPORT_SYMBOL_GPL(kvmppc_xive_pull_vcpu);
160
Nicholas Piggin9dc2bab2021-05-28 19:07:33 +1000161void kvmppc_xive_rearm_escalation(struct kvm_vcpu *vcpu)
162{
163 void __iomem *esc_vaddr = (void __iomem *)vcpu->arch.xive_esc_vaddr;
164
165 if (!esc_vaddr)
166 return;
167
168 /* we are using XIVE with single escalation */
169
170 if (vcpu->arch.xive_esc_on) {
171 /*
172 * If we still have a pending escalation, abort the cede,
173 * and we must set PQ to 10 rather than 00 so that we don't
174 * potentially end up with two entries for the escalation
175 * interrupt in the XIVE interrupt queue. In that case
176 * we also don't want to set xive_esc_on to 1 here in
177 * case we race with xive_esc_irq().
178 */
179 vcpu->arch.ceded = 0;
180 /*
181 * The escalation interrupts are special as we don't EOI them.
182 * There is no need to use the load-after-store ordering offset
183 * to set PQ to 10 as we won't use StoreEOI.
184 */
185 __raw_readq(esc_vaddr + XIVE_ESB_SET_PQ_10);
186 } else {
187 vcpu->arch.xive_esc_on = true;
188 mb();
189 __raw_readq(esc_vaddr + XIVE_ESB_SET_PQ_00);
190 }
191 mb();
192}
193EXPORT_SYMBOL_GPL(kvmppc_xive_rearm_escalation);
194
Nicholas Piggin023c3c92021-05-28 19:07:28 +1000195/*
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +1000196 * This is a simple trigger for a generic XIVE IRQ. This must
197 * only be called for interrupts that support a trigger page
198 */
199static bool xive_irq_trigger(struct xive_irq_data *xd)
200{
201 /* This should be only for MSIs */
202 if (WARN_ON(xd->flags & XIVE_IRQ_FLAG_LSI))
203 return false;
204
205 /* Those interrupts should always have a trigger page */
206 if (WARN_ON(!xd->trig_mmio))
207 return false;
208
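	/* A store of any value to the trigger page fires the interrupt */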
209 out_be64(xd->trig_mmio, 0);
210
211 return true;
212}
213
214static irqreturn_t xive_esc_irq(int irq, void *data)
215{
216 struct kvm_vcpu *vcpu = data;
217
Benjamin Herrenschmidt2267ea72018-01-12 13:37:13 +1100218 vcpu->arch.irq_pending = 1;
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +1000219 smp_mb();
220 if (vcpu->arch.ceded)
221 kvmppc_fast_vcpu_kick(vcpu);
222
Benjamin Herrenschmidt9b9b13a2018-01-12 13:37:16 +1100223 /* Since we have the no-EOI flag, the interrupt is effectively
224 * disabled now. Clearing xive_esc_on means we won't bother
225 * doing so on the next entry.
226 *
227 * This also allows the entry code to know that if a PQ combination
228 * of 10 is observed while xive_esc_on is true, it means the queue
229 * contains an unprocessed escalation interrupt. We don't make use of
230 * that knowledge today but might (see comment in book3s_hv_rmhandlers.S).
231 */
232 vcpu->arch.xive_esc_on = false;
233
Paul Mackerrasda15c032019-08-13 20:06:48 +1000234 /* This orders xive_esc_on = false vs. subsequent stale_p = true */
235 smp_wmb(); /* goes with smp_mb() in cleanup_single_escalation */
236
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +1000237 return IRQ_HANDLED;
238}
239
Cédric Le Goater13ce3292019-04-18 12:39:31 +0200240int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio,
241 bool single_escalation)
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +1000242{
243 struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
244 struct xive_q *q = &xc->queues[prio];
245 char *name = NULL;
246 int rc;
247
248 /* Already there ? */
249 if (xc->esc_virq[prio])
250 return 0;
251
252 /* Hook up the escalation interrupt */
253 xc->esc_virq[prio] = irq_create_mapping(NULL, q->esc_irq);
254 if (!xc->esc_virq[prio]) {
255 pr_err("Failed to map escalation interrupt for queue %d of VCPU %d\n",
256 prio, xc->server_num);
257 return -EIO;
258 }
259
Cédric Le Goater13ce3292019-04-18 12:39:31 +0200260 if (single_escalation)
Benjamin Herrenschmidtbf4159da2018-01-12 13:37:12 +1100261 name = kasprintf(GFP_KERNEL, "kvm-%d-%d",
262 vcpu->kvm->arch.lpid, xc->server_num);
263 else
264 name = kasprintf(GFP_KERNEL, "kvm-%d-%d-%d",
265 vcpu->kvm->arch.lpid, xc->server_num, prio);
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +1000266 if (!name) {
267 pr_err("Failed to allocate escalation irq name for queue %d of VCPU %d\n",
268 prio, xc->server_num);
269 rc = -ENOMEM;
270 goto error;
271 }
Benjamin Herrenschmidtbf4159da2018-01-12 13:37:12 +1100272
273 pr_devel("Escalation %s irq %d (prio %d)\n", name, xc->esc_virq[prio], prio);
274
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +1000275 rc = request_irq(xc->esc_virq[prio], xive_esc_irq,
276 IRQF_NO_THREAD, name, vcpu);
277 if (rc) {
278 pr_err("Failed to request escalation interrupt for queue %d of VCPU %d\n",
279 prio, xc->server_num);
280 goto error;
281 }
282 xc->esc_virq_names[prio] = name;
Benjamin Herrenschmidt9b9b13a2018-01-12 13:37:16 +1100283
284 /* In single escalation mode, we grab the ESB MMIO of the
285 * interrupt and mask it. Also populate the VCPU v/raddr
286 * of the ESB page for use by asm entry/exit code. Finally
Cédric Le Goater4f1c3f72020-12-10 18:14:39 +0100287 * set the XIVE_IRQ_FLAG_NO_EOI flag which will prevent the
Benjamin Herrenschmidt9b9b13a2018-01-12 13:37:16 +1100288 * core code from performing an EOI on the escalation
289 * interrupt, thus leaving it effectively masked after
290 * it fires once.
291 */
Cédric Le Goater13ce3292019-04-18 12:39:31 +0200292 if (single_escalation) {
Benjamin Herrenschmidt9b9b13a2018-01-12 13:37:16 +1100293 struct irq_data *d = irq_get_irq_data(xc->esc_virq[prio]);
294 struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
295
296 xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_01);
297 vcpu->arch.xive_esc_raddr = xd->eoi_page;
298 vcpu->arch.xive_esc_vaddr = (__force u64)xd->eoi_mmio;
Cédric Le Goater4f1c3f72020-12-10 18:14:39 +0100299 xd->flags |= XIVE_IRQ_FLAG_NO_EOI;
Benjamin Herrenschmidt9b9b13a2018-01-12 13:37:16 +1100300 }
301
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +1000302 return 0;
303error:
304 irq_dispose_mapping(xc->esc_virq[prio]);
305 xc->esc_virq[prio] = 0;
306 kfree(name);
307 return rc;
308}
309
310static int xive_provision_queue(struct kvm_vcpu *vcpu, u8 prio)
311{
312 struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
313 struct kvmppc_xive *xive = xc->xive;
314 struct xive_q *q = &xc->queues[prio];
315 void *qpage;
316 int rc;
317
318 if (WARN_ON(q->qpage))
319 return 0;
320
321 /* Allocate the queue and retrieve info on the current node for now */
322 qpage = (__be32 *)__get_free_pages(GFP_KERNEL, xive->q_page_order);
323 if (!qpage) {
324 pr_err("Failed to allocate queue %d for VCPU %d\n",
325 prio, xc->server_num);
Ingo Molnared7158b2018-02-22 10:54:55 +0100326 return -ENOMEM;
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +1000327 }
328 memset(qpage, 0, 1 << xive->q_order);
329
330 /*
331 * Reconfigure the queue. This will set q->qpage only once the
332 * queue is fully configured. This is a requirement for prio 0
333 * as we will stop doing EOIs for every IPI as soon as we observe
334 * qpage being non-NULL, and instead will only EOI when we receive
335 * the corresponding queue 0 entries.
336 */
337 rc = xive_native_configure_queue(xc->vp_id, q, prio, qpage,
338 xive->q_order, true);
339 if (rc)
340 pr_err("Failed to configure queue %d for VCPU %d\n",
341 prio, xc->server_num);
342 return rc;
343}
344
Cédric Le Goater7e10b9a2019-05-24 15:20:30 +0200345/* Called with xive->lock held */
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +1000346static int xive_check_provisioning(struct kvm *kvm, u8 prio)
347{
348 struct kvmppc_xive *xive = kvm->arch.xive;
349 struct kvm_vcpu *vcpu;
350 int i, rc;
351
Cédric Le Goater7e10b9a2019-05-24 15:20:30 +0200352 lockdep_assert_held(&xive->lock);
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +1000353
354 /* Already provisioned ? */
355 if (xive->qmap & (1 << prio))
356 return 0;
357
358 pr_devel("Provisioning prio... %d\n", prio);
359
Benjamin Herrenschmidtbf4159da2018-01-12 13:37:12 +1100360 /* Provision each VCPU and enable escalations if needed */
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +1000361 kvm_for_each_vcpu(i, vcpu, kvm) {
362 if (!vcpu->arch.xive_vcpu)
363 continue;
364 rc = xive_provision_queue(vcpu, prio);
Benjamin Herrenschmidtbf4159da2018-01-12 13:37:12 +1100365 if (rc == 0 && !xive->single_escalation)
Cédric Le Goater13ce3292019-04-18 12:39:31 +0200366 kvmppc_xive_attach_escalation(vcpu, prio,
367 xive->single_escalation);
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +1000368 if (rc)
369 return rc;
370 }
371
372 /* Order previous stores and mark it as provisioned */
373 mb();
374 xive->qmap |= (1 << prio);
375 return 0;
376}
377
378static void xive_inc_q_pending(struct kvm *kvm, u32 server, u8 prio)
379{
380 struct kvm_vcpu *vcpu;
381 struct kvmppc_xive_vcpu *xc;
382 struct xive_q *q;
383
384 /* Locate target server */
385 vcpu = kvmppc_xive_find_server(kvm, server);
386 if (!vcpu) {
387 pr_warn("%s: Can't find server %d\n", __func__, server);
388 return;
389 }
390 xc = vcpu->arch.xive_vcpu;
391 if (WARN_ON(!xc))
392 return;
393
394 q = &xc->queues[prio];
395 atomic_inc(&q->pending_count);
396}
397
398static int xive_try_pick_queue(struct kvm_vcpu *vcpu, u8 prio)
399{
400 struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
401 struct xive_q *q;
402 u32 max;
403
404 if (WARN_ON(!xc))
405 return -ENXIO;
406 if (!xc->valid)
407 return -ENXIO;
408
409 q = &xc->queues[prio];
410 if (WARN_ON(!q->qpage))
411 return -ENXIO;
412
413 /* Calculate max number of interrupts in that queue. */
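	/* q->msk is the queue index mask (size - 1), hence the "+ 1" below */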
414 max = (q->msk + 1) - XIVE_Q_GAP;
415 return atomic_add_unless(&q->count, 1, max) ? 0 : -EBUSY;
416}
417
Cédric Le Goatere8676ce2019-04-18 12:39:30 +0200418int kvmppc_xive_select_target(struct kvm *kvm, u32 *server, u8 prio)
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +1000419{
420 struct kvm_vcpu *vcpu;
421 int i, rc;
422
423 /* Locate target server */
424 vcpu = kvmppc_xive_find_server(kvm, *server);
425 if (!vcpu) {
426 pr_devel("Can't find server %d\n", *server);
427 return -EINVAL;
428 }
429
430 pr_devel("Finding irq target on 0x%x/%d...\n", *server, prio);
431
432 /* Try pick it */
433 rc = xive_try_pick_queue(vcpu, prio);
434 if (rc == 0)
435 return rc;
436
437 pr_devel(" .. failed, looking up candidate...\n");
438
439 /* Failed, pick another VCPU */
440 kvm_for_each_vcpu(i, vcpu, kvm) {
441 if (!vcpu->arch.xive_vcpu)
442 continue;
443 rc = xive_try_pick_queue(vcpu, prio);
444 if (rc == 0) {
445 *server = vcpu->arch.xive_vcpu->server_num;
446 pr_devel(" found on 0x%x/%d\n", *server, prio);
447 return rc;
448 }
449 }
450 pr_devel(" no available target !\n");
451
452 /* No available target ! */
453 return -EBUSY;
454}
455
456static u8 xive_lock_and_mask(struct kvmppc_xive *xive,
457 struct kvmppc_xive_src_block *sb,
458 struct kvmppc_xive_irq_state *state)
459{
460 struct xive_irq_data *xd;
461 u32 hw_num;
462 u8 old_prio;
463 u64 val;
464
465 /*
466 * Take the lock, set masked, try again if racing
467 * with H_EOI
468 */
469 for (;;) {
470 arch_spin_lock(&sb->lock);
471 old_prio = state->guest_priority;
472 state->guest_priority = MASKED;
473 mb();
474 if (!state->in_eoi)
475 break;
476 state->guest_priority = old_prio;
477 arch_spin_unlock(&sb->lock);
478 }
479
480 /* No change ? Bail */
481 if (old_prio == MASKED)
482 return old_prio;
483
484 /* Get the right irq */
485 kvmppc_xive_select_irq(state, &hw_num, &xd);
486
Cédric Le Goaterb5277d12020-12-10 18:14:46 +0100487 /* Set PQ to 10, return old P and old Q and remember them */
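	/* The ESB load returns the previous PQ bits: bit 1 is P, bit 0 is Q */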
488 val = xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_10);
489 state->old_p = !!(val & 2);
490 state->old_q = !!(val & 1);
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +1000491
Cédric Le Goaterb5277d12020-12-10 18:14:46 +0100492 /*
493 * Synchronize hardware to ensure the queues are updated when
494 * masking
495 */
496 xive_native_sync_source(hw_num);
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +1000497
498 return old_prio;
499}
500
501static void xive_lock_for_unmask(struct kvmppc_xive_src_block *sb,
502 struct kvmppc_xive_irq_state *state)
503{
504 /*
505 * Take the lock try again if racing with H_EOI
506 */
507 for (;;) {
508 arch_spin_lock(&sb->lock);
509 if (!state->in_eoi)
510 break;
511 arch_spin_unlock(&sb->lock);
512 }
513}
514
515static void xive_finish_unmask(struct kvmppc_xive *xive,
516 struct kvmppc_xive_src_block *sb,
517 struct kvmppc_xive_irq_state *state,
518 u8 prio)
519{
520 struct xive_irq_data *xd;
521 u32 hw_num;
522
523 /* If we aren't changing a thing, move on */
524 if (state->guest_priority != MASKED)
525 goto bail;
526
527 /* Get the right irq */
528 kvmppc_xive_select_irq(state, &hw_num, &xd);
529
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +1000530 /* Old Q set, set PQ to 11 */
531 if (state->old_q)
532 xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_11);
533
534 /*
535 * If not old P, then perform an "effective" EOI,
536 * on the source. This will handle the cases where
537 * FW EOI is needed.
538 */
539 if (!state->old_p)
540 xive_vm_source_eoi(hw_num, xd);
541
542 /* Synchronize ordering and mark unmasked */
543 mb();
544bail:
545 state->guest_priority = prio;
546}
547
548/*
549 * Target an interrupt to a given server/prio, this will fallback
550 * to another server if necessary and perform the HW targetting
551 * updates as needed
552 *
553 * NOTE: Must be called with the state lock held
554 */
555static int xive_target_interrupt(struct kvm *kvm,
556 struct kvmppc_xive_irq_state *state,
557 u32 server, u8 prio)
558{
559 struct kvmppc_xive *xive = kvm->arch.xive;
560 u32 hw_num;
561 int rc;
562
563 /*
564 * This will return a tentative server and actual
565 * priority. The count for that new target will have
566 * already been incremented.
567 */
Cédric Le Goatere8676ce2019-04-18 12:39:30 +0200568 rc = kvmppc_xive_select_target(kvm, &server, prio);
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +1000569
570 /*
571 * We failed to find a target ? Not much we can do
572 * at least until we support the GIQ.
573 */
574 if (rc)
575 return rc;
576
577 /*
578 * Increment the old queue pending count if there
579 * was one so that the old queue count gets adjusted later
580 * when observed to be empty.
581 */
582 if (state->act_priority != MASKED)
583 xive_inc_q_pending(kvm,
584 state->act_server,
585 state->act_priority);
586 /*
587 * Update state and HW
588 */
589 state->act_priority = prio;
590 state->act_server = server;
591
592 /* Get the right irq */
593 kvmppc_xive_select_irq(state, &hw_num, NULL);
594
595 return xive_native_configure_irq(hw_num,
Cédric Le Goatereacc56b2019-04-18 12:39:28 +0200596 kvmppc_xive_vp(xive, server),
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +1000597 prio, state->number);
598}
599
600/*
601 * Targetting rules: In order to avoid losing track of
602 * pending interrupts across mask and unmask, which would
603 * allow queue overflows, we implement the following rules:
604 *
605 * - Unless it was never enabled (or we run out of capacity)
606 * an interrupt is always targetted at a valid server/queue
607 * pair even when "masked" by the guest. This pair tends to
608 * be the last one used but it can be changed under some
609 * circumstances. That allows us to separate targetting
610 * from masking, we only handle accounting during (re)targetting,
611 * this also allows us to let an interrupt drain into its target
612 * queue after masking, avoiding complex schemes to remove
613 * interrupts out of remote processor queues.
614 *
615 * - When masking, we set PQ to 10 and save the previous value
616 * of P and Q.
617 *
618 * - When unmasking, if saved Q was set, we set PQ to 11
619 * otherwise we leave PQ to the HW state which will be either
620 * 10 if nothing happened or 11 if the interrupt fired while
621 * masked. Effectively we are OR'ing the previous Q into the
622 * HW Q.
623 *
624 * Then if saved P is clear, we do an effective EOI (Q->P->Trigger)
625 * which will unmask the interrupt and shoot a new one if Q was
626 * set.
627 *
628 * Otherwise (saved P is set) we leave PQ unchanged (so 10 or 11,
629 * effectively meaning an H_EOI from the guest is still expected
630 * for that interrupt).
631 *
632 * - If H_EOI occurs while masked, we clear the saved P.
633 *
634 * - When changing target, we account on the new target and
635 * increment a separate "pending" counter on the old one.
636 * This pending counter will be used to decrement the old
637 * target's count when its queue has been observed empty.
638 */
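/*
 * Quick reference for the ESB PQ states as used by the rules above:
 *
 * - 00: source enabled, the next trigger is delivered to a queue
 * - 01: source "hard off" (how this file parks IPIs)
 * - 10: masked, no new notification; a further trigger latches into Q
 * - 11: masked with a trigger already latched in Q
 */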
639
640int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server,
641 u32 priority)
642{
643 struct kvmppc_xive *xive = kvm->arch.xive;
644 struct kvmppc_xive_src_block *sb;
645 struct kvmppc_xive_irq_state *state;
646 u8 new_act_prio;
647 int rc = 0;
648 u16 idx;
649
650 if (!xive)
651 return -ENODEV;
652
653 pr_devel("set_xive ! irq 0x%x server 0x%x prio %d\n",
654 irq, server, priority);
655
656 /* First, check provisioning of queues */
Cédric Le Goater7e10b9a2019-05-24 15:20:30 +0200657 if (priority != MASKED) {
658 mutex_lock(&xive->lock);
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +1000659 rc = xive_check_provisioning(xive->kvm,
660 xive_prio_from_guest(priority));
Cédric Le Goater7e10b9a2019-05-24 15:20:30 +0200661 mutex_unlock(&xive->lock);
662 }
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +1000663 if (rc) {
664 pr_devel(" provisioning failure %d !\n", rc);
665 return rc;
666 }
667
668 sb = kvmppc_xive_find_source(xive, irq, &idx);
669 if (!sb)
670 return -EINVAL;
671 state = &sb->irq_state[idx];
672
673 /*
674 * We first handle masking/unmasking since the locking
675 * might need to be retried due to EOIs, we'll handle
676 * targetting changes later. These functions will return
677 * with the SB lock held.
678 *
679 * xive_lock_and_mask() will also set state->guest_priority
680 * but won't otherwise change other fields of the state.
681 *
682 * xive_lock_for_unmask will not actually unmask, this will
683 * be done later by xive_finish_unmask() once the targetting
684 * has been done, so we don't try to unmask an interrupt
685 * that hasn't yet been targetted.
686 */
687 if (priority == MASKED)
688 xive_lock_and_mask(xive, sb, state);
689 else
690 xive_lock_for_unmask(sb, state);
691
692
693 /*
694 * Then we handle targetting.
695 *
696 * First calculate a new "actual priority"
697 */
698 new_act_prio = state->act_priority;
699 if (priority != MASKED)
700 new_act_prio = xive_prio_from_guest(priority);
701
702 pr_devel(" new_act_prio=%x act_server=%x act_prio=%x\n",
703 new_act_prio, state->act_server, state->act_priority);
704
705 /*
706 * Then check if we actually need to change anything,
707 *
708 * The condition for re-targetting the interrupt is that
709 * we have a valid new priority (new_act_prio is not 0xff)
710 * and either the server or the priority changed.
711 *
712 * Note: If act_priority was ff and the new priority is
713 * also ff, we don't do anything and leave the interrupt
714 * untargetted. An attempt to do an int_on on an
715 * untargetted interrupt will fail. If that is a problem
716 * we could initialize interrupts with a valid default.
717 */
718
719 if (new_act_prio != MASKED &&
720 (state->act_server != server ||
721 state->act_priority != new_act_prio))
722 rc = xive_target_interrupt(kvm, state, server, new_act_prio);
723
724 /*
725 * Perform the final unmasking of the interrupt source
726 * if necessary
727 */
728 if (priority != MASKED)
729 xive_finish_unmask(xive, sb, state, priority);
730
731 /*
732 * Finally Update saved_priority to match. Only int_on/off
733 * set this field to a different value.
734 */
735 state->saved_priority = priority;
736
737 arch_spin_unlock(&sb->lock);
738 return rc;
739}
740
741int kvmppc_xive_get_xive(struct kvm *kvm, u32 irq, u32 *server,
742 u32 *priority)
743{
744 struct kvmppc_xive *xive = kvm->arch.xive;
745 struct kvmppc_xive_src_block *sb;
746 struct kvmppc_xive_irq_state *state;
747 u16 idx;
748
749 if (!xive)
750 return -ENODEV;
751
752 sb = kvmppc_xive_find_source(xive, irq, &idx);
753 if (!sb)
754 return -EINVAL;
755 state = &sb->irq_state[idx];
756 arch_spin_lock(&sb->lock);
Sam Bobroff2fb1e942017-09-26 16:47:04 +1000757 *server = state->act_server;
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +1000758 *priority = state->guest_priority;
759 arch_spin_unlock(&sb->lock);
760
761 return 0;
762}
763
764int kvmppc_xive_int_on(struct kvm *kvm, u32 irq)
765{
766 struct kvmppc_xive *xive = kvm->arch.xive;
767 struct kvmppc_xive_src_block *sb;
768 struct kvmppc_xive_irq_state *state;
769 u16 idx;
770
771 if (!xive)
772 return -ENODEV;
773
774 sb = kvmppc_xive_find_source(xive, irq, &idx);
775 if (!sb)
776 return -EINVAL;
777 state = &sb->irq_state[idx];
778
779 pr_devel("int_on(irq=0x%x)\n", irq);
780
781 /*
782 * Check if interrupt was not targetted
783 */
784 if (state->act_priority == MASKED) {
785 pr_devel("int_on on untargetted interrupt\n");
786 return -EINVAL;
787 }
788
789 /* If saved_priority is 0xff, do nothing */
790 if (state->saved_priority == MASKED)
791 return 0;
792
793 /*
794 * Lock and unmask it.
795 */
796 xive_lock_for_unmask(sb, state);
797 xive_finish_unmask(xive, sb, state, state->saved_priority);
798 arch_spin_unlock(&sb->lock);
799
800 return 0;
801}
802
803int kvmppc_xive_int_off(struct kvm *kvm, u32 irq)
804{
805 struct kvmppc_xive *xive = kvm->arch.xive;
806 struct kvmppc_xive_src_block *sb;
807 struct kvmppc_xive_irq_state *state;
808 u16 idx;
809
810 if (!xive)
811 return -ENODEV;
812
813 sb = kvmppc_xive_find_source(xive, irq, &idx);
814 if (!sb)
815 return -EINVAL;
816 state = &sb->irq_state[idx];
817
818 pr_devel("int_off(irq=0x%x)\n", irq);
819
820 /*
821 * Lock and mask
822 */
823 state->saved_priority = xive_lock_and_mask(xive, sb, state);
824 arch_spin_unlock(&sb->lock);
825
826 return 0;
827}
828
829static bool xive_restore_pending_irq(struct kvmppc_xive *xive, u32 irq)
830{
831 struct kvmppc_xive_src_block *sb;
832 struct kvmppc_xive_irq_state *state;
833 u16 idx;
834
835 sb = kvmppc_xive_find_source(xive, irq, &idx);
836 if (!sb)
837 return false;
838 state = &sb->irq_state[idx];
839 if (!state->valid)
840 return false;
841
842 /*
843 * Trigger the IPI. This assumes we never restore a pass-through
844 * interrupt which should be safe enough
845 */
846 xive_irq_trigger(&state->ipi_data);
847
848 return true;
849}
850
851u64 kvmppc_xive_get_icp(struct kvm_vcpu *vcpu)
852{
853 struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
854
855 if (!xc)
856 return 0;
857
858 /* Return the per-cpu state for state saving/migration */
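	/*
	 * Pending interrupts live in the XIVE queues, so no XISR is
	 * reported and pending_pri is left at 0xff.
	 */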
859 return (u64)xc->cppr << KVM_REG_PPC_ICP_CPPR_SHIFT |
Laurent Vivier7333b5a2017-12-12 18:23:56 +0100860 (u64)xc->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT |
861 (u64)0xff << KVM_REG_PPC_ICP_PPRI_SHIFT;
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +1000862}
863
864int kvmppc_xive_set_icp(struct kvm_vcpu *vcpu, u64 icpval)
865{
866 struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
867 struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
868 u8 cppr, mfrr;
869 u32 xisr;
870
871 if (!xc || !xive)
872 return -ENOENT;
873
874 /* Grab individual state fields. We don't use pending_pri */
875 cppr = icpval >> KVM_REG_PPC_ICP_CPPR_SHIFT;
876 xisr = (icpval >> KVM_REG_PPC_ICP_XISR_SHIFT) &
877 KVM_REG_PPC_ICP_XISR_MASK;
878 mfrr = icpval >> KVM_REG_PPC_ICP_MFRR_SHIFT;
879
880 pr_devel("set_icp vcpu %d cppr=0x%x mfrr=0x%x xisr=0x%x\n",
881 xc->server_num, cppr, mfrr, xisr);
882
883 /*
884 * We can't update the state of a "pushed" VCPU, but that
Paul Mackerras6f868402019-04-29 11:24:03 +1000885 * shouldn't happen because the vcpu->mutex makes running a
886 * vcpu mutually exclusive with doing one_reg get/set on it.
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +1000887 */
888 if (WARN_ON(vcpu->arch.xive_pushed))
889 return -EIO;
890
891 /* Update VCPU HW saved state */
892 vcpu->arch.xive_saved_state.cppr = cppr;
893 xc->hw_cppr = xc->cppr = cppr;
894
895 /*
896 * Update MFRR state. If it's not 0xff, we mark the VCPU as
897 * having a pending MFRR change, which will re-evaluate the
898 * target. The VCPU will thus potentially get a spurious
899 * interrupt but that's not a big deal.
900 */
901 xc->mfrr = mfrr;
902 if (mfrr < cppr)
903 xive_irq_trigger(&xc->vp_ipi_data);
904
905 /*
906 * Now saved XIRR is "interesting". It means there's something in
907 * the legacy "1 element" queue... for an IPI we simply ignore it,
908 * as the MFRR restore will handle that. For anything else we need
909 * to force a resend of the source.
910 * However the source may not have been setup yet. If that's the
911 * case, we keep that info and increment a counter in the xive to
912 * tell subsequent xive_set_source() to go look.
913 */
914 if (xisr > XICS_IPI && !xive_restore_pending_irq(xive, xisr)) {
915 xc->delayed_irq = xisr;
916 xive->delayed_irqs++;
917 pr_devel(" xisr restore delayed\n");
918 }
919
920 return 0;
921}
922
923int kvmppc_xive_set_mapped(struct kvm *kvm, unsigned long guest_irq,
924 struct irq_desc *host_desc)
925{
926 struct kvmppc_xive *xive = kvm->arch.xive;
927 struct kvmppc_xive_src_block *sb;
928 struct kvmppc_xive_irq_state *state;
929 struct irq_data *host_data = irq_desc_get_irq_data(host_desc);
930 unsigned int host_irq = irq_desc_get_irq(host_desc);
931 unsigned int hw_irq = (unsigned int)irqd_to_hwirq(host_data);
932 u16 idx;
933 u8 prio;
934 int rc;
935
936 if (!xive)
937 return -ENODEV;
938
939 pr_devel("set_mapped girq 0x%lx host HW irq 0x%x...\n", guest_irq, hw_irq);
940
941 sb = kvmppc_xive_find_source(xive, guest_irq, &idx);
942 if (!sb)
943 return -EINVAL;
944 state = &sb->irq_state[idx];
945
946 /*
947 * Mark the passed-through interrupt as going to a VCPU,
948 * this will prevent further EOIs and similar operations
949 * from the XIVE code. It will also mask the interrupt
950 * to either PQ=10 or 11 state, the latter if the interrupt
951 * is pending. This will allow us to unmask or retrigger it
952 * after routing it to the guest with a simple EOI.
953 *
954 * The "state" argument is a "token", all it needs is to be
955 * non-NULL to switch to passed-through or NULL for the
956 * other way around. We may not yet have an actual VCPU
957 * target here and we don't really care.
958 */
959 rc = irq_set_vcpu_affinity(host_irq, state);
960 if (rc) {
961 pr_err("Failed to set VCPU affinity for irq %d\n", host_irq);
962 return rc;
963 }
964
965 /*
966 * Mask and read state of IPI. We need to know if its P bit
967 * is set as that means it's potentially already using a
968 * queue entry in the target
969 */
970 prio = xive_lock_and_mask(xive, sb, state);
971 pr_devel(" old IPI prio %02x P:%d Q:%d\n", prio,
972 state->old_p, state->old_q);
973
974 /* Turn the IPI hard off */
975 xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
976
Cédric Le Goater232b9842019-04-18 12:39:39 +0200977 /*
978 * Reset ESB guest mapping. Needed when ESB pages are exposed
979 * to the guest in XIVE native mode
980 */
981 if (xive->ops && xive->ops->reset_mapped)
982 xive->ops->reset_mapped(kvm, guest_irq);
983
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +1000984 /* Grab info about irq */
985 state->pt_number = hw_irq;
986 state->pt_data = irq_data_get_irq_handler_data(host_data);
987
988 /*
989 * Configure the IRQ to match the existing configuration of
990 * the IPI if it was already targetted. Otherwise this will
991 * mask the interrupt in a lossy way (act_priority is 0xff)
992 * which is fine for a never started interrupt.
993 */
994 xive_native_configure_irq(hw_irq,
Cédric Le Goatereacc56b2019-04-18 12:39:28 +0200995 kvmppc_xive_vp(xive, state->act_server),
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +1000996 state->act_priority, state->number);
997
998 /*
999 * We do an EOI to enable the interrupt (and retrigger if needed)
1000 * if the guest has the interrupt unmasked and the P bit was *not*
1001 * set in the IPI. If it was set, we know a slot may still be in
1002 * use in the target queue thus we have to wait for a guest
1003 * originated EOI
1004 */
1005 if (prio != MASKED && !state->old_p)
1006 xive_vm_source_eoi(hw_irq, state->pt_data);
1007
1008 /* Clear old_p/old_q as they are no longer relevant */
1009 state->old_p = state->old_q = false;
1010
1011 /* Restore guest prio (unlocks EOI) */
1012 mb();
1013 state->guest_priority = prio;
1014 arch_spin_unlock(&sb->lock);
1015
1016 return 0;
1017}
1018EXPORT_SYMBOL_GPL(kvmppc_xive_set_mapped);
1019
1020int kvmppc_xive_clr_mapped(struct kvm *kvm, unsigned long guest_irq,
1021 struct irq_desc *host_desc)
1022{
1023 struct kvmppc_xive *xive = kvm->arch.xive;
1024 struct kvmppc_xive_src_block *sb;
1025 struct kvmppc_xive_irq_state *state;
1026 unsigned int host_irq = irq_desc_get_irq(host_desc);
1027 u16 idx;
1028 u8 prio;
1029 int rc;
1030
1031 if (!xive)
1032 return -ENODEV;
1033
1034 pr_devel("clr_mapped girq 0x%lx...\n", guest_irq);
1035
1036 sb = kvmppc_xive_find_source(xive, guest_irq, &idx);
1037 if (!sb)
1038 return -EINVAL;
1039 state = &sb->irq_state[idx];
1040
1041 /*
1042 * Mask and read state of IRQ. We need to know if its P bit
1043 * is set as that means it's potentially already using a
1044 * queue entry in the target
1045 */
1046 prio = xive_lock_and_mask(xive, sb, state);
1047 pr_devel(" old IRQ prio %02x P:%d Q:%d\n", prio,
1048 state->old_p, state->old_q);
1049
1050 /*
1051 * If old_p is set, the interrupt is pending, we switch it to
1052 * PQ=11. This will force a resend in the host so the interrupt
1053 * isn't lost to whatever host driver may pick it up
1054 */
1055 if (state->old_p)
1056 xive_vm_esb_load(state->pt_data, XIVE_ESB_SET_PQ_11);
1057
1058 /* Release the passed-through interrupt to the host */
1059 rc = irq_set_vcpu_affinity(host_irq, NULL);
1060 if (rc) {
1061 pr_err("Failed to clr VCPU affinity for irq %d\n", host_irq);
1062 return rc;
1063 }
1064
1065 /* Forget about the IRQ */
1066 state->pt_number = 0;
1067 state->pt_data = NULL;
1068
Cédric Le Goater232b9842019-04-18 12:39:39 +02001069 /*
1070 * Reset ESB guest mapping. Needed when ESB pages are exposed
1071 * to the guest in XIVE native mode
1072 */
1073 if (xive->ops && xive->ops->reset_mapped) {
1074 xive->ops->reset_mapped(kvm, guest_irq);
1075 }
1076
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10001077 /* Reconfigure the IPI */
1078 xive_native_configure_irq(state->ipi_number,
Cédric Le Goatereacc56b2019-04-18 12:39:28 +02001079 kvmppc_xive_vp(xive, state->act_server),
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10001080 state->act_priority, state->number);
1081
1082 /*
1083 * If old_p is set (we have a queue entry potentially
1084 * occupied) or the interrupt is masked, we set the IPI
1085 * to PQ=10 state. Otherwise we just re-enable it (PQ=00).
1086 */
1087 if (prio == MASKED || state->old_p)
1088 xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_10);
1089 else
1090 xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_00);
1091
1092 /* Restore guest prio (unlocks EOI) */
1093 mb();
1094 state->guest_priority = prio;
1095 arch_spin_unlock(&sb->lock);
1096
1097 return 0;
1098}
1099EXPORT_SYMBOL_GPL(kvmppc_xive_clr_mapped);
1100
Cédric Le Goatereacc56b2019-04-18 12:39:28 +02001101void kvmppc_xive_disable_vcpu_interrupts(struct kvm_vcpu *vcpu)
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10001102{
1103 struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
1104 struct kvm *kvm = vcpu->kvm;
1105 struct kvmppc_xive *xive = kvm->arch.xive;
1106 int i, j;
1107
1108 for (i = 0; i <= xive->max_sbid; i++) {
1109 struct kvmppc_xive_src_block *sb = xive->src_blocks[i];
1110
1111 if (!sb)
1112 continue;
1113 for (j = 0; j < KVMPPC_XICS_IRQ_PER_ICS; j++) {
1114 struct kvmppc_xive_irq_state *state = &sb->irq_state[j];
1115
1116 if (!state->valid)
1117 continue;
1118 if (state->act_priority == MASKED)
1119 continue;
1120 if (state->act_server != xc->server_num)
1121 continue;
1122
1123 /* Clean it up */
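				/*
				 * Park the source ESBs (PQ=01) and mask the
				 * HW routing of both the IPI and any
				 * passed-through interrupt.
				 */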
1124 arch_spin_lock(&sb->lock);
1125 state->act_priority = MASKED;
1126 xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
1127 xive_native_configure_irq(state->ipi_number, 0, MASKED, 0);
1128 if (state->pt_number) {
1129 xive_vm_esb_load(state->pt_data, XIVE_ESB_SET_PQ_01);
1130 xive_native_configure_irq(state->pt_number, 0, MASKED, 0);
1131 }
1132 arch_spin_unlock(&sb->lock);
1133 }
1134 }
Paul Mackerras0caecf52019-04-26 16:54:14 +10001135
1136 /* Disable vcpu's escalation interrupt */
1137 if (vcpu->arch.xive_esc_on) {
1138 __raw_readq((void __iomem *)(vcpu->arch.xive_esc_vaddr +
1139 XIVE_ESB_SET_PQ_01));
1140 vcpu->arch.xive_esc_on = false;
1141 }
1142
1143 /*
1144 * Clear pointers to escalation interrupt ESB.
1145 * This is safe because the vcpu->mutex is held, preventing
1146 * any other CPU from concurrently executing a KVM_RUN ioctl.
1147 */
1148 vcpu->arch.xive_esc_vaddr = 0;
1149 vcpu->arch.xive_esc_raddr = 0;
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10001150}
1151
Paul Mackerrasda15c032019-08-13 20:06:48 +10001152/*
1153 * In single escalation mode, the escalation interrupt is marked so
1154 * that EOI doesn't re-enable it, but just sets the stale_p flag to
1155 * indicate that the P bit has already been dealt with. However, the
1156 * assembly code that enters the guest sets PQ to 00 without clearing
1157 * stale_p (because it has no easy way to address it). Hence we have
1158 * to adjust stale_p before shutting down the interrupt.
1159 */
1160void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu,
1161 struct kvmppc_xive_vcpu *xc, int irq)
1162{
1163 struct irq_data *d = irq_get_irq_data(irq);
1164 struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
1165
1166 /*
1167 * This slightly odd sequence gives the right result
1168 * (i.e. stale_p set if xive_esc_on is false) even if
1169 * we race with xive_esc_irq() and xive_irq_eoi().
1170 */
1171 xd->stale_p = false;
1172 smp_mb(); /* paired with smp_wmb() in xive_esc_irq() */
1173 if (!vcpu->arch.xive_esc_on)
1174 xd->stale_p = true;
1175}
1176
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10001177void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu)
1178{
1179 struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
Cédric Le Goater5422e952019-04-18 12:39:42 +02001180 struct kvmppc_xive *xive = vcpu->kvm->arch.xive;
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10001181 int i;
1182
Cédric Le Goater5422e952019-04-18 12:39:42 +02001183 if (!kvmppc_xics_enabled(vcpu))
1184 return;
1185
1186 if (!xc)
1187 return;
1188
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10001189 pr_devel("cleanup_vcpu(cpu=%d)\n", xc->server_num);
1190
1191 /* Ensure no interrupt is still routed to that VP */
1192 xc->valid = false;
1193 kvmppc_xive_disable_vcpu_interrupts(vcpu);
1194
1195 /* Mask the VP IPI */
1196 xive_vm_esb_load(&xc->vp_ipi_data, XIVE_ESB_SET_PQ_01);
1197
Cédric Le Goater237aed42019-08-06 19:25:38 +02001198 /* Free escalations */
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10001199 for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10001200 if (xc->esc_virq[i]) {
Paul Mackerrasda15c032019-08-13 20:06:48 +10001201 if (xc->xive->single_escalation)
1202 xive_cleanup_single_escalation(vcpu, xc,
1203 xc->esc_virq[i]);
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10001204 free_irq(xc->esc_virq[i], vcpu);
1205 irq_dispose_mapping(xc->esc_virq[i]);
1206 kfree(xc->esc_virq_names[i]);
1207 }
Cédric Le Goater237aed42019-08-06 19:25:38 +02001208 }
1209
1210 /* Disable the VP */
1211 xive_native_disable_vp(xc->vp_id);
1212
Paul Mackerras8d4ba9c2019-08-13 20:01:00 +10001213 /* Clear the cam word so guest entry won't try to push context */
1214 vcpu->arch.xive_cam_word = 0;
1215
Cédric Le Goater237aed42019-08-06 19:25:38 +02001216 /* Free the queues */
1217 for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
1218 struct xive_q *q = &xc->queues[i];
1219
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10001220 xive_native_disable_queue(xc->vp_id, q, i);
1221 if (q->qpage) {
1222 free_pages((unsigned long)q->qpage,
1223 xive->q_page_order);
1224 q->qpage = NULL;
1225 }
1226 }
1227
1228 /* Free the IPI */
1229 if (xc->vp_ipi) {
1230 xive_cleanup_irq_data(&xc->vp_ipi_data);
1231 xive_native_free_irq(xc->vp_ipi);
1232 }
1233 /* Free the VP */
1234 kfree(xc);
Cédric Le Goater5422e952019-04-18 12:39:42 +02001235
1236 /* Cleanup the vcpu */
1237 vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT;
1238 vcpu->arch.xive_vcpu = NULL;
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10001239}
1240
Greg Kurz8db29ea2019-09-27 13:53:55 +02001241static bool kvmppc_xive_vcpu_id_valid(struct kvmppc_xive *xive, u32 cpu)
1242{
Greg Kurz062cfab2019-09-27 13:54:01 +02001243 /* We have a block of xive->nr_servers VPs. We just need to check
Greg Kurzf54db392020-11-30 13:19:27 +01001244 * packed vCPU ids are below that.
Greg Kurz8db29ea2019-09-27 13:53:55 +02001245 */
Greg Kurzf54db392020-11-30 13:19:27 +01001246 return kvmppc_pack_vcpu_id(xive->kvm, cpu) < xive->nr_servers;
Greg Kurz8db29ea2019-09-27 13:53:55 +02001247}
1248
1249int kvmppc_xive_compute_vp_id(struct kvmppc_xive *xive, u32 cpu, u32 *vp)
1250{
1251 u32 vp_id;
1252
1253 if (!kvmppc_xive_vcpu_id_valid(xive, cpu)) {
1254 pr_devel("Out of bounds !\n");
1255 return -EINVAL;
1256 }
1257
Greg Kurz062cfab2019-09-27 13:54:01 +02001258 if (xive->vp_base == XIVE_INVALID_VP) {
1259 xive->vp_base = xive_native_alloc_vp_block(xive->nr_servers);
1260 pr_devel("VP_Base=%x nr_servers=%d\n", xive->vp_base, xive->nr_servers);
1261
1262 if (xive->vp_base == XIVE_INVALID_VP)
1263 return -ENOSPC;
1264 }
1265
Greg Kurz8db29ea2019-09-27 13:53:55 +02001266 vp_id = kvmppc_xive_vp(xive, cpu);
1267 if (kvmppc_xive_vp_in_use(xive->kvm, vp_id)) {
1268 pr_devel("Duplicate !\n");
1269 return -EEXIST;
1270 }
1271
1272 *vp = vp_id;
1273
1274 return 0;
1275}
1276
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10001277int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
1278 struct kvm_vcpu *vcpu, u32 cpu)
1279{
1280 struct kvmppc_xive *xive = dev->private;
1281 struct kvmppc_xive_vcpu *xc;
1282 int i, r = -EBUSY;
Greg Kurz12ade69c2019-09-27 13:53:43 +02001283 u32 vp_id;
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10001284
1285 pr_devel("connect_vcpu(cpu=%d)\n", cpu);
1286
1287 if (dev->ops != &kvm_xive_ops) {
1288 pr_devel("Wrong ops !\n");
1289 return -EPERM;
1290 }
1291 if (xive->kvm != vcpu->kvm)
1292 return -EPERM;
Cédric Le Goater5422e952019-04-18 12:39:42 +02001293 if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT)
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10001294 return -EBUSY;
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10001295
1296 /* We need to synchronize with queue provisioning */
Cédric Le Goater7e10b9a2019-05-24 15:20:30 +02001297 mutex_lock(&xive->lock);
Greg Kurz12ade69c2019-09-27 13:53:43 +02001298
Greg Kurz8db29ea2019-09-27 13:53:55 +02001299 r = kvmppc_xive_compute_vp_id(xive, cpu, &vp_id);
1300 if (r)
Greg Kurz12ade69c2019-09-27 13:53:43 +02001301 goto bail;
Greg Kurz12ade69c2019-09-27 13:53:43 +02001302
1303 xc = kzalloc(sizeof(*xc), GFP_KERNEL);
1304 if (!xc) {
1305 r = -ENOMEM;
1306 goto bail;
1307 }
1308
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10001309 vcpu->arch.xive_vcpu = xc;
1310 xc->xive = xive;
1311 xc->vcpu = vcpu;
1312 xc->server_num = cpu;
Greg Kurz12ade69c2019-09-27 13:53:43 +02001313 xc->vp_id = vp_id;
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10001314 xc->mfrr = 0xff;
1315 xc->valid = true;
1316
1317 r = xive_native_get_vp_info(xc->vp_id, &xc->vp_cam, &xc->vp_chip_id);
1318 if (r)
1319 goto bail;
1320
1321 /* Configure VCPU fields for use by assembly push/pull */
1322 vcpu->arch.xive_saved_state.w01 = cpu_to_be64(0xff000000);
1323 vcpu->arch.xive_cam_word = cpu_to_be32(xc->vp_cam | TM_QW1W2_VO);
1324
1325 /* Allocate IPI */
1326 xc->vp_ipi = xive_native_alloc_irq();
1327 if (!xc->vp_ipi) {
Benjamin Herrenschmidtbf4159da2018-01-12 13:37:12 +11001328 pr_err("Failed to allocate xive irq for VCPU IPI\n");
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10001329 r = -EIO;
1330 goto bail;
1331 }
1332 pr_devel(" IPI=0x%x\n", xc->vp_ipi);
1333
1334 r = xive_native_populate_irq_data(xc->vp_ipi, &xc->vp_ipi_data);
1335 if (r)
1336 goto bail;
1337
1338 /*
Benjamin Herrenschmidtbf4159da2018-01-12 13:37:12 +11001339 * Enable the VP first as the single escalation mode will
1340 * affect escalation interrupts numbering
1341 */
1342 r = xive_native_enable_vp(xc->vp_id, xive->single_escalation);
1343 if (r) {
1344 pr_err("Failed to enable VP in OPAL, err %d\n", r);
1345 goto bail;
1346 }
1347
1348 /*
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10001349 * Initialize queues. Initially we set them all for no queueing
1350 * and we enable escalation for queue 0 only which we'll use for
1351 * our mfrr change notifications. If the VCPU is hot-plugged, we
Benjamin Herrenschmidtbf4159da2018-01-12 13:37:12 +11001352 * do handle provisioning however based on the existing "map"
1353 * of enabled queues.
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10001354 */
1355 for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
1356 struct xive_q *q = &xc->queues[i];
1357
Benjamin Herrenschmidtbf4159da2018-01-12 13:37:12 +11001358 /* Single escalation, no queue 7 */
1359 if (i == 7 && xive->single_escalation)
1360 break;
1361
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10001362 /* Is queue already enabled ? Provision it */
1363 if (xive->qmap & (1 << i)) {
1364 r = xive_provision_queue(vcpu, i);
Benjamin Herrenschmidtbf4159da2018-01-12 13:37:12 +11001365 if (r == 0 && !xive->single_escalation)
Cédric Le Goater13ce3292019-04-18 12:39:31 +02001366 kvmppc_xive_attach_escalation(
1367 vcpu, i, xive->single_escalation);
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10001368 if (r)
1369 goto bail;
1370 } else {
1371 r = xive_native_configure_queue(xc->vp_id,
1372 q, i, NULL, 0, true);
1373 if (r) {
1374 pr_err("Failed to configure queue %d for VCPU %d\n",
1375 i, cpu);
1376 goto bail;
1377 }
1378 }
1379 }
1380
1381 /* If not done above, attach priority 0 escalation */
Cédric Le Goater13ce3292019-04-18 12:39:31 +02001382 r = kvmppc_xive_attach_escalation(vcpu, 0, xive->single_escalation);
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10001383 if (r)
1384 goto bail;
1385
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10001386 /* Route the IPI */
1387 r = xive_native_configure_irq(xc->vp_ipi, xc->vp_id, 0, XICS_IPI);
1388 if (!r)
1389 xive_vm_esb_load(&xc->vp_ipi_data, XIVE_ESB_SET_PQ_00);
1390
1391bail:
Cédric Le Goater7e10b9a2019-05-24 15:20:30 +02001392 mutex_unlock(&xive->lock);
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10001393 if (r) {
1394 kvmppc_xive_cleanup_vcpu(vcpu);
1395 return r;
1396 }
1397
1398 vcpu->arch.irq_type = KVMPPC_IRQ_XICS;
1399 return 0;
1400}
1401
1402/*
1403 * Scanning of queues before/after migration save
1404 */
1405static void xive_pre_save_set_queued(struct kvmppc_xive *xive, u32 irq)
1406{
1407 struct kvmppc_xive_src_block *sb;
1408 struct kvmppc_xive_irq_state *state;
1409 u16 idx;
1410
1411 sb = kvmppc_xive_find_source(xive, irq, &idx);
1412 if (!sb)
1413 return;
1414
1415 state = &sb->irq_state[idx];
1416
1417 /* Some sanity checking */
1418 if (!state->valid) {
1419 pr_err("invalid irq 0x%x in cpu queue!\n", irq);
1420 return;
1421 }
1422
1423 /*
1424 * If the interrupt is in a queue it should have P set.
1425 * We warn so that it gets reported. A backtrace isn't useful
1426 * so no need to use a WARN_ON.
1427 */
1428 if (!state->saved_p)
1429 pr_err("Interrupt 0x%x is marked in a queue but P not set !\n", irq);
1430
1431 /* Set flag */
1432 state->in_queue = true;
1433}
1434
1435static void xive_pre_save_mask_irq(struct kvmppc_xive *xive,
1436 struct kvmppc_xive_src_block *sb,
1437 u32 irq)
1438{
1439 struct kvmppc_xive_irq_state *state = &sb->irq_state[irq];
1440
1441 if (!state->valid)
1442 return;
1443
1444 /* Mask and save state, this will also sync HW queues */
1445 state->saved_scan_prio = xive_lock_and_mask(xive, sb, state);
1446
1447 /* Transfer P and Q */
1448 state->saved_p = state->old_p;
1449 state->saved_q = state->old_q;
1450
1451 /* Unlock */
1452 arch_spin_unlock(&sb->lock);
1453}
1454
1455static void xive_pre_save_unmask_irq(struct kvmppc_xive *xive,
1456 struct kvmppc_xive_src_block *sb,
1457 u32 irq)
1458{
1459 struct kvmppc_xive_irq_state *state = &sb->irq_state[irq];
1460
1461 if (!state->valid)
1462 return;
1463
1464 /*
1465 * Lock / exclude EOI (not technically necessary if the
1466 * guest isn't running concurrently. If this becomes a
1467 * performance issue we can probably remove the lock.
1468 */
1469 xive_lock_for_unmask(sb, state);
1470
1471 /* Restore mask/prio if it wasn't masked */
1472 if (state->saved_scan_prio != MASKED)
1473 xive_finish_unmask(xive, sb, state, state->saved_scan_prio);
1474
1475 /* Unlock */
1476 arch_spin_unlock(&sb->lock);
1477}
1478
1479static void xive_pre_save_queue(struct kvmppc_xive *xive, struct xive_q *q)
1480{
1481 u32 idx = q->idx;
1482 u32 toggle = q->toggle;
1483 u32 irq;
1484
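	/*
	 * Walk the queue using local copies of idx/toggle so that no
	 * entries are consumed; flag every interrupt found above
	 * XICS_IPI as still being queued.
	 */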
1485 do {
1486 irq = __xive_read_eq(q->qpage, q->msk, &idx, &toggle);
1487 if (irq > XICS_IPI)
1488 xive_pre_save_set_queued(xive, irq);
1489 } while(irq);
1490}
1491
1492static void xive_pre_save_scan(struct kvmppc_xive *xive)
1493{
1494 struct kvm_vcpu *vcpu = NULL;
1495 int i, j;
1496
1497 /*
1498 * See comment in xive_get_source() about how this
1499 * works. Collect a stable state for all interrupts
1500 */
1501 for (i = 0; i <= xive->max_sbid; i++) {
1502 struct kvmppc_xive_src_block *sb = xive->src_blocks[i];
1503 if (!sb)
1504 continue;
1505 for (j = 0; j < KVMPPC_XICS_IRQ_PER_ICS; j++)
1506 xive_pre_save_mask_irq(xive, sb, j);
1507 }
1508
1509 /* Then scan the queues and update the "in_queue" flag */
1510 kvm_for_each_vcpu(i, vcpu, xive->kvm) {
1511 struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
1512 if (!xc)
1513 continue;
1514 for (j = 0; j < KVMPPC_XIVE_Q_COUNT; j++) {
Paul Mackerras00c14752017-06-30 16:39:55 +10001515 if (xc->queues[j].qpage)
1516 xive_pre_save_queue(xive, &xc->queues[j]);
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10001517 }
1518 }
1519
1520 /* Finally restore interrupt states */
1521 for (i = 0; i <= xive->max_sbid; i++) {
1522 struct kvmppc_xive_src_block *sb = xive->src_blocks[i];
1523 if (!sb)
1524 continue;
1525 for (j = 0; j < KVMPPC_XICS_IRQ_PER_ICS; j++)
1526 xive_pre_save_unmask_irq(xive, sb, j);
1527 }
1528}
1529
1530static void xive_post_save_scan(struct kvmppc_xive *xive)
1531{
1532 u32 i, j;
1533
1534 /* Clear all the in_queue flags */
1535 for (i = 0; i <= xive->max_sbid; i++) {
1536 struct kvmppc_xive_src_block *sb = xive->src_blocks[i];
1537 if (!sb)
1538 continue;
1539 for (j = 0; j < KVMPPC_XICS_IRQ_PER_ICS; j++)
1540 sb->irq_state[j].in_queue = false;
1541 }
1542
1543 /* Next get_source() will do a new scan */
1544 xive->saved_src_count = 0;
1545}
1546
1547/*
1548 * This returns the source configuration and state to user space.
1549 */
1550static int xive_get_source(struct kvmppc_xive *xive, long irq, u64 addr)
1551{
1552 struct kvmppc_xive_src_block *sb;
1553 struct kvmppc_xive_irq_state *state;
1554 u64 __user *ubufp = (u64 __user *) addr;
1555 u64 val, prio;
1556 u16 idx;
1557
1558 sb = kvmppc_xive_find_source(xive, irq, &idx);
1559 if (!sb)
1560 return -ENOENT;
1561
1562 state = &sb->irq_state[idx];
1563
1564 if (!state->valid)
1565 return -ENOENT;
1566
1567 pr_devel("get_source(%ld)...\n", irq);
1568
1569 /*
1570 * So to properly save the state into something that looks like a
1571 * XICS migration stream we cannot treat interrupts individually.
1572 *
1573 * We need, instead, mask them all (& save their previous PQ state)
1574 * to get a stable state in the HW, then sync them to ensure that
1575 * any interrupt that had already fired hits its queue, and finally
1576 * scan all the queues to collect which interrupts are still present
1577 * in the queues, so we can set the "pending" flag on them and
1578 * they can be resent on restore.
1579 *
1580 * So we do it all when the "first" interrupt gets saved, all the
1581 * state is collected at that point, the rest of xive_get_source()
1582 * will merely collect and convert that state to the expected
1583 * userspace bit mask.
1584 */
1585 if (xive->saved_src_count == 0)
1586 xive_pre_save_scan(xive);
1587 xive->saved_src_count++;
1588
1589 /* Convert saved state into something compatible with xics */
Sam Bobroff2fb1e942017-09-26 16:47:04 +10001590 val = state->act_server;
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10001591 prio = state->saved_scan_prio;
1592
1593 if (prio == MASKED) {
1594 val |= KVM_XICS_MASKED;
1595 prio = state->saved_priority;
1596 }
1597 val |= prio << KVM_XICS_PRIORITY_SHIFT;
1598 if (state->lsi) {
1599 val |= KVM_XICS_LEVEL_SENSITIVE;
1600 if (state->saved_p)
1601 val |= KVM_XICS_PENDING;
1602 } else {
1603 if (state->saved_p)
1604 val |= KVM_XICS_PRESENTED;
1605
1606 if (state->saved_q)
1607 val |= KVM_XICS_QUEUED;
1608
1609 /*
1610 * We mark it pending (which will attempt a re-delivery)
1611 * if we are in a queue *or* we were masked and had
1612 * Q set which is equivalent to the XICS "masked pending"
1613 * state
1614 */
1615 if (state->in_queue || (prio == MASKED && state->saved_q))
1616 val |= KVM_XICS_PENDING;
1617 }
1618
1619 /*
1620 * If that was the last interrupt saved, reset the
1621 * in_queue flags
1622 */
1623 if (xive->saved_src_count == xive->src_count)
1624 xive_post_save_scan(xive);
1625
1626 /* Copy the result to userspace */
1627 if (put_user(val, ubufp))
1628 return -EFAULT;
1629
1630 return 0;
1631}
1632
Cédric Le Goater4131f832019-04-18 12:39:29 +02001633struct kvmppc_xive_src_block *kvmppc_xive_create_src_block(
1634 struct kvmppc_xive *xive, int irq)
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10001635{
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10001636 struct kvmppc_xive_src_block *sb;
1637 int i, bid;
1638
1639 bid = irq >> KVMPPC_XICS_ICS_SHIFT;
1640
Cédric Le Goater7e10b9a2019-05-24 15:20:30 +02001641 mutex_lock(&xive->lock);
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10001642
1643 /* block already exists - somebody else got here first */
1644 if (xive->src_blocks[bid])
1645 goto out;
1646
1647 /* Create the ICS */
1648 sb = kzalloc(sizeof(*sb), GFP_KERNEL);
1649 if (!sb)
1650 goto out;
1651
1652 sb->id = bid;
1653
1654 for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
1655 sb->irq_state[i].number = (bid << KVMPPC_XICS_ICS_SHIFT) | i;
Cédric Le Goatere8676ce2019-04-18 12:39:30 +02001656 sb->irq_state[i].eisn = 0;
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10001657 sb->irq_state[i].guest_priority = MASKED;
1658 sb->irq_state[i].saved_priority = MASKED;
1659 sb->irq_state[i].act_priority = MASKED;
1660 }
1661 smp_wmb();
1662 xive->src_blocks[bid] = sb;
1663
1664 if (bid > xive->max_sbid)
1665 xive->max_sbid = bid;
1666
1667out:
Cédric Le Goater7e10b9a2019-05-24 15:20:30 +02001668 mutex_unlock(&xive->lock);
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10001669 return xive->src_blocks[bid];
1670}
1671
1672static bool xive_check_delayed_irq(struct kvmppc_xive *xive, u32 irq)
1673{
1674 struct kvm *kvm = xive->kvm;
1675 struct kvm_vcpu *vcpu = NULL;
1676 int i;
1677
1678 kvm_for_each_vcpu(i, vcpu, kvm) {
1679 struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
1680
1681 if (!xc)
1682 continue;
1683
1684 if (xc->delayed_irq == irq) {
1685 xc->delayed_irq = 0;
1686 xive->delayed_irqs--;
1687 return true;
1688 }
1689 }
1690 return false;
1691}
1692
1693static int xive_set_source(struct kvmppc_xive *xive, long irq, u64 addr)
1694{
1695 struct kvmppc_xive_src_block *sb;
1696 struct kvmppc_xive_irq_state *state;
1697 u64 __user *ubufp = (u64 __user *) addr;
1698 u16 idx;
1699 u64 val;
1700 u8 act_prio, guest_prio;
1701 u32 server;
1702 int rc = 0;
1703
1704 if (irq < KVMPPC_XICS_FIRST_IRQ || irq >= KVMPPC_XICS_NR_IRQS)
1705 return -ENOENT;
1706
1707 pr_devel("set_source(irq=0x%lx)\n", irq);
1708
1709 /* Find the source */
1710 sb = kvmppc_xive_find_source(xive, irq, &idx);
1711 if (!sb) {
1712 pr_devel("No source, creating source block...\n");
Cédric Le Goater4131f832019-04-18 12:39:29 +02001713 sb = kvmppc_xive_create_src_block(xive, irq);
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10001714 if (!sb) {
1715 pr_devel("Failed to create block...\n");
1716 return -ENOMEM;
1717 }
1718 }
1719 state = &sb->irq_state[idx];
1720
1721 /* Read user passed data */
1722 if (get_user(val, ubufp)) {
1723 pr_devel("fault getting user info !\n");
1724 return -EFAULT;
1725 }
1726
1727 server = val & KVM_XICS_DESTINATION_MASK;
1728 guest_prio = val >> KVM_XICS_PRIORITY_SHIFT;
1729
1730	pr_devel(" val=0x%016llx (server=0x%x, guest_prio=%d)\n",
1731 val, server, guest_prio);
Benjamin Herrenschmidtbf4159da2018-01-12 13:37:12 +11001732
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10001733 /*
1734 * If the source doesn't already have an IPI, allocate
1735 * one and get the corresponding data
1736 */
1737 if (!state->ipi_number) {
1738 state->ipi_number = xive_native_alloc_irq();
1739 if (state->ipi_number == 0) {
1740 pr_devel("Failed to allocate IPI !\n");
1741 return -ENOMEM;
1742 }
1743 xive_native_populate_irq_data(state->ipi_number, &state->ipi_data);
1744 pr_devel(" src_ipi=0x%x\n", state->ipi_number);
1745 }
1746
1747 /*
1748	 * We use lock_and_mask() to put the interrupt in the right masked
1749 * state. We will override that state from the saved state
1750 * further down, but this will handle the cases of interrupts
1751 * that need FW masking. We set the initial guest_priority to
1752 * 0 before calling it to ensure it actually performs the masking.
1753 */
1754 state->guest_priority = 0;
1755 xive_lock_and_mask(xive, sb, state);
1756
1757 /*
1758	 * Now, we select a target if we have one. If we don't, we
1759	 * leave the interrupt untargeted. This means that an interrupt
1760	 * can become "untargeted" across migration if it was masked
1761	 * by set_xive(), but there is little we can do about it.
1762 */
1763
1764	/* First convert prio and mark interrupt as untargeted */
1765 act_prio = xive_prio_from_guest(guest_prio);
1766 state->act_priority = MASKED;
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10001767
1768 /*
1769 * We need to drop the lock due to the mutex below. Hopefully
1770 * nothing is touching that interrupt yet since it hasn't been
1771	 * advertised to a running guest yet.
1772 */
1773 arch_spin_unlock(&sb->lock);
1774
1775	/* If we have a priority, target the interrupt */
1776 if (act_prio != MASKED) {
1777 /* First, check provisioning of queues */
Cédric Le Goater7e10b9a2019-05-24 15:20:30 +02001778 mutex_lock(&xive->lock);
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10001779 rc = xive_check_provisioning(xive->kvm, act_prio);
Cédric Le Goater7e10b9a2019-05-24 15:20:30 +02001780 mutex_unlock(&xive->lock);
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10001781
1782 /* Target interrupt */
1783 if (rc == 0)
1784 rc = xive_target_interrupt(xive->kvm, state,
1785 server, act_prio);
1786 /*
1787		 * If provisioning or targeting failed, leave it
1788 * alone and masked. It will remain disabled until
1789 * the guest re-targets it.
1790 */
1791 }
1792
1793 /*
1794 * Find out if this was a delayed irq stashed in an ICP,
1795 * in which case, treat it as pending
1796 */
1797 if (xive->delayed_irqs && xive_check_delayed_irq(xive, irq)) {
1798 val |= KVM_XICS_PENDING;
1799 pr_devel(" Found delayed ! forcing PENDING !\n");
1800 }
1801
1802 /* Cleanup the SW state */
1803 state->old_p = false;
1804 state->old_q = false;
1805 state->lsi = false;
1806 state->asserted = false;
1807
1808 /* Restore LSI state */
1809 if (val & KVM_XICS_LEVEL_SENSITIVE) {
1810 state->lsi = true;
1811 if (val & KVM_XICS_PENDING)
1812 state->asserted = true;
1813 pr_devel(" LSI ! Asserted=%d\n", state->asserted);
1814 }
1815
1816 /*
1817 * Restore P and Q. If the interrupt was pending, we
Cédric Le Goaterdc1c4162017-12-12 12:02:04 +00001818 * force Q and !P, which will trigger a resend.
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10001819 *
1820 * That means that a guest that had both an interrupt
1821 * pending (queued) and Q set will restore with only
1822 * one instance of that interrupt instead of 2, but that
1823 * is perfectly fine as coalescing interrupts that haven't
1824 * been presented yet is always allowed.
1825 */
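	/*
	 * Mapping used below:
	 *   PRESENTED && !PENDING  -> restore P
	 *   QUEUED   ||  PENDING   -> restore Q
	 */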
Cédric Le Goaterdc1c4162017-12-12 12:02:04 +00001826 if (val & KVM_XICS_PRESENTED && !(val & KVM_XICS_PENDING))
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10001827 state->old_p = true;
1828 if (val & KVM_XICS_QUEUED || val & KVM_XICS_PENDING)
1829 state->old_q = true;
1830
1831 pr_devel(" P=%d, Q=%d\n", state->old_p, state->old_q);
1832
1833 /*
1834	 * If the interrupt is masked, just record the saved priority;
1835	 * otherwise update the guest priority, perform the appropriate
1836	 * state transition and do a re-trigger if necessary.
1837 */
1838 if (val & KVM_XICS_MASKED) {
1839 pr_devel(" masked, saving prio\n");
1840 state->guest_priority = MASKED;
1841 state->saved_priority = guest_prio;
1842 } else {
1843 pr_devel(" unmasked, restoring to prio %d\n", guest_prio);
1844 xive_finish_unmask(xive, sb, state, guest_prio);
1845 state->saved_priority = guest_prio;
1846 }
1847
1848 /* Increment the number of valid sources and mark this one valid */
1849 if (!state->valid)
1850 xive->src_count++;
1851 state->valid = true;
1852
1853 return 0;
1854}
1855
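/*
 * Assert or deassert a guest interrupt line. Deassertion only clears
 * the LSI state; an assertion (or an MSI trigger) fires the backing
 * IPI. Triggering a passed-through interrupt this way is refused.
 */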
1856int kvmppc_xive_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
1857 bool line_status)
1858{
1859 struct kvmppc_xive *xive = kvm->arch.xive;
1860 struct kvmppc_xive_src_block *sb;
1861 struct kvmppc_xive_irq_state *state;
1862 u16 idx;
1863
1864 if (!xive)
1865 return -ENODEV;
1866
1867 sb = kvmppc_xive_find_source(xive, irq, &idx);
1868 if (!sb)
1869 return -EINVAL;
1870
1871	/* Perform locklessly... (we need to do some RCUisms here...) */
1872 state = &sb->irq_state[idx];
1873 if (!state->valid)
1874 return -EINVAL;
1875
1876 /* We don't allow a trigger on a passed-through interrupt */
1877 if (state->pt_number)
1878 return -EINVAL;
1879
1880 if ((level == 1 && state->lsi) || level == KVM_INTERRUPT_SET_LEVEL)
Jiapeng Chongc9df3f82021-02-07 14:43:12 +08001881 state->asserted = true;
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10001882 else if (level == 0 || level == KVM_INTERRUPT_UNSET) {
Jiapeng Chongc9df3f82021-02-07 14:43:12 +08001883 state->asserted = false;
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10001884 return 0;
1885 }
1886
1887 /* Trigger the IPI */
1888 xive_irq_trigger(&state->ipi_data);
1889
1890 return 0;
1891}
1892
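/*
 * KVM_DEV_XICS_NR_SERVERS control: record how many interrupt servers
 * (vCPU ids) the device must support. Only allowed before the VP
 * block is allocated, i.e. before the first vCPU is connected.
 */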
Greg Kurz062cfab2019-09-27 13:54:01 +02001893int kvmppc_xive_set_nr_servers(struct kvmppc_xive *xive, u64 addr)
1894{
1895 u32 __user *ubufp = (u32 __user *) addr;
1896 u32 nr_servers;
1897 int rc = 0;
1898
1899 if (get_user(nr_servers, ubufp))
1900 return -EFAULT;
1901
1902 pr_devel("%s nr_servers=%u\n", __func__, nr_servers);
1903
1904 if (!nr_servers || nr_servers > KVM_MAX_VCPU_ID)
1905 return -EINVAL;
1906
1907 mutex_lock(&xive->lock);
1908 if (xive->vp_base != XIVE_INVALID_VP)
1909 /* The VP block is allocated once and freed when the device
1910	 * is released. Better not allow its size to be changed since it
1911	 * is used by connect_vcpu to validate vCPU ids (e.g.,
1912 * setting it back to a higher value could allow connect_vcpu
1913 * to come up with a VP id that goes beyond the VP block, which
1914 * is likely to cause a crash in OPAL).
1915 */
1916 rc = -EBUSY;
1917 else if (nr_servers > KVM_MAX_VCPUS)
1918 /* We don't need more servers. Higher vCPU ids get packed
1919 * down below KVM_MAX_VCPUS by kvmppc_pack_vcpu_id().
1920 */
1921 xive->nr_servers = KVM_MAX_VCPUS;
1922 else
1923 xive->nr_servers = nr_servers;
1924
1925 mutex_unlock(&xive->lock);
1926
1927 return rc;
1928}
1929
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10001930static int xive_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
1931{
1932 struct kvmppc_xive *xive = dev->private;
1933
1934 /* We honor the existing XICS ioctl */
1935 switch (attr->group) {
1936 case KVM_DEV_XICS_GRP_SOURCES:
1937 return xive_set_source(xive, attr->attr, attr->addr);
Greg Kurzefe5ddc2019-09-27 13:54:07 +02001938 case KVM_DEV_XICS_GRP_CTRL:
1939 switch (attr->attr) {
1940 case KVM_DEV_XICS_NR_SERVERS:
1941 return kvmppc_xive_set_nr_servers(xive, attr->addr);
1942 }
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10001943 }
1944 return -ENXIO;
1945}
1946
1947static int xive_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
1948{
1949 struct kvmppc_xive *xive = dev->private;
1950
1951 /* We honor the existing XICS ioctl */
1952 switch (attr->group) {
1953 case KVM_DEV_XICS_GRP_SOURCES:
1954 return xive_get_source(xive, attr->attr, attr->addr);
1955 }
1956 return -ENXIO;
1957}
1958
1959static int xive_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
1960{
1961 /* We honor the same limits as XICS, at least for now */
1962 switch (attr->group) {
1963 case KVM_DEV_XICS_GRP_SOURCES:
1964 if (attr->attr >= KVMPPC_XICS_FIRST_IRQ &&
1965 attr->attr < KVMPPC_XICS_NR_IRQS)
1966 return 0;
1967 break;
Greg Kurzefe5ddc2019-09-27 13:54:07 +02001968 case KVM_DEV_XICS_GRP_CTRL:
1969 switch (attr->attr) {
1970 case KVM_DEV_XICS_NR_SERVERS:
1971 return 0;
1972 }
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10001973 }
1974 return -ENXIO;
1975}
1976
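/* Mask the source in HW (PQ set to 01) and deconfigure its targeting */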
1977static void kvmppc_xive_cleanup_irq(u32 hw_num, struct xive_irq_data *xd)
1978{
1979 xive_vm_esb_load(xd, XIVE_ESB_SET_PQ_01);
1980 xive_native_configure_irq(hw_num, 0, MASKED, 0);
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10001981}
1982
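/*
 * Mask and release all valid sources of a block. The backing IPIs
 * are freed; pass-through interrupts are masked but their HW IRQ
 * data is left untouched.
 */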
Cédric Le Goater4131f832019-04-18 12:39:29 +02001983void kvmppc_xive_free_sources(struct kvmppc_xive_src_block *sb)
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10001984{
1985 int i;
1986
1987 for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
1988 struct kvmppc_xive_irq_state *state = &sb->irq_state[i];
1989
1990 if (!state->valid)
1991 continue;
1992
1993 kvmppc_xive_cleanup_irq(state->ipi_number, &state->ipi_data);
Cédric Le Goateref974022019-05-28 14:17:15 +02001994 xive_cleanup_irq_data(&state->ipi_data);
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10001995 xive_native_free_irq(state->ipi_number);
1996
Cédric Le Goateref974022019-05-28 14:17:15 +02001997 /* Pass-through, cleanup too but keep IRQ hw data */
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10001998 if (state->pt_number)
1999 kvmppc_xive_cleanup_irq(state->pt_number, state->pt_data);
2000
2001 state->valid = false;
2002 }
2003}
2004
Cédric Le Goater5422e952019-04-18 12:39:42 +02002005/*
Paul Mackerras6f868402019-04-29 11:24:03 +10002006 * Called when device fd is closed. kvm->lock is held.
Cédric Le Goater5422e952019-04-18 12:39:42 +02002007 */
2008static void kvmppc_xive_release(struct kvm_device *dev)
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10002009{
2010 struct kvmppc_xive *xive = dev->private;
2011 struct kvm *kvm = xive->kvm;
Cédric Le Goater5422e952019-04-18 12:39:42 +02002012 struct kvm_vcpu *vcpu;
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10002013 int i;
2014
Cédric Le Goater5422e952019-04-18 12:39:42 +02002015 pr_devel("Releasing xive device\n");
2016
Paul Mackerras6f868402019-04-29 11:24:03 +10002017 /*
Paul Mackerras6f868402019-04-29 11:24:03 +10002018 * Since this is the device release function, we know that
2019 * userspace does not have any open fd referring to the
2020	 * device. Therefore none of the device attribute set/get
2021	 * functions can be executing concurrently, and similarly,
2022	 * the connect_vcpu and set/clr_mapped functions cannot
2023	 * be executing either.
2024 */
Paul Mackerrasc395fe12019-05-23 16:35:07 +10002025
2026 debugfs_remove(xive->dentry);
Paul Mackerras6f868402019-04-29 11:24:03 +10002027
2028 /*
2029 * We should clean up the vCPU interrupt presenters first.
2030 */
2031 kvm_for_each_vcpu(i, vcpu, kvm) {
2032 /*
2033 * Take vcpu->mutex to ensure that no one_reg get/set ioctl
2034 * (i.e. kvmppc_xive_[gs]et_icp) can be done concurrently.
Paul Mackerrasc395fe12019-05-23 16:35:07 +10002035 * Holding the vcpu->mutex also means that the vcpu cannot
2036 * be executing the KVM_RUN ioctl, and therefore it cannot
2037 * be executing the XIVE push or pull code or accessing
2038 * the XIVE MMIO regions.
Paul Mackerras6f868402019-04-29 11:24:03 +10002039 */
2040 mutex_lock(&vcpu->mutex);
2041 kvmppc_xive_cleanup_vcpu(vcpu);
2042 mutex_unlock(&vcpu->mutex);
2043 }
2044
Paul Mackerrasc395fe12019-05-23 16:35:07 +10002045 /*
2046 * Now that we have cleared vcpu->arch.xive_vcpu, vcpu->arch.irq_type
2047 * and vcpu->arch.xive_esc_[vr]addr on each vcpu, we are safe
2048 * against xive code getting called during vcpu execution or
2049 * set/get one_reg operations.
2050 */
Paul Mackerras6f868402019-04-29 11:24:03 +10002051 kvm->arch.xive = NULL;
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10002052
2053 /* Mask and free interrupts */
2054 for (i = 0; i <= xive->max_sbid; i++) {
2055 if (xive->src_blocks[i])
2056 kvmppc_xive_free_sources(xive->src_blocks[i]);
2057 kfree(xive->src_blocks[i]);
2058 xive->src_blocks[i] = NULL;
2059 }
2060
2061 if (xive->vp_base != XIVE_INVALID_VP)
2062 xive_native_free_vp_block(xive->vp_base);
2063
Cédric Le Goater5422e952019-04-18 12:39:42 +02002064 /*
2065	 * A reference to the kvmppc_xive pointer is now kept under
2066	 * the xive_devices struct of the machine for reuse. For now
2067	 * it is only freed when the VM is destroyed, until we fix
2068	 * all the execution paths.
2069 */
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10002070
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10002071 kfree(dev);
2072}
2073
Cédric Le Goater5422e952019-04-18 12:39:42 +02002074/*
2075 * When the guest chooses the interrupt mode (XICS legacy or XIVE
2076 * native), the VM switches KVM devices. The previous device will
2077 * be "released" before the new one is created.
2078 *
2079 * Until we are sure all execution paths are well protected, provide a
2080 * fail-safe (transitional) method for device destruction, in which
2081 * the XIVE device pointer is recycled and not directly freed.
2082 */
2083struct kvmppc_xive *kvmppc_xive_get_device(struct kvm *kvm, u32 type)
2084{
2085 struct kvmppc_xive **kvm_xive_device = type == KVM_DEV_TYPE_XIVE ?
2086 &kvm->arch.xive_devices.native :
2087 &kvm->arch.xive_devices.xics_on_xive;
2088 struct kvmppc_xive *xive = *kvm_xive_device;
2089
2090 if (!xive) {
2091 xive = kzalloc(sizeof(*xive), GFP_KERNEL);
2092 *kvm_xive_device = xive;
2093 } else {
2094 memset(xive, 0, sizeof(*xive));
2095 }
2096
2097 return xive;
2098}
2099
Paul Mackerras6f868402019-04-29 11:24:03 +10002100/*
2101 * Create a XICS device with XIVE backend. kvm->lock is held.
2102 */
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10002103static int kvmppc_xive_create(struct kvm_device *dev, u32 type)
2104{
2105 struct kvmppc_xive *xive;
2106 struct kvm *kvm = dev->kvm;
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10002107
2108 pr_devel("Creating xive for partition\n");
2109
Greg Kurze7d71c92019-09-27 13:53:38 +02002110 /* Already there ? */
2111 if (kvm->arch.xive)
2112 return -EEXIST;
2113
Cédric Le Goater5422e952019-04-18 12:39:42 +02002114 xive = kvmppc_xive_get_device(kvm, type);
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10002115 if (!xive)
2116 return -ENOMEM;
2117
2118 dev->private = xive;
2119 xive->dev = dev;
2120 xive->kvm = kvm;
Cédric Le Goater7e10b9a2019-05-24 15:20:30 +02002121 mutex_init(&xive->lock);
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10002122
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10002123 /* We use the default queue size set by the host */
2124 xive->q_order = xive_native_default_eq_shift();
2125 if (xive->q_order < PAGE_SHIFT)
2126 xive->q_page_order = 0;
2127 else
2128 xive->q_page_order = xive->q_order - PAGE_SHIFT;
2129
Greg Kurz062cfab2019-09-27 13:54:01 +02002130 /* VP allocation is delayed to the first call to connect_vcpu */
2131 xive->vp_base = XIVE_INVALID_VP;
2132	/* KVM_MAX_VCPUS limits the number of VMs to roughly 64 per socket
2133 * on a POWER9 system.
2134 */
2135 xive->nr_servers = KVM_MAX_VCPUS;
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10002136
Benjamin Herrenschmidtbf4159da2018-01-12 13:37:12 +11002137 xive->single_escalation = xive_native_has_single_escalation();
2138
Greg Kurze7d71c92019-09-27 13:53:38 +02002139 kvm->arch.xive = xive;
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10002140 return 0;
2141}
2142
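/*
 * Dispatch a guest XICS hcall (H_XIRR, H_CPPR, H_EOI, H_IPI, H_IPOLL,
 * H_XIRR_X) to the virtual-mode handlers. Returns H_TOO_HARD if the
 * vCPU is not using the in-kernel XICS-on-XIVE device.
 */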
Nicholas Piggin9dc2bab2021-05-28 19:07:33 +10002143int kvmppc_xive_xics_hcall(struct kvm_vcpu *vcpu, u32 req)
2144{
2145 struct kvmppc_vcore *vc = vcpu->arch.vcore;
2146
2147 /* The VM should have configured XICS mode before doing XICS hcalls. */
2148 if (!kvmppc_xics_enabled(vcpu))
2149 return H_TOO_HARD;
2150
2151 switch (req) {
2152 case H_XIRR:
2153 return xive_vm_h_xirr(vcpu);
2154 case H_CPPR:
2155 return xive_vm_h_cppr(vcpu, kvmppc_get_gpr(vcpu, 4));
2156 case H_EOI:
2157 return xive_vm_h_eoi(vcpu, kvmppc_get_gpr(vcpu, 4));
2158 case H_IPI:
2159 return xive_vm_h_ipi(vcpu, kvmppc_get_gpr(vcpu, 4),
2160 kvmppc_get_gpr(vcpu, 5));
2161 case H_IPOLL:
2162 return xive_vm_h_ipoll(vcpu, kvmppc_get_gpr(vcpu, 4));
2163 case H_XIRR_X:
2164 xive_vm_h_xirr(vcpu);
2165 kvmppc_set_gpr(vcpu, 5, get_tb() + vc->tb_offset);
2166 return H_SUCCESS;
2167 }
2168
2169 return H_UNSUPPORTED;
2170}
2171EXPORT_SYMBOL_GPL(kvmppc_xive_xics_hcall);
2172
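/* Debugfs helper: dump a vCPU's event queues and escalation interrupt state */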
Cédric Le Goatereacc56b2019-04-18 12:39:28 +02002173int kvmppc_xive_debug_show_queues(struct seq_file *m, struct kvm_vcpu *vcpu)
2174{
2175 struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
2176 unsigned int i;
2177
2178 for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
2179 struct xive_q *q = &xc->queues[i];
2180 u32 i0, i1, idx;
2181
2182 if (!q->qpage && !xc->esc_virq[i])
2183 continue;
2184
Cédric Le Goatereacc56b2019-04-18 12:39:28 +02002185 if (q->qpage) {
Cédric Le Goater98983672020-12-10 18:14:38 +01002186 seq_printf(m, " q[%d]: ", i);
Cédric Le Goatereacc56b2019-04-18 12:39:28 +02002187 idx = q->idx;
2188 i0 = be32_to_cpup(q->qpage + idx);
2189 idx = (idx + 1) & q->msk;
2190 i1 = be32_to_cpup(q->qpage + idx);
2191 seq_printf(m, "T=%d %08x %08x...\n", q->toggle,
2192 i0, i1);
2193 }
2194 if (xc->esc_virq[i]) {
2195 struct irq_data *d = irq_get_irq_data(xc->esc_virq[i]);
2196 struct xive_irq_data *xd =
2197 irq_data_get_irq_handler_data(d);
2198 u64 pq = xive_vm_esb_load(xd, XIVE_ESB_GET);
2199
Cédric Le Goater98983672020-12-10 18:14:38 +01002200 seq_printf(m, " ESC %d %c%c EOI @%llx",
2201 xc->esc_virq[i],
2202 (pq & XIVE_ESB_VAL_P) ? 'P' : '-',
2203 (pq & XIVE_ESB_VAL_Q) ? 'Q' : '-',
2204 xd->eoi_page);
Cédric Le Goatereacc56b2019-04-18 12:39:28 +02002205 seq_puts(m, "\n");
2206 }
2207 }
2208 return 0;
2209}
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10002210
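/*
 * Debugfs helper: dump every valid source of a block with its HW
 * number, type (LSI/MSI, IPI or pass-through), ESB PQ bits, EISN and
 * target server/priority.
 */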
Cédric Le Goater98983672020-12-10 18:14:38 +01002211void kvmppc_xive_debug_show_sources(struct seq_file *m,
2212 struct kvmppc_xive_src_block *sb)
2213{
2214 int i;
2215
2216 seq_puts(m, " LISN HW/CHIP TYPE PQ EISN CPU/PRIO\n");
2217 for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
2218 struct kvmppc_xive_irq_state *state = &sb->irq_state[i];
2219 struct xive_irq_data *xd;
2220 u64 pq;
2221 u32 hw_num;
2222
2223 if (!state->valid)
2224 continue;
2225
2226 kvmppc_xive_select_irq(state, &hw_num, &xd);
2227
2228 pq = xive_vm_esb_load(xd, XIVE_ESB_GET);
2229
2230 seq_printf(m, "%08x %08x/%02x", state->number, hw_num,
2231 xd->src_chip);
2232 if (state->lsi)
2233 seq_printf(m, " %cLSI", state->asserted ? '^' : ' ');
2234 else
2235 seq_puts(m, " MSI");
2236
2237 seq_printf(m, " %s %c%c %08x % 4d/%d",
2238 state->ipi_number == hw_num ? "IPI" : " PT",
2239 pq & XIVE_ESB_VAL_P ? 'P' : '-',
2240 pq & XIVE_ESB_VAL_Q ? 'Q' : '-',
2241 state->eisn, state->act_server,
2242 state->act_priority);
2243
2244 seq_puts(m, "\n");
2245 }
2246}
2247
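/*
 * Debugfs dump: per-vCPU XIVE state, real-mode vs virtual-mode hcall
 * counters, and all configured sources.
 */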
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10002248static int xive_debug_show(struct seq_file *m, void *private)
2249{
2250 struct kvmppc_xive *xive = m->private;
2251 struct kvm *kvm = xive->kvm;
2252 struct kvm_vcpu *vcpu;
2253 u64 t_rm_h_xirr = 0;
2254 u64 t_rm_h_ipoll = 0;
2255 u64 t_rm_h_cppr = 0;
2256 u64 t_rm_h_eoi = 0;
2257 u64 t_rm_h_ipi = 0;
2258 u64 t_vm_h_xirr = 0;
2259 u64 t_vm_h_ipoll = 0;
2260 u64 t_vm_h_cppr = 0;
2261 u64 t_vm_h_eoi = 0;
2262 u64 t_vm_h_ipi = 0;
2263 unsigned int i;
2264
2265 if (!kvm)
2266 return 0;
2267
Cédric Le Goater98983672020-12-10 18:14:38 +01002268 seq_puts(m, "=========\nVCPU state\n=========\n");
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10002269
2270 kvm_for_each_vcpu(i, vcpu, kvm) {
2271 struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
2272
2273 if (!xc)
2274 continue;
2275
Cédric Le Goater98983672020-12-10 18:14:38 +01002276 seq_printf(m, "VCPU %d: VP:%#x/%02x\n"
2277 " CPPR:%#x HWCPPR:%#x MFRR:%#x PEND:%#x h_xirr: R=%lld V=%lld\n",
2278 xc->server_num, xc->vp_id, xc->vp_chip_id,
2279 xc->cppr, xc->hw_cppr,
2280 xc->mfrr, xc->pending,
2281 xc->stat_rm_h_xirr, xc->stat_vm_h_xirr);
Benjamin Herrenschmidtc424c102018-01-12 13:37:11 +11002282
Cédric Le Goatereacc56b2019-04-18 12:39:28 +02002283 kvmppc_xive_debug_show_queues(m, vcpu);
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10002284
2285 t_rm_h_xirr += xc->stat_rm_h_xirr;
2286 t_rm_h_ipoll += xc->stat_rm_h_ipoll;
2287 t_rm_h_cppr += xc->stat_rm_h_cppr;
2288 t_rm_h_eoi += xc->stat_rm_h_eoi;
2289 t_rm_h_ipi += xc->stat_rm_h_ipi;
2290 t_vm_h_xirr += xc->stat_vm_h_xirr;
2291 t_vm_h_ipoll += xc->stat_vm_h_ipoll;
2292 t_vm_h_cppr += xc->stat_vm_h_cppr;
2293 t_vm_h_eoi += xc->stat_vm_h_eoi;
2294 t_vm_h_ipi += xc->stat_vm_h_ipi;
2295 }
2296
Cédric Le Goater98983672020-12-10 18:14:38 +01002297 seq_puts(m, "Hcalls totals\n");
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10002298 seq_printf(m, " H_XIRR R=%10lld V=%10lld\n", t_rm_h_xirr, t_vm_h_xirr);
2299 seq_printf(m, " H_IPOLL R=%10lld V=%10lld\n", t_rm_h_ipoll, t_vm_h_ipoll);
2300 seq_printf(m, " H_CPPR R=%10lld V=%10lld\n", t_rm_h_cppr, t_vm_h_cppr);
2301 seq_printf(m, " H_EOI R=%10lld V=%10lld\n", t_rm_h_eoi, t_vm_h_eoi);
2302 seq_printf(m, " H_IPI R=%10lld V=%10lld\n", t_rm_h_ipi, t_vm_h_ipi);
2303
Cédric Le Goater98983672020-12-10 18:14:38 +01002304 seq_puts(m, "=========\nSources\n=========\n");
2305
2306 for (i = 0; i <= xive->max_sbid; i++) {
2307 struct kvmppc_xive_src_block *sb = xive->src_blocks[i];
2308
2309 if (sb) {
2310 arch_spin_lock(&sb->lock);
2311 kvmppc_xive_debug_show_sources(m, sb);
2312 arch_spin_unlock(&sb->lock);
2313 }
2314 }
2315
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10002316 return 0;
2317}
2318
Yangtao Li0f6ddf32018-11-05 09:47:17 -05002319DEFINE_SHOW_ATTRIBUTE(xive_debug);
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10002320
2321static void xive_debugfs_init(struct kvmppc_xive *xive)
2322{
2323 char *name;
2324
2325 name = kasprintf(GFP_KERNEL, "kvm-xive-%p", xive);
2326 if (!name) {
2327 pr_err("%s: no memory for name\n", __func__);
2328 return;
2329 }
2330
2331 xive->dentry = debugfs_create_file(name, S_IRUGO, powerpc_debugfs_root,
2332 xive, &xive_debug_fops);
2333
2334 pr_debug("%s: created %s\n", __func__, name);
2335 kfree(name);
2336}
2337
2338static void kvmppc_xive_init(struct kvm_device *dev)
2339{
2340 struct kvmppc_xive *xive = (struct kvmppc_xive *)dev->private;
2341
2342 /* Register some debug interfaces */
2343 xive_debugfs_init(xive);
2344}
2345
2346struct kvm_device_ops kvm_xive_ops = {
2347 .name = "kvm-xive",
2348 .create = kvmppc_xive_create,
2349 .init = kvmppc_xive_init,
Cédric Le Goater5422e952019-04-18 12:39:42 +02002350 .release = kvmppc_xive_release,
Benjamin Herrenschmidt5af50992017-04-05 17:54:56 +10002351 .set_attr = xive_set_attr,
2352 .get_attr = xive_get_attr,
2353 .has_attr = xive_has_attr,
2354};
2355
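/*
 * Install the virtual-mode hcall handlers at module load time;
 * kvmppc_xive_exit_module() clears them again on unload.
 */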
2356void kvmppc_xive_init_module(void)
2357{
2358 __xive_vm_h_xirr = xive_vm_h_xirr;
2359 __xive_vm_h_ipoll = xive_vm_h_ipoll;
2360 __xive_vm_h_ipi = xive_vm_h_ipi;
2361 __xive_vm_h_cppr = xive_vm_h_cppr;
2362 __xive_vm_h_eoi = xive_vm_h_eoi;
2363}
2364
2365void kvmppc_xive_exit_module(void)
2366{
2367 __xive_vm_h_xirr = NULL;
2368 __xive_vm_h_ipoll = NULL;
2369 __xive_vm_h_ipi = NULL;
2370 __xive_vm_h_cppr = NULL;
2371 __xive_vm_h_eoi = NULL;
2372}