blob: 58e7d33bddfca85be542b06d11f1f6a2192bbd6a [file] [log] [blame]
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2016 Red Hat, Inc.
 * Author: Michael S. Tsirkin <mst@redhat.com>
 *
 * Simple descriptor-based ring. virtio 0.9 compatible event index is used for
 * signalling, unconditionally.
 */
9#define _GNU_SOURCE
10#include "main.h"
11#include <stdlib.h>
12#include <stdio.h>
13#include <string.h>
14
/* Decide whether the peer has to be notified.
 *
 * Next  - Where next entry will be written.
 * Prev  - "Next" value when event triggered previously.
 * Event - Peer requested event after writing this entry.
 *
 * Everything is computed modulo 2^16, so the test keeps working when the
 * indices wrap: the result is true exactly when Event lies in [Prev, Next).
 */
static inline bool need_event(unsigned short event,
			      unsigned short next,
			      unsigned short prev)
{
	/* distance from the requested entry to the last one written */
	unsigned short since_event = next - event - 1;
	/* number of entries written since the previous notification */
	unsigned short since_prev = next - prev;

	return since_event < since_prev;
}
25
/* Design:
 * - The guest publishes a descriptor by giving it a unique index value and
 *   setting DESC_HW in its flags.
 * - The host hands a descriptor back by overwriting it with the correct len
 *   and index and with DESC_HW cleared.
 * - In both directions flags is the last field to be written.
 */
#define DESC_HW 0x1

/* One ring entry. */
struct desc {
	unsigned short flags;	/* DESC_HW while the host owns the entry */
	unsigned short index;	/* slot in the guest's data[] side table */
	unsigned int len;
	unsigned long long addr;
};
39
/* Amount of padding needed to avoid false cache sharing. */
#define HOST_GUEST_PADDING 0x80

/* Mostly read.
 * Each event index sits at the start of its own HOST_GUEST_PADDING-sized
 * region of the structure.
 */
struct event {
	unsigned short kick_index;
	unsigned char reserved0[HOST_GUEST_PADDING - sizeof(unsigned short)];
	unsigned short call_index;
	unsigned char reserved1[HOST_GUEST_PADDING - sizeof(unsigned short)];
};
50
/* Guest-side bookkeeping for one descriptor, looked up by desc.index. */
struct data {
	void *buf; /* descriptor is writeable, we can't get buf from there */
	void *data;
};

/* Side table of struct data entries, allocated in alloc_ring(). */
struct data *data;

/* The descriptor ring and the event-index block, allocated in alloc_ring(). */
struct desc *ring;
struct event *event;
58
59struct guest {
60 unsigned avail_idx;
61 unsigned last_used_idx;
62 unsigned num_free;
63 unsigned kicked_avail_idx;
64 unsigned char reserved[HOST_GUEST_PADDING - 12];
65} guest;
66
67struct host {
68 /* we do not need to track last avail index
69 * unless we have more than one in flight.
70 */
71 unsigned used_idx;
72 unsigned called_used_idx;
73 unsigned char reserved[HOST_GUEST_PADDING - 4];
74} host;
75
76/* implemented by ring */
77void alloc_ring(void)
78{
79 int ret;
80 int i;
81
82 ret = posix_memalign((void **)&ring, 0x1000, ring_size * sizeof *ring);
83 if (ret) {
84 perror("Unable to allocate ring buffer.\n");
85 exit(3);
86 }
Peter Malone03ee47a2017-02-16 15:42:26 -050087 event = calloc(1, sizeof(*event));
Michael S. Tsirkin481eaec2016-01-21 14:44:10 +020088 if (!event) {
89 perror("Unable to allocate event buffer.\n");
90 exit(3);
91 }
Michael S. Tsirkin481eaec2016-01-21 14:44:10 +020092 guest.avail_idx = 0;
93 guest.kicked_avail_idx = -1;
94 guest.last_used_idx = 0;
95 host.used_idx = 0;
96 host.called_used_idx = -1;
97 for (i = 0; i < ring_size; ++i) {
98 struct desc desc = {
99 .index = i,
100 };
101 ring[i] = desc;
102 }
103 guest.num_free = ring_size;
Peter Malone03ee47a2017-02-16 15:42:26 -0500104 data = calloc(ring_size, sizeof(*data));
Michael S. Tsirkin481eaec2016-01-21 14:44:10 +0200105 if (!data) {
106 perror("Unable to allocate data buffer.\n");
107 exit(3);
108 }
Michael S. Tsirkin481eaec2016-01-21 14:44:10 +0200109}
110
111/* guest side */
112int add_inbuf(unsigned len, void *buf, void *datap)
113{
114 unsigned head, index;
115
116 if (!guest.num_free)
117 return -1;
118
119 guest.num_free--;
120 head = (ring_size - 1) & (guest.avail_idx++);
121
122 /* Start with a write. On MESI architectures this helps
123 * avoid a shared state with consumer that is polling this descriptor.
124 */
125 ring[head].addr = (unsigned long)(void*)buf;
126 ring[head].len = len;
127 /* read below might bypass write above. That is OK because it's just an
128 * optimization. If this happens, we will get the cache line in a
129 * shared state which is unfortunate, but probably not worth it to
130 * add an explicit full barrier to avoid this.
131 */
132 barrier();
133 index = ring[head].index;
134 data[index].buf = buf;
135 data[index].data = datap;
136 /* Barrier A (for pairing) */
137 smp_release();
138 ring[head].flags = DESC_HW;
139
140 return 0;
141}
142
143void *get_buf(unsigned *lenp, void **bufp)
144{
145 unsigned head = (ring_size - 1) & guest.last_used_idx;
146 unsigned index;
147 void *datap;
148
149 if (ring[head].flags & DESC_HW)
150 return NULL;
151 /* Barrier B (for pairing) */
152 smp_acquire();
153 *lenp = ring[head].len;
154 index = ring[head].index & (ring_size - 1);
155 datap = data[index].data;
156 *bufp = data[index].buf;
157 data[index].buf = NULL;
158 data[index].data = NULL;
159 guest.num_free++;
160 guest.last_used_idx++;
161 return datap;
162}
163
Paolo Bonzinid3c35892016-10-06 11:39:11 +0200164bool used_empty()
Michael S. Tsirkin481eaec2016-01-21 14:44:10 +0200165{
166 unsigned head = (ring_size - 1) & guest.last_used_idx;
167
Paolo Bonzinid3c35892016-10-06 11:39:11 +0200168 return (ring[head].flags & DESC_HW);
Michael S. Tsirkin481eaec2016-01-21 14:44:10 +0200169}
170
/* Guest: intentionally a no-op. */
void disable_call()
{
	/* Not disabling calls at all may cost some extra interrupts,
	 * but it reduces the number of cache misses.
	 */
}
177
178bool enable_call()
179{
Michael S. Tsirkin481eaec2016-01-21 14:44:10 +0200180 event->call_index = guest.last_used_idx;
181 /* Flush call index write */
182 /* Barrier D (for pairing) */
183 smp_mb();
Paolo Bonzinid3c35892016-10-06 11:39:11 +0200184 return used_empty();
Michael S. Tsirkin481eaec2016-01-21 14:44:10 +0200185}
186
187void kick_available(void)
188{
Michael S. Tsirkinf229a552017-10-26 04:48:01 +0300189 bool need;
190
Michael S. Tsirkin481eaec2016-01-21 14:44:10 +0200191 /* Flush in previous flags write */
192 /* Barrier C (for pairing) */
193 smp_mb();
Michael S. Tsirkinf229a552017-10-26 04:48:01 +0300194 need = need_event(event->kick_index,
195 guest.avail_idx,
196 guest.kicked_avail_idx);
Michael S. Tsirkin481eaec2016-01-21 14:44:10 +0200197
198 guest.kicked_avail_idx = guest.avail_idx;
Michael S. Tsirkinf229a552017-10-26 04:48:01 +0300199 if (need)
200 kick();
Michael S. Tsirkin481eaec2016-01-21 14:44:10 +0200201}
202
203/* host side */
/* Host: intentionally a no-op. */
void disable_kick()
{
	/* Not disabling kicks at all may cost some extra interrupts,
	 * but it reduces the number of cache misses.
	 */
}
210
211bool enable_kick()
212{
Michael S. Tsirkin481eaec2016-01-21 14:44:10 +0200213 event->kick_index = host.used_idx;
214 /* Barrier C (for pairing) */
215 smp_mb();
Paolo Bonzinid3c35892016-10-06 11:39:11 +0200216 return avail_empty();
Michael S. Tsirkin481eaec2016-01-21 14:44:10 +0200217}
218
Paolo Bonzinid3c35892016-10-06 11:39:11 +0200219bool avail_empty()
Michael S. Tsirkin481eaec2016-01-21 14:44:10 +0200220{
221 unsigned head = (ring_size - 1) & host.used_idx;
222
Paolo Bonzinid3c35892016-10-06 11:39:11 +0200223 return !(ring[head].flags & DESC_HW);
Michael S. Tsirkin481eaec2016-01-21 14:44:10 +0200224}
225
226bool use_buf(unsigned *lenp, void **bufp)
227{
228 unsigned head = (ring_size - 1) & host.used_idx;
229
230 if (!(ring[head].flags & DESC_HW))
231 return false;
232
233 /* make sure length read below is not speculated */
234 /* Barrier A (for pairing) */
235 smp_acquire();
236
237 /* simple in-order completion: we don't need
238 * to touch index at all. This also means we
239 * can just modify the descriptor in-place.
240 */
241 ring[head].len--;
242 /* Make sure len is valid before flags.
243 * Note: alternative is to write len and flags in one access -
244 * possible on 64 bit architectures but wmb is free on Intel anyway
245 * so I have no way to test whether it's a gain.
246 */
247 /* Barrier B (for pairing) */
248 smp_release();
249 ring[head].flags = 0;
250 host.used_idx++;
251 return true;
252}
253
254void call_used(void)
255{
Michael S. Tsirkinf229a552017-10-26 04:48:01 +0300256 bool need;
257
Michael S. Tsirkin481eaec2016-01-21 14:44:10 +0200258 /* Flush in previous flags write */
259 /* Barrier D (for pairing) */
260 smp_mb();
Michael S. Tsirkinf229a552017-10-26 04:48:01 +0300261
262 need = need_event(event->call_index,
Michael S. Tsirkin481eaec2016-01-21 14:44:10 +0200263 host.used_idx,
Michael S. Tsirkinf229a552017-10-26 04:48:01 +0300264 host.called_used_idx);
Michael S. Tsirkin481eaec2016-01-21 14:44:10 +0200265
266 host.called_used_idx = host.used_idx;
Michael S. Tsirkinf229a552017-10-26 04:48:01 +0300267
268 if (need)
269 call();
Michael S. Tsirkin481eaec2016-01-21 14:44:10 +0200270}