blob: 47c6d4d43c70788c7e9b38cab92434442c58f9fc [file] [log] [blame]
Asias He433fc582016-07-28 15:36:34 +01001/*
2 * vhost transport for vsock
3 *
4 * Copyright (C) 2013-2015 Red Hat, Inc.
5 * Author: Asias He <asias@redhat.com>
6 * Stefan Hajnoczi <stefanha@redhat.com>
7 *
8 * This work is licensed under the terms of the GNU GPL, version 2.
9 */
10#include <linux/miscdevice.h>
11#include <linux/atomic.h>
12#include <linux/module.h>
13#include <linux/mutex.h>
14#include <linux/vmalloc.h>
15#include <net/sock.h>
16#include <linux/virtio_vsock.h>
17#include <linux/vhost.h>
Stefan Hajnoczi834e7722018-11-05 10:35:47 +000018#include <linux/hashtable.h>
Asias He433fc582016-07-28 15:36:34 +010019
20#include <net/af_vsock.h>
21#include "vhost.h"
22
23#define VHOST_VSOCK_DEFAULT_HOST_CID 2
Jason Wange82b9b02019-05-17 00:29:49 -040024/* Max number of bytes transferred before requeueing the job.
25 * Using this limit prevents one virtqueue from starving others. */
26#define VHOST_VSOCK_WEIGHT 0x80000
27/* Max number of packets transferred before requeueing the job.
28 * Using this limit prevents one virtqueue from starving others with
29 * small pkts.
30 */
31#define VHOST_VSOCK_PKT_WEIGHT 256
Asias He433fc582016-07-28 15:36:34 +010032
/* No vsock-specific feature bits: expose only the generic vhost features */
enum {
	VHOST_VSOCK_FEATURES = VHOST_FEATURES,
};
36
37/* Used to track all the vhost_vsock instances on the system. */
Stefan Hajnoczi6db3d8d2018-11-05 17:33:22 +000038static DEFINE_MUTEX(vhost_vsock_mutex);
Stefan Hajnoczi834e7722018-11-05 10:35:47 +000039static DEFINE_READ_MOSTLY_HASHTABLE(vhost_vsock_hash, 8);
Asias He433fc582016-07-28 15:36:34 +010040
/* Per-open-fd state: one vhost-vsock device instance serving one guest. */
struct vhost_vsock {
	struct vhost_dev dev;
	struct vhost_virtqueue vqs[2];	/* indexed by VSOCK_VQ_RX / VSOCK_VQ_TX */

	/* Link to global vhost_vsock_hash, writes use vhost_vsock_mutex */
	struct hlist_node hash;

	struct vhost_work send_pkt_work;	/* drains send_pkt_list into rx vq */
	spinlock_t send_pkt_list_lock;
	struct list_head send_pkt_list; /* host->guest pending packets */

	/* Reply packets queued but not yet placed in the rx vq; used to
	 * throttle tx processing (see vhost_vsock_more_replies()).
	 */
	atomic_t queued_replies;

	u32 guest_cid;	/* 0 until VHOST_VSOCK_SET_GUEST_CID assigns one */
};
56
/* The host side of this transport always uses the fixed host CID (2). */
static u32 vhost_transport_get_local_cid(void)
{
	return VHOST_VSOCK_DEFAULT_HOST_CID;
}
61
Stefan Hajnoczi6db3d8d2018-11-05 17:33:22 +000062/* Callers that dereference the return value must hold vhost_vsock_mutex or the
Stefan Hajnoczi834e7722018-11-05 10:35:47 +000063 * RCU read lock.
64 */
65static struct vhost_vsock *vhost_vsock_get(u32 guest_cid)
Asias He433fc582016-07-28 15:36:34 +010066{
67 struct vhost_vsock *vsock;
68
Stefan Hajnoczi834e7722018-11-05 10:35:47 +000069 hash_for_each_possible_rcu(vhost_vsock_hash, vsock, hash, guest_cid) {
Asias He433fc582016-07-28 15:36:34 +010070 u32 other_cid = vsock->guest_cid;
71
72 /* Skip instances that have no CID yet */
73 if (other_cid == 0)
74 continue;
75
Vaibhav Murkuteff3c1b12018-03-09 08:26:03 +053076 if (other_cid == guest_cid)
Asias He433fc582016-07-28 15:36:34 +010077 return vsock;
Vaibhav Murkuteff3c1b12018-03-09 08:26:03 +053078
Asias He433fc582016-07-28 15:36:34 +010079 }
Asias He433fc582016-07-28 15:36:34 +010080
81 return NULL;
82}
83
/* Drain vsock->send_pkt_list into the guest's rx virtqueue.
 *
 * Runs in vhost worker context (send_pkt_work or the rx kick handler).
 * Stops early when the ring has no buffers, requeueing the packet, and
 * re-enables guest notifications so we get kicked when buffers appear.
 */
static void
vhost_transport_do_send_pkt(struct vhost_vsock *vsock,
			    struct vhost_virtqueue *vq)
{
	struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX];
	bool added = false;
	bool restart_tx = false;

	mutex_lock(&vq->mutex);

	/* Backend not set: device has not been started */
	if (!vq->private_data)
		goto out;

	/* Avoid further vmexits, we're already processing the virtqueue */
	vhost_disable_notify(&vsock->dev, vq);

	for (;;) {
		struct virtio_vsock_pkt *pkt;
		struct iov_iter iov_iter;
		unsigned out, in;
		size_t nbytes;
		size_t len;
		int head;

		spin_lock_bh(&vsock->send_pkt_list_lock);
		if (list_empty(&vsock->send_pkt_list)) {
			spin_unlock_bh(&vsock->send_pkt_list_lock);
			/* Nothing left: re-arm notifications and stop */
			vhost_enable_notify(&vsock->dev, vq);
			break;
		}

		pkt = list_first_entry(&vsock->send_pkt_list,
				       struct virtio_vsock_pkt, list);
		list_del_init(&pkt->list);
		spin_unlock_bh(&vsock->send_pkt_list_lock);

		head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
					 &out, &in, NULL, NULL);
		if (head < 0) {
			/* Descriptor error: put the packet back and bail */
			spin_lock_bh(&vsock->send_pkt_list_lock);
			list_add(&pkt->list, &vsock->send_pkt_list);
			spin_unlock_bh(&vsock->send_pkt_list_lock);
			break;
		}

		if (head == vq->num) {
			/* Ring empty: put the packet back */
			spin_lock_bh(&vsock->send_pkt_list_lock);
			list_add(&pkt->list, &vsock->send_pkt_list);
			spin_unlock_bh(&vsock->send_pkt_list_lock);

			/* We cannot finish yet if more buffers snuck in while
			 * re-enabling notify.
			 */
			if (unlikely(vhost_enable_notify(&vsock->dev, vq))) {
				vhost_disable_notify(&vsock->dev, vq);
				continue;
			}
			break;
		}

		/* rx buffers must be guest-writable (device->driver only) */
		if (out) {
			virtio_transport_free_pkt(pkt);
			vq_err(vq, "Expected 0 output buffers, got %u\n", out);
			break;
		}

		len = iov_length(&vq->iov[out], in);
		iov_iter_init(&iov_iter, READ, &vq->iov[out], in, len);

		nbytes = copy_to_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter);
		if (nbytes != sizeof(pkt->hdr)) {
			virtio_transport_free_pkt(pkt);
			vq_err(vq, "Faulted on copying pkt hdr\n");
			break;
		}

		nbytes = copy_to_iter(pkt->buf, pkt->len, &iov_iter);
		if (nbytes != pkt->len) {
			virtio_transport_free_pkt(pkt);
			vq_err(vq, "Faulted on copying pkt buf\n");
			break;
		}

		vhost_add_used(vq, head, sizeof(pkt->hdr) + pkt->len);
		added = true;

		if (pkt->reply) {
			int val;

			val = atomic_dec_return(&vsock->queued_replies);

			/* Do we have resources to resume tx processing? */
			if (val + 1 == tx_vq->num)
				restart_tx = true;
		}

		/* Deliver to monitoring devices all correctly transmitted
		 * packets.
		 */
		virtio_transport_deliver_tap_pkt(pkt);

		virtio_transport_free_pkt(pkt);
	}
	if (added)
		vhost_signal(&vsock->dev, vq);

out:
	mutex_unlock(&vq->mutex);

	/* tx was throttled on queued_replies and we just made room: kick it */
	if (restart_tx)
		vhost_poll_queue(&tx_vq->poll);
}
196
197static void vhost_transport_send_pkt_work(struct vhost_work *work)
198{
199 struct vhost_virtqueue *vq;
200 struct vhost_vsock *vsock;
201
202 vsock = container_of(work, struct vhost_vsock, send_pkt_work);
203 vq = &vsock->vqs[VSOCK_VQ_RX];
204
205 vhost_transport_do_send_pkt(vsock, vq);
206}
207
/* Transport send_pkt op: queue @pkt for delivery to the guest identified by
 * pkt->hdr.dst_cid.
 *
 * Returns the packet length on success, or -ENODEV (and frees @pkt) if no
 * such guest exists.  Ownership of @pkt passes to the device on success.
 */
static int
vhost_transport_send_pkt(struct virtio_vsock_pkt *pkt)
{
	struct vhost_vsock *vsock;
	int len = pkt->len;	/* sample now: pkt may be consumed once queued */

	rcu_read_lock();

	/* Find the vhost_vsock according to guest context id */
	vsock = vhost_vsock_get(le64_to_cpu(pkt->hdr.dst_cid));
	if (!vsock) {
		rcu_read_unlock();
		virtio_transport_free_pkt(pkt);
		return -ENODEV;
	}

	/* Count in-flight replies so the tx handler can throttle itself */
	if (pkt->reply)
		atomic_inc(&vsock->queued_replies);

	spin_lock_bh(&vsock->send_pkt_list_lock);
	list_add_tail(&pkt->list, &vsock->send_pkt_list);
	spin_unlock_bh(&vsock->send_pkt_list_lock);

	vhost_work_queue(&vsock->dev, &vsock->send_pkt_work);

	rcu_read_unlock();
	return len;
}
236
/* Transport cancel_pkt op: drop every queued packet belonging to @vsk
 * (e.g. when a connect times out).
 *
 * Returns 0 on success, -ENODEV if the peer's vhost_vsock no longer exists.
 */
static int
vhost_transport_cancel_pkt(struct vsock_sock *vsk)
{
	struct vhost_vsock *vsock;
	struct virtio_vsock_pkt *pkt, *n;
	int cnt = 0;
	int ret = -ENODEV;
	LIST_HEAD(freeme);

	rcu_read_lock();

	/* Find the vhost_vsock according to guest context id */
	vsock = vhost_vsock_get(vsk->remote_addr.svm_cid);
	if (!vsock)
		goto out;

	/* Move matching packets onto a private list so freeing happens
	 * outside the spinlock.
	 */
	spin_lock_bh(&vsock->send_pkt_list_lock);
	list_for_each_entry_safe(pkt, n, &vsock->send_pkt_list, list) {
		if (pkt->vsk != vsk)
			continue;
		list_move(&pkt->list, &freeme);
	}
	spin_unlock_bh(&vsock->send_pkt_list_lock);

	list_for_each_entry_safe(pkt, n, &freeme, list) {
		if (pkt->reply)
			cnt++;
		list_del(&pkt->list);
		virtio_transport_free_pkt(pkt);
	}

	if (cnt) {
		struct vhost_virtqueue *tx_vq = &vsock->vqs[VSOCK_VQ_TX];
		int new_cnt;

		/* If removing these replies dropped queued_replies back below
		 * tx_vq->num, tx was throttled: kick it to resume.
		 */
		new_cnt = atomic_sub_return(cnt, &vsock->queued_replies);
		if (new_cnt + cnt >= tx_vq->num && new_cnt < tx_vq->num)
			vhost_poll_queue(&tx_vq->poll);
	}

	ret = 0;
out:
	rcu_read_unlock();
	return ret;
}
282
/* Build a virtio_vsock_pkt from the descriptor chain at the head of the tx
 * virtqueue (guest->host direction).
 *
 * @out/@in: counts of guest-readable/guest-writable descriptors, as returned
 * by vhost_get_vq_desc().  Returns a newly allocated packet owned by the
 * caller, or NULL on malformed input or allocation failure.
 */
static struct virtio_vsock_pkt *
vhost_vsock_alloc_pkt(struct vhost_virtqueue *vq,
		      unsigned int out, unsigned int in)
{
	struct virtio_vsock_pkt *pkt;
	struct iov_iter iov_iter;
	size_t nbytes;
	size_t len;

	/* tx buffers must be guest-readable only (driver->device) */
	if (in != 0) {
		vq_err(vq, "Expected 0 input buffers, got %u\n", in);
		return NULL;
	}

	pkt = kzalloc(sizeof(*pkt), GFP_KERNEL);
	if (!pkt)
		return NULL;

	len = iov_length(vq->iov, out);
	iov_iter_init(&iov_iter, WRITE, vq->iov, out, len);

	nbytes = copy_from_iter(&pkt->hdr, sizeof(pkt->hdr), &iov_iter);
	if (nbytes != sizeof(pkt->hdr)) {
		vq_err(vq, "Expected %zu bytes for pkt->hdr, got %zu bytes\n",
		       sizeof(pkt->hdr), nbytes);
		kfree(pkt);
		return NULL;
	}

	/* Only stream packets carry a payload; pkt->len stays 0 otherwise */
	if (le16_to_cpu(pkt->hdr.type) == VIRTIO_VSOCK_TYPE_STREAM)
		pkt->len = le32_to_cpu(pkt->hdr.len);

	/* No payload */
	if (!pkt->len)
		return pkt;

	/* The pkt is too big: bound the guest-controlled length */
	if (pkt->len > VIRTIO_VSOCK_MAX_PKT_BUF_SIZE) {
		kfree(pkt);
		return NULL;
	}

	pkt->buf = kmalloc(pkt->len, GFP_KERNEL);
	if (!pkt->buf) {
		kfree(pkt);
		return NULL;
	}

	nbytes = copy_from_iter(pkt->buf, pkt->len, &iov_iter);
	if (nbytes != pkt->len) {
		vq_err(vq, "Expected %u byte payload, got %zu bytes\n",
		       pkt->len, nbytes);
		virtio_transport_free_pkt(pkt);
		return NULL;
	}

	return pkt;
}
341
342/* Is there space left for replies to rx packets? */
343static bool vhost_vsock_more_replies(struct vhost_vsock *vsock)
344{
345 struct vhost_virtqueue *vq = &vsock->vqs[VSOCK_VQ_TX];
346 int val;
347
348 smp_rmb(); /* paired with atomic_inc() and atomic_dec_return() */
349 val = atomic_read(&vsock->queued_replies);
350
351 return val < vq->num;
352}
353
/* Kick handler for the tx virtqueue: consume guest->host packets and hand
 * them to the vsock core (or a tap device for monitoring).
 */
static void vhost_vsock_handle_tx_kick(struct vhost_work *work)
{
	struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
						  poll.work);
	struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock,
						 dev);
	struct virtio_vsock_pkt *pkt;
	int head;
	unsigned int out, in;
	bool added = false;

	mutex_lock(&vq->mutex);

	/* Backend not set: device has not been started */
	if (!vq->private_data)
		goto out;

	vhost_disable_notify(&vsock->dev, vq);
	for (;;) {
		u32 len;

		if (!vhost_vsock_more_replies(vsock)) {
			/* Stop tx until the device processes already
			 * pending replies. Leave tx virtqueue
			 * callbacks disabled.
			 */
			goto no_more_replies;
		}

		head = vhost_get_vq_desc(vq, vq->iov, ARRAY_SIZE(vq->iov),
					 &out, &in, NULL, NULL);
		if (head < 0)
			break;

		if (head == vq->num) {
			/* Ring drained: re-enable notify, re-check for
			 * buffers that raced in.
			 */
			if (unlikely(vhost_enable_notify(&vsock->dev, vq))) {
				vhost_disable_notify(&vsock->dev, vq);
				continue;
			}
			break;
		}

		pkt = vhost_vsock_alloc_pkt(vq, out, in);
		if (!pkt) {
			vq_err(vq, "Faulted on pkt\n");
			continue;
		}

		/* Sample len now: recv_pkt()/free_pkt() may release pkt */
		len = pkt->len;

		/* Deliver to monitoring devices all received packets */
		virtio_transport_deliver_tap_pkt(pkt);

		/* Only accept correctly addressed packets */
		if (le64_to_cpu(pkt->hdr.src_cid) == vsock->guest_cid)
			virtio_transport_recv_pkt(pkt);
		else
			virtio_transport_free_pkt(pkt);

		vhost_add_used(vq, head, sizeof(pkt->hdr) + len);
		added = true;
	}

no_more_replies:
	if (added)
		vhost_signal(&vsock->dev, vq);

out:
	mutex_unlock(&vq->mutex);
}
423
424static void vhost_vsock_handle_rx_kick(struct vhost_work *work)
425{
426 struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
427 poll.work);
428 struct vhost_vsock *vsock = container_of(vq->dev, struct vhost_vsock,
429 dev);
430
431 vhost_transport_do_send_pkt(vsock, vq);
432}
433
/* VHOST_VSOCK_SET_RUNNING(1): validate ring access, install the device as
 * each virtqueue's backend and start processing.
 *
 * Returns 0 or a negative errno; on failure every vq backend is cleared so
 * the device is left fully stopped.
 */
static int vhost_vsock_start(struct vhost_vsock *vsock)
{
	struct vhost_virtqueue *vq;
	size_t i;
	int ret;

	mutex_lock(&vsock->dev.mutex);

	/* Only the owning process may start the device */
	ret = vhost_dev_check_owner(&vsock->dev);
	if (ret)
		goto err;

	for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
		vq = &vsock->vqs[i];

		mutex_lock(&vq->mutex);

		if (!vhost_vq_access_ok(vq)) {
			ret = -EFAULT;
			goto err_vq;
		}

		/* Skip vqs that were already started by an earlier call */
		if (!vq->private_data) {
			vq->private_data = vsock;
			ret = vhost_vq_init_access(vq);
			if (ret)
				goto err_vq;
		}

		mutex_unlock(&vq->mutex);
	}

	mutex_unlock(&vsock->dev.mutex);
	return 0;

err_vq:
	/* Undo the vq we failed on (still holding its mutex), then clear
	 * every backend so no vq is left half-started.
	 */
	vq->private_data = NULL;
	mutex_unlock(&vq->mutex);

	for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
		vq = &vsock->vqs[i];

		mutex_lock(&vq->mutex);
		vq->private_data = NULL;
		mutex_unlock(&vq->mutex);
	}
err:
	mutex_unlock(&vsock->dev.mutex);
	return ret;
}
484
485static int vhost_vsock_stop(struct vhost_vsock *vsock)
486{
487 size_t i;
488 int ret;
489
490 mutex_lock(&vsock->dev.mutex);
491
492 ret = vhost_dev_check_owner(&vsock->dev);
493 if (ret)
494 goto err;
495
496 for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
497 struct vhost_virtqueue *vq = &vsock->vqs[i];
498
499 mutex_lock(&vq->mutex);
500 vq->private_data = NULL;
501 mutex_unlock(&vq->mutex);
502 }
503
504err:
505 mutex_unlock(&vsock->dev.mutex);
506 return ret;
507}
508
/* Release a vhost_vsock allocated with kvmalloc() in vhost_vsock_dev_open() */
static void vhost_vsock_free(struct vhost_vsock *vsock)
{
	kvfree(vsock);
}
513
/* open() handler for /dev/vhost-vsock: allocate and initialize one device
 * instance and stash it in file->private_data.
 */
static int vhost_vsock_dev_open(struct inode *inode, struct file *file)
{
	struct vhost_virtqueue **vqs;
	struct vhost_vsock *vsock;
	int ret;

	/* This struct is large and allocation could fail, fall back to vmalloc
	 * if there is no other way.
	 */
	vsock = kvmalloc(sizeof(*vsock), GFP_KERNEL | __GFP_RETRY_MAYFAIL);
	if (!vsock)
		return -ENOMEM;

	vqs = kmalloc_array(ARRAY_SIZE(vsock->vqs), sizeof(*vqs), GFP_KERNEL);
	if (!vqs) {
		ret = -ENOMEM;
		goto out;
	}

	vsock->guest_cid = 0; /* no CID assigned yet */

	atomic_set(&vsock->queued_replies, 0);

	vqs[VSOCK_VQ_TX] = &vsock->vqs[VSOCK_VQ_TX];
	vqs[VSOCK_VQ_RX] = &vsock->vqs[VSOCK_VQ_RX];
	vsock->vqs[VSOCK_VQ_TX].handle_kick = vhost_vsock_handle_tx_kick;
	vsock->vqs[VSOCK_VQ_RX].handle_kick = vhost_vsock_handle_rx_kick;

	/* The weights bound the work done per worker pass so one virtqueue
	 * cannot starve the others (see VHOST_VSOCK_*WEIGHT comments above).
	 */
	vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs),
		       UIO_MAXIOV, VHOST_VSOCK_PKT_WEIGHT,
		       VHOST_VSOCK_WEIGHT);

	file->private_data = vsock;
	spin_lock_init(&vsock->send_pkt_list_lock);
	INIT_LIST_HEAD(&vsock->send_pkt_list);
	vhost_work_init(&vsock->send_pkt_work, vhost_transport_send_pkt_work);
	return 0;

out:
	vhost_vsock_free(vsock);
	return ret;
}
556
557static void vhost_vsock_flush(struct vhost_vsock *vsock)
558{
559 int i;
560
561 for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++)
562 if (vsock->vqs[i].handle_kick)
563 vhost_poll_flush(&vsock->vqs[i].poll);
564 vhost_work_flush(&vsock->dev, &vsock->send_pkt_work);
565}
566
/* Reset a connected socket whose peer vhost_vsock instance has gone away.
 * Invoked via vsock_for_each_connected_socket() during device release.
 */
static void vhost_vsock_reset_orphans(struct sock *sk)
{
	struct vsock_sock *vsk = vsock_sk(sk);

	/* vmci_transport.c doesn't take sk_lock here either. At least we're
	 * under vsock_table_lock so the sock cannot disappear while we're
	 * executing.
	 */

	/* If the peer is still valid, no need to reset connection */
	if (vhost_vsock_get(vsk->remote_addr.svm_cid))
		return;

	/* If the close timeout is pending, let it expire. This avoids races
	 * with the timeout callback.
	 */
	if (vsk->close_work_scheduled)
		return;

	sock_set_flag(sk, SOCK_DONE);
	vsk->peer_shutdown = SHUTDOWN_MASK;
	/* NOTE(review): mainline switched vsock to TCP state constants
	 * (TCP_CLOSE) here; confirm SS_UNCONNECTED is the right value for
	 * this tree before changing anything.
	 */
	sk->sk_state = SS_UNCONNECTED;
	sk->sk_err = ECONNRESET;
	sk->sk_error_report(sk);
}
592
/* release() handler: fully tear down the device when its fd is closed. */
static int vhost_vsock_dev_release(struct inode *inode, struct file *file)
{
	struct vhost_vsock *vsock = file->private_data;

	/* Unpublish from the CID hash so new lookups can no longer find us */
	mutex_lock(&vhost_vsock_mutex);
	if (vsock->guest_cid)
		hash_del_rcu(&vsock->hash);
	mutex_unlock(&vhost_vsock_mutex);

	/* Wait for other CPUs to finish using vsock */
	synchronize_rcu();

	/* Iterating over all connections for all CIDs to find orphans is
	 * inefficient.  Room for improvement here. */
	vsock_for_each_connected_socket(vhost_vsock_reset_orphans);

	vhost_vsock_stop(vsock);
	vhost_vsock_flush(vsock);
	vhost_dev_stop(&vsock->dev);

	/* Drop any host->guest packets still waiting for delivery */
	spin_lock_bh(&vsock->send_pkt_list_lock);
	while (!list_empty(&vsock->send_pkt_list)) {
		struct virtio_vsock_pkt *pkt;

		pkt = list_first_entry(&vsock->send_pkt_list,
				       struct virtio_vsock_pkt, list);
		list_del_init(&pkt->list);
		virtio_transport_free_pkt(pkt);
	}
	spin_unlock_bh(&vsock->send_pkt_list_lock);

	vhost_dev_cleanup(&vsock->dev);
	kfree(vsock->dev.vqs);
	vhost_vsock_free(vsock);
	return 0;
}
629
/* VHOST_VSOCK_SET_GUEST_CID: bind @guest_cid to this instance and publish it
 * in the global CID hash.
 *
 * Returns 0 on success, -EINVAL for reserved or >32-bit CIDs, -EADDRINUSE if
 * another instance already owns the CID.
 */
static int vhost_vsock_set_cid(struct vhost_vsock *vsock, u64 guest_cid)
{
	struct vhost_vsock *other;

	/* Refuse reserved CIDs (hypervisor/host range and U32_MAX) */
	if (guest_cid <= VMADDR_CID_HOST ||
	    guest_cid == U32_MAX)
		return -EINVAL;

	/* 64-bit CIDs are not yet supported */
	if (guest_cid > U32_MAX)
		return -EINVAL;

	/* Refuse if CID is already in use */
	mutex_lock(&vhost_vsock_mutex);
	other = vhost_vsock_get(guest_cid);
	if (other && other != vsock) {
		mutex_unlock(&vhost_vsock_mutex);
		return -EADDRINUSE;
	}

	/* Re-hash under the new CID; concurrent readers use RCU */
	if (vsock->guest_cid)
		hash_del_rcu(&vsock->hash);

	vsock->guest_cid = guest_cid;
	hash_add_rcu(vhost_vsock_hash, &vsock->hash, vsock->guest_cid);
	mutex_unlock(&vhost_vsock_mutex);

	return 0;
}
660
661static int vhost_vsock_set_features(struct vhost_vsock *vsock, u64 features)
662{
663 struct vhost_virtqueue *vq;
664 int i;
665
666 if (features & ~VHOST_VSOCK_FEATURES)
667 return -EOPNOTSUPP;
668
669 mutex_lock(&vsock->dev.mutex);
670 if ((features & (1 << VHOST_F_LOG_ALL)) &&
671 !vhost_log_access_ok(&vsock->dev)) {
672 mutex_unlock(&vsock->dev.mutex);
673 return -EFAULT;
674 }
675
676 for (i = 0; i < ARRAY_SIZE(vsock->vqs); i++) {
677 vq = &vsock->vqs[i];
678 mutex_lock(&vq->mutex);
679 vq->acked_features = features;
680 mutex_unlock(&vq->mutex);
681 }
682 mutex_unlock(&vsock->dev.mutex);
683 return 0;
684}
685
/* ioctl() handler for /dev/vhost-vsock: vsock-specific commands first,
 * everything else falls through to the generic vhost dev/vring ioctls.
 */
static long vhost_vsock_dev_ioctl(struct file *f, unsigned int ioctl,
				  unsigned long arg)
{
	struct vhost_vsock *vsock = f->private_data;
	void __user *argp = (void __user *)arg;
	u64 guest_cid;
	u64 features;
	int start;
	int r;

	switch (ioctl) {
	case VHOST_VSOCK_SET_GUEST_CID:
		if (copy_from_user(&guest_cid, argp, sizeof(guest_cid)))
			return -EFAULT;
		return vhost_vsock_set_cid(vsock, guest_cid);
	case VHOST_VSOCK_SET_RUNNING:
		if (copy_from_user(&start, argp, sizeof(start)))
			return -EFAULT;
		if (start)
			return vhost_vsock_start(vsock);
		else
			return vhost_vsock_stop(vsock);
	case VHOST_GET_FEATURES:
		features = VHOST_VSOCK_FEATURES;
		if (copy_to_user(argp, &features, sizeof(features)))
			return -EFAULT;
		return 0;
	case VHOST_SET_FEATURES:
		if (copy_from_user(&features, argp, sizeof(features)))
			return -EFAULT;
		return vhost_vsock_set_features(vsock, features);
	default:
		/* Generic vhost ioctls; on success flush workers so none
		 * keep running with stale vring state.
		 */
		mutex_lock(&vsock->dev.mutex);
		r = vhost_dev_ioctl(&vsock->dev, ioctl, argp);
		if (r == -ENOIOCTLCMD)
			r = vhost_vring_ioctl(&vsock->dev, ioctl, argp);
		else
			vhost_vsock_flush(vsock);
		mutex_unlock(&vsock->dev.mutex);
		return r;
	}
}
728
#ifdef CONFIG_COMPAT
/* 32-bit compat ioctl: translate the user pointer, reuse the native handler */
static long vhost_vsock_dev_compat_ioctl(struct file *f, unsigned int ioctl,
					 unsigned long arg)
{
	return vhost_vsock_dev_ioctl(f, ioctl, (unsigned long)compat_ptr(arg));
}
#endif
736
/* File operations backing the /dev/vhost-vsock misc device */
static const struct file_operations vhost_vsock_fops = {
	.owner          = THIS_MODULE,
	.open           = vhost_vsock_dev_open,
	.release        = vhost_vsock_dev_release,
	.llseek		= noop_llseek,
	.unlocked_ioctl = vhost_vsock_dev_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl   = vhost_vsock_dev_compat_ioctl,
#endif
};
747
/* The /dev/vhost-vsock character device node */
static struct miscdevice vhost_vsock_misc = {
	.minor = VHOST_VSOCK_MINOR,
	.name = "vhost-vsock",
	.fops = &vhost_vsock_fops,
};
753
/* vsock transport ops table: most callbacks are the shared virtio transport
 * helpers; only CID lookup, packet send and packet cancel are vhost-specific.
 */
static struct virtio_transport vhost_transport = {
	.transport = {
		.get_local_cid            = vhost_transport_get_local_cid,

		.init                     = virtio_transport_do_socket_init,
		.destruct                 = virtio_transport_destruct,
		.release                  = virtio_transport_release,
		.connect                  = virtio_transport_connect,
		.shutdown                 = virtio_transport_shutdown,
		.cancel_pkt               = vhost_transport_cancel_pkt,

		.dgram_enqueue            = virtio_transport_dgram_enqueue,
		.dgram_dequeue            = virtio_transport_dgram_dequeue,
		.dgram_bind               = virtio_transport_dgram_bind,
		.dgram_allow              = virtio_transport_dgram_allow,

		.stream_enqueue           = virtio_transport_stream_enqueue,
		.stream_dequeue           = virtio_transport_stream_dequeue,
		.stream_has_data          = virtio_transport_stream_has_data,
		.stream_has_space         = virtio_transport_stream_has_space,
		.stream_rcvhiwat          = virtio_transport_stream_rcvhiwat,
		.stream_is_active         = virtio_transport_stream_is_active,
		.stream_allow             = virtio_transport_stream_allow,

		.notify_poll_in           = virtio_transport_notify_poll_in,
		.notify_poll_out          = virtio_transport_notify_poll_out,
		.notify_recv_init         = virtio_transport_notify_recv_init,
		.notify_recv_pre_block    = virtio_transport_notify_recv_pre_block,
		.notify_recv_pre_dequeue  = virtio_transport_notify_recv_pre_dequeue,
		.notify_recv_post_dequeue = virtio_transport_notify_recv_post_dequeue,
		.notify_send_init         = virtio_transport_notify_send_init,
		.notify_send_pre_block    = virtio_transport_notify_send_pre_block,
		.notify_send_pre_enqueue  = virtio_transport_notify_send_pre_enqueue,
		.notify_send_post_enqueue = virtio_transport_notify_send_post_enqueue,

		.set_buffer_size          = virtio_transport_set_buffer_size,
		.set_min_buffer_size      = virtio_transport_set_min_buffer_size,
		.set_max_buffer_size      = virtio_transport_set_max_buffer_size,
		.get_buffer_size          = virtio_transport_get_buffer_size,
		.get_min_buffer_size      = virtio_transport_get_min_buffer_size,
		.get_max_buffer_size      = virtio_transport_get_max_buffer_size,
	},

	.send_pkt = vhost_transport_send_pkt,
};
799
800static int __init vhost_vsock_init(void)
801{
802 int ret;
803
804 ret = vsock_core_init(&vhost_transport.transport);
805 if (ret < 0)
806 return ret;
807 return misc_register(&vhost_vsock_misc);
808};
809
810static void __exit vhost_vsock_exit(void)
811{
812 misc_deregister(&vhost_vsock_misc);
813 vsock_core_exit();
814};
815
816module_init(vhost_vsock_init);
817module_exit(vhost_vsock_exit);
818MODULE_LICENSE("GPL v2");
819MODULE_AUTHOR("Asias He");
820MODULE_DESCRIPTION("vhost transport for vsock ");
Stefan Hajnoczif4660cc2017-05-10 10:19:18 -0400821MODULE_ALIAS_MISCDEV(VHOST_VSOCK_MINOR);
822MODULE_ALIAS("devname:vhost-vsock");