/*
 * drivers/net/veth.c
 *
 * Copyright (C) 2007 OpenVZ http://openvz.org, SWsoft Inc
 *
 * Author: Pavel Emelianov <xemul@openvz.org>
 * Ethtool interface from: Eric W. Biederman <ebiederm@xmission.com>
 *
 */

#include <linux/netdevice.h>
#include <linux/slab.h>
#include <linux/ethtool.h>
#include <linux/etherdevice.h>
#include <linux/u64_stats_sync.h>

#include <net/rtnetlink.h>
#include <net/dst.h>
#include <net/xfrm.h>
#include <linux/veth.h>
#include <linux/module.h>
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/ptr_ring.h>
#include <linux/skb_array.h>
#include <linux/bpf_trace.h>

#define DRV_NAME	"veth"
#define DRV_VERSION	"1.0"

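/* Capacity of each device's xdp_ring, and the headroom reserved at the
 * front of rebuilt receive buffers so an attached XDP program has room
 * to grow packet headers.
 */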
#define VETH_RING_SIZE		256
#define VETH_XDP_HEADROOM	(XDP_PACKET_HEADROOM + NET_IP_ALIGN)

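/* Per-cpu transmit counters; u64_stats_sync lets readers take a
 * consistent 64-bit snapshot on 32-bit architectures.
 */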
struct pcpu_vstats {
	u64			packets;
	u64			bytes;
	struct u64_stats_sync	syncp;
};

struct veth_priv {
	struct napi_struct	xdp_napi;
	struct net_device	*dev;
	struct bpf_prog __rcu	*xdp_prog;
	struct bpf_prog		*_xdp_prog;
	struct net_device __rcu	*peer;
	atomic64_t		dropped;
	unsigned		requested_headroom;
	bool			rx_notify_masked;
	struct ptr_ring		xdp_ring;
	struct xdp_rxq_info	xdp_rxq;
};

/*
 * ethtool interface
 */

static struct {
	const char string[ETH_GSTRING_LEN];
} ethtool_stats_keys[] = {
	{ "peer_ifindex" },
};

static int veth_get_link_ksettings(struct net_device *dev,
				   struct ethtool_link_ksettings *cmd)
{
	cmd->base.speed = SPEED_10000;
	cmd->base.duplex = DUPLEX_FULL;
	cmd->base.port = PORT_TP;
	cmd->base.autoneg = AUTONEG_DISABLE;
	return 0;
}

static void veth_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
{
	strlcpy(info->driver, DRV_NAME, sizeof(info->driver));
	strlcpy(info->version, DRV_VERSION, sizeof(info->version));
}

static void veth_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
{
	switch (stringset) {
	case ETH_SS_STATS:
		memcpy(buf, &ethtool_stats_keys, sizeof(ethtool_stats_keys));
		break;
	}
}

static int veth_get_sset_count(struct net_device *dev, int sset)
{
	switch (sset) {
	case ETH_SS_STATS:
		return ARRAY_SIZE(ethtool_stats_keys);
	default:
		return -EOPNOTSUPP;
	}
}

static void veth_get_ethtool_stats(struct net_device *dev,
				   struct ethtool_stats *stats, u64 *data)
{
	struct veth_priv *priv = netdev_priv(dev);
	struct net_device *peer = rtnl_dereference(priv->peer);

	data[0] = peer ? peer->ifindex : 0;
}

static const struct ethtool_ops veth_ethtool_ops = {
	.get_drvinfo		= veth_get_drvinfo,
	.get_link		= ethtool_op_get_link,
	.get_strings		= veth_get_strings,
	.get_sset_count		= veth_get_sset_count,
	.get_ethtool_stats	= veth_get_ethtool_stats,
	.get_link_ksettings	= veth_get_link_ksettings,
};

/* general routines */

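/* Schedule the device's NAPI handler unless it is already running.
 * The barrier pairs with smp_store_mb() in veth_poll(): either the
 * poller sees the newly produced ring entry, or the producer sees
 * rx_notify_masked cleared and reschedules NAPI.
 */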
static void __veth_xdp_flush(struct veth_priv *priv)
{
	/* Write ptr_ring before reading rx_notify_masked */
	smp_mb();
	if (!priv->rx_notify_masked) {
		priv->rx_notify_masked = true;
		napi_schedule(&priv->xdp_napi);
	}
}

static int veth_xdp_rx(struct veth_priv *priv, struct sk_buff *skb)
{
	if (unlikely(ptr_ring_produce(&priv->xdp_ring, skb))) {
		dev_kfree_skb_any(skb);
		return NET_RX_DROP;
	}

	return NET_RX_SUCCESS;
}

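/* __dev_forward_skb() scrubs the skb and drops it if the receiving
 * device cannot accept it; otherwise the frame goes to the XDP ring
 * when a program is attached, or to the normal netif_rx() path.
 */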
static int veth_forward_skb(struct net_device *dev, struct sk_buff *skb, bool xdp)
{
	struct veth_priv *priv = netdev_priv(dev);

	return __dev_forward_skb(dev, skb) ?: xdp ?
		veth_xdp_rx(priv, skb) :
		netif_rx(skb);
}

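/* Transmit path: a frame sent on one end of the pair is received by
 * the peer.  Bytes/packets are accounted on the transmitting device;
 * failures only bump the atomic dropped counter.
 */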
static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct veth_priv *rcv_priv, *priv = netdev_priv(dev);
	struct net_device *rcv;
	int length = skb->len;
	bool rcv_xdp = false;

	rcu_read_lock();
	rcv = rcu_dereference(priv->peer);
	if (unlikely(!rcv)) {
		kfree_skb(skb);
		goto drop;
	}

	rcv_priv = netdev_priv(rcv);
	rcv_xdp = rcu_access_pointer(rcv_priv->xdp_prog);

	if (likely(veth_forward_skb(rcv, skb, rcv_xdp) == NET_RX_SUCCESS)) {
		struct pcpu_vstats *stats = this_cpu_ptr(dev->vstats);

		u64_stats_update_begin(&stats->syncp);
		stats->bytes += length;
		stats->packets++;
		u64_stats_update_end(&stats->syncp);
	} else {
drop:
		atomic64_inc(&priv->dropped);
	}

	if (rcv_xdp)
		__veth_xdp_flush(rcv_priv);

	rcu_read_unlock();

	return NETDEV_TX_OK;
}

static u64 veth_stats_one(struct pcpu_vstats *result, struct net_device *dev)
{
	struct veth_priv *priv = netdev_priv(dev);
	int cpu;

	result->packets = 0;
	result->bytes = 0;
	for_each_possible_cpu(cpu) {
		struct pcpu_vstats *stats = per_cpu_ptr(dev->vstats, cpu);
		u64 packets, bytes;
		unsigned int start;

		do {
			start = u64_stats_fetch_begin_irq(&stats->syncp);
			packets = stats->packets;
			bytes = stats->bytes;
		} while (u64_stats_fetch_retry_irq(&stats->syncp, start));
		result->packets += packets;
		result->bytes += bytes;
	}
	return atomic64_read(&priv->dropped);
}

static void veth_get_stats64(struct net_device *dev,
			     struct rtnl_link_stats64 *tot)
{
	struct veth_priv *priv = netdev_priv(dev);
	struct net_device *peer;
	struct pcpu_vstats one;

	tot->tx_dropped = veth_stats_one(&one, dev);
	tot->tx_bytes = one.bytes;
	tot->tx_packets = one.packets;

	rcu_read_lock();
	peer = rcu_dereference(priv->peer);
	if (peer) {
		tot->rx_dropped = veth_stats_one(&one, peer);
		tot->rx_bytes = one.bytes;
		tot->rx_packets = one.packets;
	}
	rcu_read_unlock();
}

/* fake multicast ability */
static void veth_set_multicast_list(struct net_device *dev)
{
}

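/* Wrap a raw buffer in an skb.  A zero buflen means the buffer was
 * sized exactly for headroom + len plus the trailing skb_shared_info.
 */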
static struct sk_buff *veth_build_skb(void *head, int headroom, int len,
				      int buflen)
{
	struct sk_buff *skb;

	if (!buflen) {
		buflen = SKB_DATA_ALIGN(headroom + len) +
			 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
	}
	skb = build_skb(head, buflen);
	if (!skb)
		return NULL;

	skb_reserve(skb, headroom);
	skb_put(skb, len);

	return skb;
}

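/* Run the attached XDP program on an skb consumed from the ring.  If
 * the skb is shared, nonlinear, or short on headroom, its contents are
 * first copied into a freshly allocated page so the program can safely
 * read and write the packet.
 */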
static struct sk_buff *veth_xdp_rcv_skb(struct veth_priv *priv,
					struct sk_buff *skb)
{
	u32 pktlen, headroom, act, metalen;
	void *orig_data, *orig_data_end;
	struct bpf_prog *xdp_prog;
	int mac_len, delta, off;
	struct xdp_buff xdp;

	rcu_read_lock();
	xdp_prog = rcu_dereference(priv->xdp_prog);
	if (unlikely(!xdp_prog)) {
		rcu_read_unlock();
		goto out;
	}

	mac_len = skb->data - skb_mac_header(skb);
	pktlen = skb->len + mac_len;
	headroom = skb_headroom(skb) - mac_len;

	if (skb_shared(skb) || skb_head_is_locked(skb) ||
	    skb_is_nonlinear(skb) || headroom < XDP_PACKET_HEADROOM) {
		struct sk_buff *nskb;
		int size, head_off;
		void *head, *start;
		struct page *page;

		size = SKB_DATA_ALIGN(VETH_XDP_HEADROOM + pktlen) +
		       SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
		if (size > PAGE_SIZE)
			goto drop;

		page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
		if (!page)
			goto drop;

		head = page_address(page);
		start = head + VETH_XDP_HEADROOM;
		if (skb_copy_bits(skb, -mac_len, start, pktlen)) {
			page_frag_free(head);
			goto drop;
		}

		nskb = veth_build_skb(head,
				      VETH_XDP_HEADROOM + mac_len, skb->len,
				      PAGE_SIZE);
		if (!nskb) {
			page_frag_free(head);
			goto drop;
		}

		skb_copy_header(nskb, skb);
		head_off = skb_headroom(nskb) - skb_headroom(skb);
		skb_headers_offset_update(nskb, head_off);
		if (skb->sk)
			skb_set_owner_w(nskb, skb->sk);
		consume_skb(skb);
		skb = nskb;
	}

	xdp.data_hard_start = skb->head;
	xdp.data = skb_mac_header(skb);
	xdp.data_end = xdp.data + pktlen;
	xdp.data_meta = xdp.data;
	xdp.rxq = &priv->xdp_rxq;
	orig_data = xdp.data;
	orig_data_end = xdp.data_end;

	act = bpf_prog_run_xdp(xdp_prog, &xdp);

	switch (act) {
	case XDP_PASS:
		break;
	default:
		bpf_warn_invalid_xdp_action(act);
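		/* fall through */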
	case XDP_ABORTED:
		trace_xdp_exception(priv->dev, xdp_prog, act);
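		/* fall through */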
	case XDP_DROP:
		goto drop;
	}
	rcu_read_unlock();

	delta = orig_data - xdp.data;
	off = mac_len + delta;
	if (off > 0)
		__skb_push(skb, off);
	else if (off < 0)
		__skb_pull(skb, -off);
	skb->mac_header -= delta;
	off = xdp.data_end - orig_data_end;
	if (off != 0)
		__skb_put(skb, off);
	skb->protocol = eth_type_trans(skb, priv->dev);

	metalen = xdp.data - xdp.data_meta;
	if (metalen)
		skb_metadata_set(skb, metalen);
out:
	return skb;
drop:
	rcu_read_unlock();
	kfree_skb(skb);
	return NULL;
}

static int veth_xdp_rcv(struct veth_priv *priv, int budget)
{
	int i, done = 0;

	for (i = 0; i < budget; i++) {
		struct sk_buff *skb = __ptr_ring_consume(&priv->xdp_ring);

		if (!skb)
			break;

		skb = veth_xdp_rcv_skb(priv, skb);

		if (skb)
			napi_gro_receive(&priv->xdp_napi, skb);

		done++;
	}

	return done;
}

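/* NAPI handler.  After napi_complete_done() the ring is re-checked:
 * smp_store_mb() orders clearing rx_notify_masked against the read of
 * the ring, closing the race with a concurrent __veth_xdp_flush().
 */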
static int veth_poll(struct napi_struct *napi, int budget)
{
	struct veth_priv *priv =
		container_of(napi, struct veth_priv, xdp_napi);
	int done;

	done = veth_xdp_rcv(priv, budget);

	if (done < budget && napi_complete_done(napi, done)) {
		/* Write rx_notify_masked before reading ptr_ring */
		smp_store_mb(priv->rx_notify_masked, false);
		if (unlikely(!__ptr_ring_empty(&priv->xdp_ring))) {
			priv->rx_notify_masked = true;
			napi_schedule(&priv->xdp_napi);
		}
	}

	return done;
}

static int veth_napi_add(struct net_device *dev)
{
	struct veth_priv *priv = netdev_priv(dev);
	int err;

	err = ptr_ring_init(&priv->xdp_ring, VETH_RING_SIZE, GFP_KERNEL);
	if (err)
		return err;

	netif_napi_add(dev, &priv->xdp_napi, veth_poll, NAPI_POLL_WEIGHT);
	napi_enable(&priv->xdp_napi);

	return 0;
}

static void veth_napi_del(struct net_device *dev)
{
	struct veth_priv *priv = netdev_priv(dev);

	napi_disable(&priv->xdp_napi);
	netif_napi_del(&priv->xdp_napi);
	priv->rx_notify_masked = false;
	ptr_ring_cleanup(&priv->xdp_ring, __skb_array_destroy_skb);
}

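/* Register the rxq info, memory model, and NAPI ring on first enable;
 * later calls only publish the program pointer.  Called both from
 * ndo_open and when a program is attached while the device is up.
 */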
static int veth_enable_xdp(struct net_device *dev)
{
	struct veth_priv *priv = netdev_priv(dev);
	int err;

	if (!xdp_rxq_info_is_reg(&priv->xdp_rxq)) {
		err = xdp_rxq_info_reg(&priv->xdp_rxq, dev, 0);
		if (err < 0)
			return err;

		err = xdp_rxq_info_reg_mem_model(&priv->xdp_rxq,
						 MEM_TYPE_PAGE_SHARED, NULL);
		if (err < 0)
			goto err;

		err = veth_napi_add(dev);
		if (err)
			goto err;
	}

	rcu_assign_pointer(priv->xdp_prog, priv->_xdp_prog);

	return 0;
err:
	xdp_rxq_info_unreg(&priv->xdp_rxq);

	return err;
}

static void veth_disable_xdp(struct net_device *dev)
{
	struct veth_priv *priv = netdev_priv(dev);

	rcu_assign_pointer(priv->xdp_prog, NULL);
	veth_napi_del(dev);
	xdp_rxq_info_unreg(&priv->xdp_rxq);
}

static int veth_open(struct net_device *dev)
{
	struct veth_priv *priv = netdev_priv(dev);
	struct net_device *peer = rtnl_dereference(priv->peer);
	int err;

	if (!peer)
		return -ENOTCONN;

	if (priv->_xdp_prog) {
		err = veth_enable_xdp(dev);
		if (err)
			return err;
	}

	if (peer->flags & IFF_UP) {
		netif_carrier_on(dev);
		netif_carrier_on(peer);
	}

	return 0;
}

static int veth_close(struct net_device *dev)
{
	struct veth_priv *priv = netdev_priv(dev);
	struct net_device *peer = rtnl_dereference(priv->peer);

	netif_carrier_off(dev);
	if (peer)
		netif_carrier_off(peer);

	if (priv->_xdp_prog)
		veth_disable_xdp(dev);

	return 0;
}

static int is_valid_veth_mtu(int mtu)
{
	return mtu >= ETH_MIN_MTU && mtu <= ETH_MAX_MTU;
}

static int veth_dev_init(struct net_device *dev)
{
	dev->vstats = netdev_alloc_pcpu_stats(struct pcpu_vstats);
	if (!dev->vstats)
		return -ENOMEM;
	return 0;
}

static void veth_dev_free(struct net_device *dev)
{
	free_percpu(dev->vstats);
}

#ifdef CONFIG_NET_POLL_CONTROLLER
static void veth_poll_controller(struct net_device *dev)
{
	/* veth only receives frames when its peer sends one.
	 * Since it has nothing to do with disabling irqs, we are guaranteed
	 * never to have pending data when we poll for it, so there is
	 * nothing to do here.
	 *
	 * We need this though so netpoll recognizes us as an interface that
	 * supports polling, which enables bridge devices in virt setups to
	 * still use netconsole.
	 */
}
#endif	/* CONFIG_NET_POLL_CONTROLLER */

static int veth_get_iflink(const struct net_device *dev)
{
	struct veth_priv *priv = netdev_priv(dev);
	struct net_device *peer;
	int iflink;

	rcu_read_lock();
	peer = rcu_dereference(priv->peer);
	iflink = peer ? peer->ifindex : 0;
	rcu_read_unlock();

	return iflink;
}

static netdev_features_t veth_fix_features(struct net_device *dev,
					   netdev_features_t features)
{
	struct veth_priv *priv = netdev_priv(dev);
	struct net_device *peer;

	peer = rtnl_dereference(priv->peer);
	if (peer) {
		struct veth_priv *peer_priv = netdev_priv(peer);

		if (peer_priv->_xdp_prog)
			features &= ~NETIF_F_GSO_SOFTWARE;
	}

	return features;
}

static void veth_set_rx_headroom(struct net_device *dev, int new_hr)
{
	struct veth_priv *peer_priv, *priv = netdev_priv(dev);
	struct net_device *peer;

	if (new_hr < 0)
		new_hr = 0;

	rcu_read_lock();
	peer = rcu_dereference(priv->peer);
	if (unlikely(!peer))
		goto out;

	peer_priv = netdev_priv(peer);
	priv->requested_headroom = new_hr;
	new_hr = max(priv->requested_headroom, peer_priv->requested_headroom);
	dev->needed_headroom = new_hr;
	peer->needed_headroom = new_hr;

out:
	rcu_read_unlock();
}

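/* Attach or detach an XDP program.  While a program is attached, the
 * peer's software GSO is masked off and its MTU is capped so every
 * received frame fits the single-page buffer used by the XDP path.
 */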
static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog,
			struct netlink_ext_ack *extack)
{
	struct veth_priv *priv = netdev_priv(dev);
	struct bpf_prog *old_prog;
	struct net_device *peer;
	unsigned int max_mtu;
	int err;

	old_prog = priv->_xdp_prog;
	priv->_xdp_prog = prog;
	peer = rtnl_dereference(priv->peer);

	if (prog) {
		if (!peer) {
			NL_SET_ERR_MSG_MOD(extack, "Cannot set XDP when peer is detached");
			err = -ENOTCONN;
			goto err;
		}

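		/* Frames rebuilt for XDP must fit in one page together
		 * with VETH_XDP_HEADROOM, the hard header, and the
		 * trailing skb_shared_info, so cap the MTU accordingly.
		 */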
		max_mtu = PAGE_SIZE - VETH_XDP_HEADROOM -
			  peer->hard_header_len -
			  SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
		if (peer->mtu > max_mtu) {
			NL_SET_ERR_MSG_MOD(extack, "Peer MTU is too large to set XDP");
			err = -ERANGE;
			goto err;
		}

		if (dev->flags & IFF_UP) {
			err = veth_enable_xdp(dev);
			if (err) {
				NL_SET_ERR_MSG_MOD(extack, "Setup for XDP failed");
				goto err;
			}
		}

		if (!old_prog) {
			peer->hw_features &= ~NETIF_F_GSO_SOFTWARE;
			peer->max_mtu = max_mtu;
		}
	}

	if (old_prog) {
		if (!prog) {
			if (dev->flags & IFF_UP)
				veth_disable_xdp(dev);

			if (peer) {
				peer->hw_features |= NETIF_F_GSO_SOFTWARE;
				peer->max_mtu = ETH_MAX_MTU;
			}
		}
		bpf_prog_put(old_prog);
	}

	if ((!!old_prog ^ !!prog) && peer)
		netdev_update_features(peer);

	return 0;
err:
	priv->_xdp_prog = old_prog;

	return err;
}

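/* Return the id of the currently attached XDP program, or 0 if none. */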
static u32 veth_xdp_query(struct net_device *dev)
{
	struct veth_priv *priv = netdev_priv(dev);
	const struct bpf_prog *xdp_prog;

	xdp_prog = priv->_xdp_prog;
	if (xdp_prog)
		return xdp_prog->aux->id;

	return 0;
}

static int veth_xdp(struct net_device *dev, struct netdev_bpf *xdp)
{
	switch (xdp->command) {
	case XDP_SETUP_PROG:
		return veth_xdp_set(dev, xdp->prog, xdp->extack);
	case XDP_QUERY_PROG:
		xdp->prog_id = veth_xdp_query(dev);
		return 0;
	default:
		return -EINVAL;
	}
}

static const struct net_device_ops veth_netdev_ops = {
	.ndo_init		= veth_dev_init,
	.ndo_open		= veth_open,
	.ndo_stop		= veth_close,
	.ndo_start_xmit		= veth_xmit,
	.ndo_get_stats64	= veth_get_stats64,
	.ndo_set_rx_mode	= veth_set_multicast_list,
	.ndo_set_mac_address	= eth_mac_addr,
#ifdef CONFIG_NET_POLL_CONTROLLER
	.ndo_poll_controller	= veth_poll_controller,
#endif
	.ndo_get_iflink		= veth_get_iflink,
	.ndo_fix_features	= veth_fix_features,
	.ndo_features_check	= passthru_features_check,
	.ndo_set_rx_headroom	= veth_set_rx_headroom,
	.ndo_bpf		= veth_xdp,
};

#define VETH_FEATURES	(NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HW_CSUM | \
			 NETIF_F_RXCSUM | NETIF_F_SCTP_CRC | NETIF_F_HIGHDMA | \
			 NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL | \
			 NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | \
			 NETIF_F_HW_VLAN_STAG_TX | NETIF_F_HW_VLAN_STAG_RX)

static void veth_setup(struct net_device *dev)
{
	ether_setup(dev);

	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
	dev->priv_flags |= IFF_NO_QUEUE;
	dev->priv_flags |= IFF_PHONY_HEADROOM;

	dev->netdev_ops = &veth_netdev_ops;
	dev->ethtool_ops = &veth_ethtool_ops;
	dev->features |= NETIF_F_LLTX;
	dev->features |= VETH_FEATURES;
	dev->vlan_features = dev->features &
			     ~(NETIF_F_HW_VLAN_CTAG_TX |
			       NETIF_F_HW_VLAN_STAG_TX |
			       NETIF_F_HW_VLAN_CTAG_RX |
			       NETIF_F_HW_VLAN_STAG_RX);
	dev->needs_free_netdev = true;
	dev->priv_destructor = veth_dev_free;
	dev->max_mtu = ETH_MAX_MTU;

	dev->hw_features = VETH_FEATURES;
	dev->hw_enc_features = VETH_FEATURES;
	dev->mpls_features = NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE;
}

/*
 * netlink interface
 */

static int veth_validate(struct nlattr *tb[], struct nlattr *data[],
			 struct netlink_ext_ack *extack)
{
	if (tb[IFLA_ADDRESS]) {
		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
			return -EINVAL;
		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
			return -EADDRNOTAVAIL;
	}
	if (tb[IFLA_MTU]) {
		if (!is_valid_veth_mtu(nla_get_u32(tb[IFLA_MTU])))
			return -EINVAL;
	}
	return 0;
}

static struct rtnl_link_ops veth_link_ops;

static int veth_newlink(struct net *src_net, struct net_device *dev,
			struct nlattr *tb[], struct nlattr *data[],
			struct netlink_ext_ack *extack)
{
	int err;
	struct net_device *peer;
	struct veth_priv *priv;
	char ifname[IFNAMSIZ];
	struct nlattr *peer_tb[IFLA_MAX + 1], **tbp;
	unsigned char name_assign_type;
	struct ifinfomsg *ifmp;
	struct net *net;

	/*
	 * create and register peer first
	 */
	if (data != NULL && data[VETH_INFO_PEER] != NULL) {
		struct nlattr *nla_peer;

		nla_peer = data[VETH_INFO_PEER];
		ifmp = nla_data(nla_peer);
		err = rtnl_nla_parse_ifla(peer_tb,
					  nla_data(nla_peer) + sizeof(struct ifinfomsg),
					  nla_len(nla_peer) - sizeof(struct ifinfomsg),
					  NULL);
		if (err < 0)
			return err;

		err = veth_validate(peer_tb, NULL, extack);
		if (err < 0)
			return err;

		tbp = peer_tb;
	} else {
		ifmp = NULL;
		tbp = tb;
	}

	if (ifmp && tbp[IFLA_IFNAME]) {
		nla_strlcpy(ifname, tbp[IFLA_IFNAME], IFNAMSIZ);
		name_assign_type = NET_NAME_USER;
	} else {
		snprintf(ifname, IFNAMSIZ, DRV_NAME "%%d");
		name_assign_type = NET_NAME_ENUM;
	}

	net = rtnl_link_get_net(src_net, tbp);
	if (IS_ERR(net))
		return PTR_ERR(net);

	peer = rtnl_create_link(net, ifname, name_assign_type,
				&veth_link_ops, tbp);
	if (IS_ERR(peer)) {
		put_net(net);
		return PTR_ERR(peer);
	}

	if (!ifmp || !tbp[IFLA_ADDRESS])
		eth_hw_addr_random(peer);

	if (ifmp && (dev->ifindex != 0))
		peer->ifindex = ifmp->ifi_index;

	peer->gso_max_size = dev->gso_max_size;
	peer->gso_max_segs = dev->gso_max_segs;

	err = register_netdevice(peer);
	put_net(net);
	net = NULL;
	if (err < 0)
		goto err_register_peer;

	netif_carrier_off(peer);

	err = rtnl_configure_link(peer, ifmp);
	if (err < 0)
		goto err_configure_peer;

	/*
	 * register dev last
	 *
	 * note that, since we've registered a new device, the dev's name
	 * should be re-allocated
	 */

	if (tb[IFLA_ADDRESS] == NULL)
		eth_hw_addr_random(dev);

	if (tb[IFLA_IFNAME])
		nla_strlcpy(dev->name, tb[IFLA_IFNAME], IFNAMSIZ);
	else
		snprintf(dev->name, IFNAMSIZ, DRV_NAME "%%d");

	err = register_netdevice(dev);
	if (err < 0)
		goto err_register_dev;

	netif_carrier_off(dev);

	/*
	 * tie the devices together
	 */

	priv = netdev_priv(dev);
	priv->dev = dev;
	rcu_assign_pointer(priv->peer, peer);

	priv = netdev_priv(peer);
	priv->dev = peer;
	rcu_assign_pointer(priv->peer, dev);

	return 0;

err_register_dev:
	/* nothing to do */
err_configure_peer:
	unregister_netdevice(peer);
	return err;

err_register_peer:
	free_netdev(peer);
	return err;
}

static void veth_dellink(struct net_device *dev, struct list_head *head)
{
	struct veth_priv *priv;
	struct net_device *peer;

	priv = netdev_priv(dev);
	peer = rtnl_dereference(priv->peer);

	/* Note: dellink() is called from default_device_exit_batch(),
	 * before a synchronize_rcu() point. The devices are guaranteed
	 * not to be freed before one RCU grace period.
	 */
	RCU_INIT_POINTER(priv->peer, NULL);
	unregister_netdevice_queue(dev, head);

	if (peer) {
		priv = netdev_priv(peer);
		RCU_INIT_POINTER(priv->peer, NULL);
		unregister_netdevice_queue(peer, head);
	}
}

static const struct nla_policy veth_policy[VETH_INFO_MAX + 1] = {
	[VETH_INFO_PEER]	= { .len = sizeof(struct ifinfomsg) },
};

static struct net *veth_get_link_net(const struct net_device *dev)
{
	struct veth_priv *priv = netdev_priv(dev);
	struct net_device *peer = rtnl_dereference(priv->peer);

	return peer ? dev_net(peer) : dev_net(dev);
}

static struct rtnl_link_ops veth_link_ops = {
	.kind		= DRV_NAME,
	.priv_size	= sizeof(struct veth_priv),
	.setup		= veth_setup,
	.validate	= veth_validate,
	.newlink	= veth_newlink,
	.dellink	= veth_dellink,
	.policy		= veth_policy,
	.maxtype	= VETH_INFO_MAX,
	.get_link_net	= veth_get_link_net,
};

/*
 * init/fini
 */

static __init int veth_init(void)
{
	return rtnl_link_register(&veth_link_ops);
}

static __exit void veth_exit(void)
{
	rtnl_link_unregister(&veth_link_ops);
}

module_init(veth_init);
module_exit(veth_exit);

MODULE_DESCRIPTION("Virtual Ethernet Tunnel");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_RTNL_LINK(DRV_NAME);