// SPDX-License-Identifier: GPL-2.0-only
/*
 *  drivers/net/veth.c
 *
 *  Copyright (C) 2007 OpenVZ http://openvz.org, SWsoft Inc
 *
 * Author: Pavel Emelianov <xemul@openvz.org>
 * Ethtool interface from: Eric W. Biederman <ebiederm@xmission.com>
 *
 */

#include <linux/netdevice.h>
#include <linux/slab.h>
#include <linux/ethtool.h>
#include <linux/etherdevice.h>
#include <linux/u64_stats_sync.h>

#include <net/rtnetlink.h>
#include <net/dst.h>
#include <net/xfrm.h>
#include <net/xdp.h>
#include <linux/veth.h>
#include <linux/module.h>
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/ptr_ring.h>
#include <linux/bpf_trace.h>
#include <linux/net_tstamp.h>

#define DRV_NAME	"veth"
#define DRV_VERSION	"1.0"

#define VETH_XDP_FLAG		BIT(0)
#define VETH_RING_SIZE		256
#define VETH_XDP_HEADROOM	(XDP_PACKET_HEADROOM + NET_IP_ALIGN)

#define VETH_XDP_TX_BULK_SIZE	16

struct veth_stats {
	u64	rx_drops;
	/* xdp */
	u64	xdp_packets;
	u64	xdp_bytes;
	u64	xdp_redirect;
	u64	xdp_drops;
	u64	xdp_tx;
};

struct veth_rq_stats {
	struct veth_stats	vs;
	struct u64_stats_sync	syncp;
};

struct veth_rq {
	struct napi_struct	xdp_napi;
	struct net_device	*dev;
	struct bpf_prog __rcu	*xdp_prog;
	struct xdp_mem_info	xdp_mem;
	struct veth_rq_stats	stats;
	bool			rx_notify_masked;
	struct ptr_ring		xdp_ring;
	struct xdp_rxq_info	xdp_rxq;
};

struct veth_priv {
	struct net_device __rcu	*peer;
	atomic64_t		dropped;
	struct bpf_prog		*_xdp_prog;
	struct veth_rq		*rq;
	unsigned int		requested_headroom;
};

struct veth_xdp_tx_bq {
	struct xdp_frame *q[VETH_XDP_TX_BULK_SIZE];
	unsigned int count;
};

/*
 * ethtool interface
 */

struct veth_q_stat_desc {
	char	desc[ETH_GSTRING_LEN];
	size_t	offset;
};

#define VETH_RQ_STAT(m)	offsetof(struct veth_stats, m)

static const struct veth_q_stat_desc veth_rq_stats_desc[] = {
	{ "xdp_packets",	VETH_RQ_STAT(xdp_packets) },
	{ "xdp_bytes",		VETH_RQ_STAT(xdp_bytes) },
	{ "xdp_drops",		VETH_RQ_STAT(xdp_drops) },
};

#define VETH_RQ_STATS_LEN	ARRAY_SIZE(veth_rq_stats_desc)

static struct {
	const char string[ETH_GSTRING_LEN];
} ethtool_stats_keys[] = {
	{ "peer_ifindex" },
};

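/* veth has no physical link, so a fixed synthetic link state is reported:
 * 10Gb/s, full duplex, twisted pair, autonegotiation off.
 */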
static int veth_get_link_ksettings(struct net_device *dev,
				   struct ethtool_link_ksettings *cmd)
{
	cmd->base.speed		= SPEED_10000;
	cmd->base.duplex	= DUPLEX_FULL;
	cmd->base.port		= PORT_TP;
	cmd->base.autoneg	= AUTONEG_DISABLE;
	return 0;
}

static void veth_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
{
	strlcpy(info->driver, DRV_NAME, sizeof(info->driver));
	strlcpy(info->version, DRV_VERSION, sizeof(info->version));
}

static void veth_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
{
	char *p = (char *)buf;
	int i, j;

	switch (stringset) {
	case ETH_SS_STATS:
		memcpy(p, &ethtool_stats_keys, sizeof(ethtool_stats_keys));
		p += sizeof(ethtool_stats_keys);
		for (i = 0; i < dev->real_num_rx_queues; i++) {
			for (j = 0; j < VETH_RQ_STATS_LEN; j++) {
				snprintf(p, ETH_GSTRING_LEN,
					 "rx_queue_%u_%.11s",
					 i, veth_rq_stats_desc[j].desc);
				p += ETH_GSTRING_LEN;
			}
		}
		break;
	}
}

static int veth_get_sset_count(struct net_device *dev, int sset)
{
	switch (sset) {
	case ETH_SS_STATS:
		return ARRAY_SIZE(ethtool_stats_keys) +
		       VETH_RQ_STATS_LEN * dev->real_num_rx_queues;
	default:
		return -EOPNOTSUPP;
	}
}

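/* Per-queue counters are written from NAPI context and read below under a
 * u64_stats seqcount; the do/while loop retries the snapshot whenever a
 * writer raced with the copy, so 64-bit reads stay consistent on 32-bit.
 */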
static void veth_get_ethtool_stats(struct net_device *dev,
				   struct ethtool_stats *stats, u64 *data)
{
	struct veth_priv *priv = netdev_priv(dev);
	struct net_device *peer = rtnl_dereference(priv->peer);
	int i, j, idx;

	data[0] = peer ? peer->ifindex : 0;
	idx = 1;
	for (i = 0; i < dev->real_num_rx_queues; i++) {
		const struct veth_rq_stats *rq_stats = &priv->rq[i].stats;
		const void *stats_base = (void *)&rq_stats->vs;
		unsigned int start;
		size_t offset;

		do {
			start = u64_stats_fetch_begin_irq(&rq_stats->syncp);
			for (j = 0; j < VETH_RQ_STATS_LEN; j++) {
				offset = veth_rq_stats_desc[j].offset;
				data[idx + j] = *(u64 *)(stats_base + offset);
			}
		} while (u64_stats_fetch_retry_irq(&rq_stats->syncp, start));
		idx += VETH_RQ_STATS_LEN;
	}
}

static const struct ethtool_ops veth_ethtool_ops = {
	.get_drvinfo		= veth_get_drvinfo,
	.get_link		= ethtool_op_get_link,
	.get_strings		= veth_get_strings,
	.get_sset_count		= veth_get_sset_count,
	.get_ethtool_stats	= veth_get_ethtool_stats,
	.get_link_ksettings	= veth_get_link_ksettings,
	.get_ts_info		= ethtool_op_get_ts_info,
};

/* general routines */

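/* sk_buffs and xdp_frames share each queue's ptr_ring. xdp_frame pointers
 * are tagged with VETH_XDP_FLAG (bit 0, always clear in a valid pointer)
 * so the consumer side can tell the two apart.
 */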
static bool veth_is_xdp_frame(void *ptr)
{
	return (unsigned long)ptr & VETH_XDP_FLAG;
}

static void *veth_ptr_to_xdp(void *ptr)
{
	return (void *)((unsigned long)ptr & ~VETH_XDP_FLAG);
}

static void *veth_xdp_to_ptr(void *ptr)
{
	return (void *)((unsigned long)ptr | VETH_XDP_FLAG);
}

static void veth_ptr_free(void *ptr)
{
	if (veth_is_xdp_frame(ptr))
		xdp_return_frame(veth_ptr_to_xdp(ptr));
	else
		kfree_skb(ptr);
}

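/* Kick the receive side's NAPI poller. The smp_mb() in here pairs with
 * smp_store_mb() in veth_poll() so that a producer never misses a wakeup
 * while rx_notify_masked is being cleared.
 */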
static void __veth_xdp_flush(struct veth_rq *rq)
{
	/* Write ptr_ring before reading rx_notify_masked */
	smp_mb();
	if (!rq->rx_notify_masked) {
		rq->rx_notify_masked = true;
		napi_schedule(&rq->xdp_napi);
	}
}

static int veth_xdp_rx(struct veth_rq *rq, struct sk_buff *skb)
{
	if (unlikely(ptr_ring_produce(&rq->xdp_ring, skb))) {
		dev_kfree_skb_any(skb);
		return NET_RX_DROP;
	}

	return NET_RX_SUCCESS;
}

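/* Deliver an skb to the peer device: once __dev_forward_skb() accepts it,
 * it goes either into the peer's XDP ring (program attached) or straight
 * to netif_rx().
 */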
static int veth_forward_skb(struct net_device *dev, struct sk_buff *skb,
			    struct veth_rq *rq, bool xdp)
{
	return __dev_forward_skb(dev, skb) ?: xdp ?
		veth_xdp_rx(rq, skb) :
		netif_rx(skb);
}

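/* Transmit path. The sender's tx queue index doubles as the peer's rx
 * queue index, so per-queue XDP state can be looked up without hashing.
 */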
static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
{
	struct veth_priv *rcv_priv, *priv = netdev_priv(dev);
	struct veth_rq *rq = NULL;
	struct net_device *rcv;
	int length = skb->len;
	bool rcv_xdp = false;
	int rxq;

	rcu_read_lock();
	rcv = rcu_dereference(priv->peer);
	if (unlikely(!rcv)) {
		kfree_skb(skb);
		goto drop;
	}

	rcv_priv = netdev_priv(rcv);
	rxq = skb_get_queue_mapping(skb);
	if (rxq < rcv->real_num_rx_queues) {
		rq = &rcv_priv->rq[rxq];
		rcv_xdp = rcu_access_pointer(rq->xdp_prog);
		if (rcv_xdp)
			skb_record_rx_queue(skb, rxq);
	}

	skb_tx_timestamp(skb);
	if (likely(veth_forward_skb(rcv, skb, rq, rcv_xdp) == NET_RX_SUCCESS)) {
		if (!rcv_xdp)
			dev_lstats_add(dev, length);
	} else {
drop:
		atomic64_inc(&priv->dropped);
	}

	if (rcv_xdp)
		__veth_xdp_flush(rq);

	rcu_read_unlock();

	return NETDEV_TX_OK;
}

static u64 veth_stats_tx(struct net_device *dev, u64 *packets, u64 *bytes)
{
	struct veth_priv *priv = netdev_priv(dev);

	dev_lstats_read(dev, packets, bytes);
	return atomic64_read(&priv->dropped);
}

static void veth_stats_rx(struct veth_stats *result, struct net_device *dev)
{
	struct veth_priv *priv = netdev_priv(dev);
	int i;

	result->xdp_packets = 0;
	result->xdp_bytes = 0;
	result->xdp_drops = 0;
	for (i = 0; i < dev->num_rx_queues; i++) {
		struct veth_rq_stats *stats = &priv->rq[i].stats;
		u64 packets, bytes, drops;
		unsigned int start;

		do {
			start = u64_stats_fetch_begin_irq(&stats->syncp);
			packets = stats->vs.xdp_packets;
			bytes = stats->vs.xdp_bytes;
			drops = stats->vs.xdp_drops;
		} while (u64_stats_fetch_retry_irq(&stats->syncp, start));
		result->xdp_packets += packets;
		result->xdp_bytes += bytes;
		result->xdp_drops += drops;
	}
}

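/* One device's tx is its peer's rx, so the peer's tx counters are folded
 * into this device's rx totals below (and this device's XDP rx counters
 * into the peer's tx view).
 */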
static void veth_get_stats64(struct net_device *dev,
			     struct rtnl_link_stats64 *tot)
{
	struct veth_priv *priv = netdev_priv(dev);
	struct net_device *peer;
	struct veth_stats rx;
	u64 packets, bytes;

	tot->tx_dropped = veth_stats_tx(dev, &packets, &bytes);
	tot->tx_bytes = bytes;
	tot->tx_packets = packets;

	veth_stats_rx(&rx, dev);
	tot->rx_dropped = rx.xdp_drops;
	tot->rx_bytes = rx.xdp_bytes;
	tot->rx_packets = rx.xdp_packets;

	rcu_read_lock();
	peer = rcu_dereference(priv->peer);
	if (peer) {
		veth_stats_tx(peer, &packets, &bytes);
		tot->rx_bytes += bytes;
		tot->rx_packets += packets;

		veth_stats_rx(&rx, peer);
		tot->tx_bytes += rx.xdp_bytes;
		tot->tx_packets += rx.xdp_packets;
	}
	rcu_read_unlock();
}

345
Gao feng5c70ef82013-10-04 16:52:24 +0800346/* fake multicast ability */
347static void veth_set_multicast_list(struct net_device *dev)
348{
349}
350
static struct sk_buff *veth_build_skb(void *head, int headroom, int len,
				      int buflen)
{
	struct sk_buff *skb;

	if (!buflen) {
		buflen = SKB_DATA_ALIGN(headroom + len) +
			 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
	}
	skb = build_skb(head, buflen);
	if (!skb)
		return NULL;

	skb_reserve(skb, headroom);
	skb_put(skb, len);

	return skb;
}

static int veth_select_rxq(struct net_device *dev)
{
	return smp_processor_id() % dev->real_num_rx_queues;
}

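/* ndo_xdp_xmit handler: frames redirected here are produced directly into
 * the peer's per-queue ptr_ring; the target queue is derived from the
 * current CPU in veth_select_rxq().
 */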
static int veth_xdp_xmit(struct net_device *dev, int n,
			 struct xdp_frame **frames, u32 flags)
{
	struct veth_priv *rcv_priv, *priv = netdev_priv(dev);
	struct net_device *rcv;
	int i, ret, drops = n;
	unsigned int max_len;
	struct veth_rq *rq;

	rcu_read_lock();
	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) {
		ret = -EINVAL;
		goto drop;
	}

	rcv = rcu_dereference(priv->peer);
	if (unlikely(!rcv)) {
		ret = -ENXIO;
		goto drop;
	}

	rcv_priv = netdev_priv(rcv);
	rq = &rcv_priv->rq[veth_select_rxq(rcv)];
	/* Non-NULL xdp_prog ensures that xdp_ring is initialized on receive
	 * side. This means an XDP program is loaded on the peer and the peer
	 * device is up.
	 */
	if (!rcu_access_pointer(rq->xdp_prog)) {
		ret = -ENXIO;
		goto drop;
	}

	drops = 0;
	max_len = rcv->mtu + rcv->hard_header_len + VLAN_HLEN;

	spin_lock(&rq->xdp_ring.producer_lock);
	for (i = 0; i < n; i++) {
		struct xdp_frame *frame = frames[i];
		void *ptr = veth_xdp_to_ptr(frame);

		if (unlikely(frame->len > max_len ||
			     __ptr_ring_produce(&rq->xdp_ring, ptr))) {
			xdp_return_frame_rx_napi(frame);
			drops++;
		}
	}
	spin_unlock(&rq->xdp_ring.producer_lock);

	if (flags & XDP_XMIT_FLUSH)
		__veth_xdp_flush(rq);

	if (likely(!drops)) {
		rcu_read_unlock();
		return n;
	}

	ret = n - drops;
drop:
	rcu_read_unlock();
	atomic64_add(drops, &priv->dropped);

	return ret;
}

static void veth_xdp_flush_bq(struct net_device *dev, struct veth_xdp_tx_bq *bq)
{
	int sent, i, err = 0;

	sent = veth_xdp_xmit(dev, bq->count, bq->q, 0);
	if (sent < 0) {
		err = sent;
		sent = 0;
		for (i = 0; i < bq->count; i++)
			xdp_return_frame(bq->q[i]);
	}
	trace_xdp_bulk_tx(dev, sent, bq->count - sent, err);

	bq->count = 0;
}

static void veth_xdp_flush(struct net_device *dev, struct veth_xdp_tx_bq *bq)
{
	struct veth_priv *rcv_priv, *priv = netdev_priv(dev);
	struct net_device *rcv;
	struct veth_rq *rq;

	rcu_read_lock();
	veth_xdp_flush_bq(dev, bq);
	rcv = rcu_dereference(priv->peer);
	if (unlikely(!rcv))
		goto out;

	rcv_priv = netdev_priv(rcv);
	rq = &rcv_priv->rq[veth_select_rxq(rcv)];
	/* xdp_ring is initialized on receive side? */
	if (unlikely(!rcu_access_pointer(rq->xdp_prog)))
		goto out;

	__veth_xdp_flush(rq);
out:
	rcu_read_unlock();
}

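/* XDP_TX frames are staged in a small per-NAPI-poll bulk queue and flushed
 * in batches of VETH_XDP_TX_BULK_SIZE to amortize the ptr_ring producer
 * lock in veth_xdp_xmit().
 */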
static int veth_xdp_tx(struct net_device *dev, struct xdp_buff *xdp,
		       struct veth_xdp_tx_bq *bq)
{
	struct xdp_frame *frame = convert_to_xdp_frame(xdp);

	if (unlikely(!frame))
		return -EOVERFLOW;

	if (unlikely(bq->count == VETH_XDP_TX_BULK_SIZE))
		veth_xdp_flush_bq(dev, bq);

	bq->q[bq->count++] = frame;

	return 0;
}

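/* Run the attached XDP program on one xdp_frame consumed from the ring.
 * On XDP_PASS the frame's buffer is turned back into an skb in place: the
 * struct xdp_frame itself lives in the headroom, hence the head/headroom
 * arithmetic below.
 */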
static struct sk_buff *veth_xdp_rcv_one(struct veth_rq *rq,
					struct xdp_frame *frame,
					struct veth_xdp_tx_bq *bq,
					struct veth_stats *stats)
{
	void *hard_start = frame->data - frame->headroom;
	void *head = hard_start - sizeof(struct xdp_frame);
	int len = frame->len, delta = 0;
	struct xdp_frame orig_frame;
	struct bpf_prog *xdp_prog;
	unsigned int headroom;
	struct sk_buff *skb;

	rcu_read_lock();
	xdp_prog = rcu_dereference(rq->xdp_prog);
	if (likely(xdp_prog)) {
		struct xdp_buff xdp;
		u32 act;

		xdp.data_hard_start = hard_start;
		xdp.data = frame->data;
		xdp.data_end = frame->data + frame->len;
		xdp.data_meta = frame->data - frame->metasize;
		xdp.rxq = &rq->xdp_rxq;

		act = bpf_prog_run_xdp(xdp_prog, &xdp);

		switch (act) {
		case XDP_PASS:
			delta = frame->data - xdp.data;
			len = xdp.data_end - xdp.data;
			break;
		case XDP_TX:
			orig_frame = *frame;
			xdp.data_hard_start = head;
			xdp.rxq->mem = frame->mem;
			if (unlikely(veth_xdp_tx(rq->dev, &xdp, bq) < 0)) {
				trace_xdp_exception(rq->dev, xdp_prog, act);
				frame = &orig_frame;
				stats->rx_drops++;
				goto err_xdp;
			}
			stats->xdp_tx++;
			rcu_read_unlock();
			goto xdp_xmit;
		case XDP_REDIRECT:
			orig_frame = *frame;
			xdp.data_hard_start = head;
			xdp.rxq->mem = frame->mem;
			if (xdp_do_redirect(rq->dev, &xdp, xdp_prog)) {
				frame = &orig_frame;
				stats->rx_drops++;
				goto err_xdp;
			}
			stats->xdp_redirect++;
			rcu_read_unlock();
			goto xdp_xmit;
		default:
			bpf_warn_invalid_xdp_action(act);
			/* fall through */
		case XDP_ABORTED:
			trace_xdp_exception(rq->dev, xdp_prog, act);
			/* fall through */
		case XDP_DROP:
			stats->xdp_drops++;
			goto err_xdp;
		}
	}
	rcu_read_unlock();

	headroom = sizeof(struct xdp_frame) + frame->headroom - delta;
	skb = veth_build_skb(head, headroom, len, 0);
	if (!skb) {
		xdp_return_frame(frame);
		stats->rx_drops++;
		goto err;
	}

	xdp_release_frame(frame);
	xdp_scrub_frame(frame);
	skb->protocol = eth_type_trans(skb, rq->dev);
err:
	return skb;
err_xdp:
	rcu_read_unlock();
	xdp_return_frame(frame);
xdp_xmit:
	return NULL;
}

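/* Run the attached XDP program on an skb that arrived over the regular
 * transmit path. XDP needs a linear, private buffer with enough headroom,
 * so shared, cloned, nonlinear or headroom-starved skbs are first copied
 * into a freshly allocated page.
 */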
static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
					struct sk_buff *skb,
					struct veth_xdp_tx_bq *bq,
					struct veth_stats *stats)
{
	u32 pktlen, headroom, act, metalen;
	void *orig_data, *orig_data_end;
	struct bpf_prog *xdp_prog;
	int mac_len, delta, off;
	struct xdp_buff xdp;

	skb_orphan(skb);

	rcu_read_lock();
	xdp_prog = rcu_dereference(rq->xdp_prog);
	if (unlikely(!xdp_prog)) {
		rcu_read_unlock();
		goto out;
	}

	mac_len = skb->data - skb_mac_header(skb);
	pktlen = skb->len + mac_len;
	headroom = skb_headroom(skb) - mac_len;

	if (skb_shared(skb) || skb_head_is_locked(skb) ||
	    skb_is_nonlinear(skb) || headroom < XDP_PACKET_HEADROOM) {
		struct sk_buff *nskb;
		int size, head_off;
		void *head, *start;
		struct page *page;

		size = SKB_DATA_ALIGN(VETH_XDP_HEADROOM + pktlen) +
		       SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
		if (size > PAGE_SIZE)
			goto drop;

		page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
		if (!page)
			goto drop;

		head = page_address(page);
		start = head + VETH_XDP_HEADROOM;
		if (skb_copy_bits(skb, -mac_len, start, pktlen)) {
			page_frag_free(head);
			goto drop;
		}

		nskb = veth_build_skb(head,
				      VETH_XDP_HEADROOM + mac_len, skb->len,
				      PAGE_SIZE);
		if (!nskb) {
			page_frag_free(head);
			goto drop;
		}

		skb_copy_header(nskb, skb);
		head_off = skb_headroom(nskb) - skb_headroom(skb);
		skb_headers_offset_update(nskb, head_off);
		consume_skb(skb);
		skb = nskb;
	}

	xdp.data_hard_start = skb->head;
	xdp.data = skb_mac_header(skb);
	xdp.data_end = xdp.data + pktlen;
	xdp.data_meta = xdp.data;
	xdp.rxq = &rq->xdp_rxq;
	orig_data = xdp.data;
	orig_data_end = xdp.data_end;

	act = bpf_prog_run_xdp(xdp_prog, &xdp);

	switch (act) {
	case XDP_PASS:
		break;
	case XDP_TX:
		get_page(virt_to_page(xdp.data));
		consume_skb(skb);
		xdp.rxq->mem = rq->xdp_mem;
		if (unlikely(veth_xdp_tx(rq->dev, &xdp, bq) < 0)) {
			trace_xdp_exception(rq->dev, xdp_prog, act);
			stats->rx_drops++;
			goto err_xdp;
		}
		stats->xdp_tx++;
		rcu_read_unlock();
		goto xdp_xmit;
	case XDP_REDIRECT:
		get_page(virt_to_page(xdp.data));
		consume_skb(skb);
		xdp.rxq->mem = rq->xdp_mem;
		if (xdp_do_redirect(rq->dev, &xdp, xdp_prog)) {
			stats->rx_drops++;
			goto err_xdp;
		}
		stats->xdp_redirect++;
		rcu_read_unlock();
		goto xdp_xmit;
	default:
		bpf_warn_invalid_xdp_action(act);
		/* fall through */
	case XDP_ABORTED:
		trace_xdp_exception(rq->dev, xdp_prog, act);
		/* fall through */
	case XDP_DROP:
		stats->xdp_drops++;
		goto xdp_drop;
	}
	rcu_read_unlock();

	delta = orig_data - xdp.data;
	off = mac_len + delta;
	if (off > 0)
		__skb_push(skb, off);
	else if (off < 0)
		__skb_pull(skb, -off);
	skb->mac_header -= delta;
	off = xdp.data_end - orig_data_end;
	if (off != 0)
		__skb_put(skb, off);
	skb->protocol = eth_type_trans(skb, rq->dev);

	metalen = xdp.data - xdp.data_meta;
	if (metalen)
		skb_metadata_set(skb, metalen);
out:
	return skb;
drop:
	stats->rx_drops++;
xdp_drop:
	rcu_read_unlock();
	kfree_skb(skb);
	return NULL;
err_xdp:
	rcu_read_unlock();
	page_frag_free(xdp.data);
xdp_xmit:
	return NULL;
}

static int veth_xdp_rcv(struct veth_rq *rq, int budget,
			struct veth_xdp_tx_bq *bq,
			struct veth_stats *stats)
{
	int i, done = 0;

	for (i = 0; i < budget; i++) {
		void *ptr = __ptr_ring_consume(&rq->xdp_ring);
		struct sk_buff *skb;

		if (!ptr)
			break;

		if (veth_is_xdp_frame(ptr)) {
			struct xdp_frame *frame = veth_ptr_to_xdp(ptr);

			stats->xdp_bytes += frame->len;
			skb = veth_xdp_rcv_one(rq, frame, bq, stats);
		} else {
			skb = ptr;
			stats->xdp_bytes += skb->len;
			skb = veth_xdp_rcv_skb(rq, skb, bq, stats);
		}

		if (skb)
			napi_gro_receive(&rq->xdp_napi, skb);

		done++;
	}

	u64_stats_update_begin(&rq->stats.syncp);
	rq->stats.vs.xdp_bytes += stats->xdp_bytes;
	rq->stats.vs.xdp_drops += stats->xdp_drops + stats->rx_drops;
	rq->stats.vs.xdp_packets += done;
	u64_stats_update_end(&rq->stats.syncp);

	return done;
}

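/* NAPI poll handler. After napi_complete_done() the ring is checked once
 * more: a producer may have enqueued between the final consume and the
 * clearing of rx_notify_masked, in which case polling resumes immediately.
 */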
static int veth_poll(struct napi_struct *napi, int budget)
{
	struct veth_rq *rq =
		container_of(napi, struct veth_rq, xdp_napi);
	struct veth_stats stats = {};
	struct veth_xdp_tx_bq bq;
	int done;

	bq.count = 0;

	xdp_set_return_frame_no_direct();
	done = veth_xdp_rcv(rq, budget, &bq, &stats);

	if (done < budget && napi_complete_done(napi, done)) {
		/* Write rx_notify_masked before reading ptr_ring */
		smp_store_mb(rq->rx_notify_masked, false);
		if (unlikely(!__ptr_ring_empty(&rq->xdp_ring))) {
			rq->rx_notify_masked = true;
			napi_schedule(&rq->xdp_napi);
		}
	}

	if (stats.xdp_tx > 0)
		veth_xdp_flush(rq->dev, &bq);
	if (stats.xdp_redirect > 0)
		xdp_do_flush();
	xdp_clear_return_frame_no_direct();

	return done;
}

static int veth_napi_add(struct net_device *dev)
{
	struct veth_priv *priv = netdev_priv(dev);
	int err, i;

	for (i = 0; i < dev->real_num_rx_queues; i++) {
		struct veth_rq *rq = &priv->rq[i];

		err = ptr_ring_init(&rq->xdp_ring, VETH_RING_SIZE, GFP_KERNEL);
		if (err)
			goto err_xdp_ring;
	}

	for (i = 0; i < dev->real_num_rx_queues; i++) {
		struct veth_rq *rq = &priv->rq[i];

		netif_napi_add(dev, &rq->xdp_napi, veth_poll, NAPI_POLL_WEIGHT);
		napi_enable(&rq->xdp_napi);
	}

	return 0;
err_xdp_ring:
	for (i--; i >= 0; i--)
		ptr_ring_cleanup(&priv->rq[i].xdp_ring, veth_ptr_free);

	return err;
}

static void veth_napi_del(struct net_device *dev)
{
	struct veth_priv *priv = netdev_priv(dev);
	int i;

	for (i = 0; i < dev->real_num_rx_queues; i++) {
		struct veth_rq *rq = &priv->rq[i];

		napi_disable(&rq->xdp_napi);
		napi_hash_del(&rq->xdp_napi);
	}
	synchronize_net();

	for (i = 0; i < dev->real_num_rx_queues; i++) {
		struct veth_rq *rq = &priv->rq[i];

		netif_napi_del(&rq->xdp_napi);
		rq->rx_notify_masked = false;
		ptr_ring_cleanup(&rq->xdp_ring, veth_ptr_free);
	}
}

static int veth_enable_xdp(struct net_device *dev)
{
	struct veth_priv *priv = netdev_priv(dev);
	int err, i;

	if (!xdp_rxq_info_is_reg(&priv->rq[0].xdp_rxq)) {
		for (i = 0; i < dev->real_num_rx_queues; i++) {
			struct veth_rq *rq = &priv->rq[i];

			err = xdp_rxq_info_reg(&rq->xdp_rxq, dev, i);
			if (err < 0)
				goto err_rxq_reg;

			err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
							 MEM_TYPE_PAGE_SHARED,
							 NULL);
			if (err < 0)
				goto err_reg_mem;

			/* Save original mem info as it can be overwritten */
			rq->xdp_mem = rq->xdp_rxq.mem;
		}

		err = veth_napi_add(dev);
		if (err)
			goto err_rxq_reg;
	}

	for (i = 0; i < dev->real_num_rx_queues; i++)
		rcu_assign_pointer(priv->rq[i].xdp_prog, priv->_xdp_prog);

	return 0;
err_reg_mem:
	xdp_rxq_info_unreg(&priv->rq[i].xdp_rxq);
err_rxq_reg:
	for (i--; i >= 0; i--)
		xdp_rxq_info_unreg(&priv->rq[i].xdp_rxq);

	return err;
}

static void veth_disable_xdp(struct net_device *dev)
{
	struct veth_priv *priv = netdev_priv(dev);
	int i;

	for (i = 0; i < dev->real_num_rx_queues; i++)
		rcu_assign_pointer(priv->rq[i].xdp_prog, NULL);
	veth_napi_del(dev);
	for (i = 0; i < dev->real_num_rx_queues; i++) {
		struct veth_rq *rq = &priv->rq[i];

		rq->xdp_rxq.mem = rq->xdp_mem;
		xdp_rxq_info_unreg(&rq->xdp_rxq);
	}
}

static int veth_open(struct net_device *dev)
{
	struct veth_priv *priv = netdev_priv(dev);
	struct net_device *peer = rtnl_dereference(priv->peer);
	int err;

	if (!peer)
		return -ENOTCONN;

	if (priv->_xdp_prog) {
		err = veth_enable_xdp(dev);
		if (err)
			return err;
	}

	if (peer->flags & IFF_UP) {
		netif_carrier_on(dev);
		netif_carrier_on(peer);
	}

	return 0;
}

static int veth_close(struct net_device *dev)
{
	struct veth_priv *priv = netdev_priv(dev);
	struct net_device *peer = rtnl_dereference(priv->peer);

	netif_carrier_off(dev);
	if (peer)
		netif_carrier_off(peer);

	if (priv->_xdp_prog)
		veth_disable_xdp(dev);

	return 0;
}

static int is_valid_veth_mtu(int mtu)
{
	return mtu >= ETH_MIN_MTU && mtu <= ETH_MAX_MTU;
}

static int veth_alloc_queues(struct net_device *dev)
{
	struct veth_priv *priv = netdev_priv(dev);
	int i;

	priv->rq = kcalloc(dev->num_rx_queues, sizeof(*priv->rq), GFP_KERNEL);
	if (!priv->rq)
		return -ENOMEM;

	for (i = 0; i < dev->num_rx_queues; i++) {
		priv->rq[i].dev = dev;
		u64_stats_init(&priv->rq[i].stats.syncp);
	}

	return 0;
}

static void veth_free_queues(struct net_device *dev)
{
	struct veth_priv *priv = netdev_priv(dev);

	kfree(priv->rq);
}

static int veth_dev_init(struct net_device *dev)
{
	int err;

	dev->lstats = netdev_alloc_pcpu_stats(struct pcpu_lstats);
	if (!dev->lstats)
		return -ENOMEM;

	err = veth_alloc_queues(dev);
	if (err) {
		free_percpu(dev->lstats);
		return err;
	}

	return 0;
}

static void veth_dev_free(struct net_device *dev)
{
	veth_free_queues(dev);
	free_percpu(dev->lstats);
}

#ifdef CONFIG_NET_POLL_CONTROLLER
static void veth_poll_controller(struct net_device *dev)
{
	/* veth only receives frames when its peer sends one.
	 * Since it has nothing to do with disabling irqs, we are guaranteed
	 * never to have pending data when we poll for it, so
	 * there is nothing to do here.
	 *
	 * We need this though so netpoll recognizes us as an interface that
	 * supports polling, which enables bridge devices in virt setups to
	 * still use netconsole.
	 */
}
#endif	/* CONFIG_NET_POLL_CONTROLLER */

static int veth_get_iflink(const struct net_device *dev)
{
	struct veth_priv *priv = netdev_priv(dev);
	struct net_device *peer;
	int iflink;

	rcu_read_lock();
	peer = rcu_dereference(priv->peer);
	iflink = peer ? peer->ifindex : 0;
	rcu_read_unlock();

	return iflink;
}

static netdev_features_t veth_fix_features(struct net_device *dev,
					   netdev_features_t features)
{
	struct veth_priv *priv = netdev_priv(dev);
	struct net_device *peer;

	peer = rtnl_dereference(priv->peer);
	if (peer) {
		struct veth_priv *peer_priv = netdev_priv(peer);

		if (peer_priv->_xdp_prog)
			features &= ~NETIF_F_GSO_SOFTWARE;
	}

	return features;
}

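/* Headroom requests are kept symmetric: both devices advertise the larger
 * of the two peers' requested values, so either side can prepend headers
 * without triggering a reallocation.
 */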
static void veth_set_rx_headroom(struct net_device *dev, int new_hr)
{
	struct veth_priv *peer_priv, *priv = netdev_priv(dev);
	struct net_device *peer;

	if (new_hr < 0)
		new_hr = 0;

	rcu_read_lock();
	peer = rcu_dereference(priv->peer);
	if (unlikely(!peer))
		goto out;

	peer_priv = netdev_priv(peer);
	priv->requested_headroom = new_hr;
	new_hr = max(priv->requested_headroom, peer_priv->requested_headroom);
	dev->needed_headroom = new_hr;
	peer->needed_headroom = new_hr;

out:
	rcu_read_unlock();
}

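/* Attach or detach an XDP program. The peer's MTU is capped so that a full
 * frame plus XDP headroom and the trailing skb_shared_info still fit in a
 * single page, and software GSO is dropped from the peer's features since
 * the XDP receive path only handles linear buffers.
 */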
static int veth_xdp_set(struct net_device *dev, struct bpf_prog *prog,
			struct netlink_ext_ack *extack)
{
	struct veth_priv *priv = netdev_priv(dev);
	struct bpf_prog *old_prog;
	struct net_device *peer;
	unsigned int max_mtu;
	int err;

	old_prog = priv->_xdp_prog;
	priv->_xdp_prog = prog;
	peer = rtnl_dereference(priv->peer);

	if (prog) {
		if (!peer) {
			NL_SET_ERR_MSG_MOD(extack, "Cannot set XDP when peer is detached");
			err = -ENOTCONN;
			goto err;
		}

		max_mtu = PAGE_SIZE - VETH_XDP_HEADROOM -
			  peer->hard_header_len -
			  SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
		if (peer->mtu > max_mtu) {
			NL_SET_ERR_MSG_MOD(extack, "Peer MTU is too large to set XDP");
			err = -ERANGE;
			goto err;
		}

		if (dev->real_num_rx_queues < peer->real_num_tx_queues) {
			NL_SET_ERR_MSG_MOD(extack, "XDP expects number of rx queues not less than peer tx queues");
			err = -ENOSPC;
			goto err;
		}

		if (dev->flags & IFF_UP) {
			err = veth_enable_xdp(dev);
			if (err) {
				NL_SET_ERR_MSG_MOD(extack, "Setup for XDP failed");
				goto err;
			}
		}

		if (!old_prog) {
			peer->hw_features &= ~NETIF_F_GSO_SOFTWARE;
			peer->max_mtu = max_mtu;
		}
	}

	if (old_prog) {
		if (!prog) {
			if (dev->flags & IFF_UP)
				veth_disable_xdp(dev);

			if (peer) {
				peer->hw_features |= NETIF_F_GSO_SOFTWARE;
				peer->max_mtu = ETH_MAX_MTU;
			}
		}
		bpf_prog_put(old_prog);
	}

	if ((!!old_prog ^ !!prog) && peer)
		netdev_update_features(peer);

	return 0;
err:
	priv->_xdp_prog = old_prog;

	return err;
}

static u32 veth_xdp_query(struct net_device *dev)
{
	struct veth_priv *priv = netdev_priv(dev);
	const struct bpf_prog *xdp_prog;

	xdp_prog = priv->_xdp_prog;
	if (xdp_prog)
		return xdp_prog->aux->id;

	return 0;
}

static int veth_xdp(struct net_device *dev, struct netdev_bpf *xdp)
{
	switch (xdp->command) {
	case XDP_SETUP_PROG:
		return veth_xdp_set(dev, xdp->prog, xdp->extack);
	case XDP_QUERY_PROG:
		xdp->prog_id = veth_xdp_query(dev);
		return 0;
	default:
		return -EINVAL;
	}
}

static const struct net_device_ops veth_netdev_ops = {
	.ndo_init            = veth_dev_init,
	.ndo_open            = veth_open,
	.ndo_stop            = veth_close,
	.ndo_start_xmit      = veth_xmit,
	.ndo_get_stats64     = veth_get_stats64,
	.ndo_set_rx_mode     = veth_set_multicast_list,
	.ndo_set_mac_address = eth_mac_addr,
#ifdef CONFIG_NET_POLL_CONTROLLER
	.ndo_poll_controller	= veth_poll_controller,
#endif
	.ndo_get_iflink		= veth_get_iflink,
	.ndo_fix_features	= veth_fix_features,
	.ndo_features_check	= passthru_features_check,
	.ndo_set_rx_headroom	= veth_set_rx_headroom,
	.ndo_bpf		= veth_xdp,
	.ndo_xdp_xmit		= veth_xdp_xmit,
};

#define VETH_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HW_CSUM | \
		       NETIF_F_RXCSUM | NETIF_F_SCTP_CRC | NETIF_F_HIGHDMA | \
		       NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL | \
		       NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | \
		       NETIF_F_HW_VLAN_STAG_TX | NETIF_F_HW_VLAN_STAG_RX )

static void veth_setup(struct net_device *dev)
{
	ether_setup(dev);

	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
	dev->priv_flags |= IFF_NO_QUEUE;
	dev->priv_flags |= IFF_PHONY_HEADROOM;

	dev->netdev_ops = &veth_netdev_ops;
	dev->ethtool_ops = &veth_ethtool_ops;
	dev->features |= NETIF_F_LLTX;
	dev->features |= VETH_FEATURES;
	dev->vlan_features = dev->features &
			     ~(NETIF_F_HW_VLAN_CTAG_TX |
			       NETIF_F_HW_VLAN_STAG_TX |
			       NETIF_F_HW_VLAN_CTAG_RX |
			       NETIF_F_HW_VLAN_STAG_RX);
	dev->needs_free_netdev = true;
	dev->priv_destructor = veth_dev_free;
	dev->max_mtu = ETH_MAX_MTU;

	dev->hw_features = VETH_FEATURES;
	dev->hw_enc_features = VETH_FEATURES;
	dev->mpls_features = NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE;
}

/*
 * netlink interface
 */

static int veth_validate(struct nlattr *tb[], struct nlattr *data[],
			 struct netlink_ext_ack *extack)
{
	if (tb[IFLA_ADDRESS]) {
		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
			return -EINVAL;
		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
			return -EADDRNOTAVAIL;
	}
	if (tb[IFLA_MTU]) {
		if (!is_valid_veth_mtu(nla_get_u32(tb[IFLA_MTU])))
			return -EINVAL;
	}
	return 0;
}

static struct rtnl_link_ops veth_link_ops;

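/* rtnl newlink handler. The peer device is created and registered first so
 * a failure can be unwound before the requested device ever shows up; only
 * after both registrations succeed are the two peers RCU-linked.
 */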
static int veth_newlink(struct net *src_net, struct net_device *dev,
			struct nlattr *tb[], struct nlattr *data[],
			struct netlink_ext_ack *extack)
{
	int err;
	struct net_device *peer;
	struct veth_priv *priv;
	char ifname[IFNAMSIZ];
	struct nlattr *peer_tb[IFLA_MAX + 1], **tbp;
	unsigned char name_assign_type;
	struct ifinfomsg *ifmp;
	struct net *net;

	/*
	 * create and register peer first
	 */
	if (data != NULL && data[VETH_INFO_PEER] != NULL) {
		struct nlattr *nla_peer;

		nla_peer = data[VETH_INFO_PEER];
		ifmp = nla_data(nla_peer);
		err = rtnl_nla_parse_ifla(peer_tb,
					  nla_data(nla_peer) + sizeof(struct ifinfomsg),
					  nla_len(nla_peer) - sizeof(struct ifinfomsg),
					  NULL);
		if (err < 0)
			return err;

		err = veth_validate(peer_tb, NULL, extack);
		if (err < 0)
			return err;

		tbp = peer_tb;
	} else {
		ifmp = NULL;
		tbp = tb;
	}

	if (ifmp && tbp[IFLA_IFNAME]) {
		nla_strlcpy(ifname, tbp[IFLA_IFNAME], IFNAMSIZ);
		name_assign_type = NET_NAME_USER;
	} else {
		snprintf(ifname, IFNAMSIZ, DRV_NAME "%%d");
		name_assign_type = NET_NAME_ENUM;
	}

	net = rtnl_link_get_net(src_net, tbp);
	if (IS_ERR(net))
		return PTR_ERR(net);

	peer = rtnl_create_link(net, ifname, name_assign_type,
				&veth_link_ops, tbp, extack);
	if (IS_ERR(peer)) {
		put_net(net);
		return PTR_ERR(peer);
	}

	if (!ifmp || !tbp[IFLA_ADDRESS])
		eth_hw_addr_random(peer);

	if (ifmp && (dev->ifindex != 0))
		peer->ifindex = ifmp->ifi_index;

	peer->gso_max_size = dev->gso_max_size;
	peer->gso_max_segs = dev->gso_max_segs;

	err = register_netdevice(peer);
	put_net(net);
	net = NULL;
	if (err < 0)
		goto err_register_peer;

	netif_carrier_off(peer);

	err = rtnl_configure_link(peer, ifmp);
	if (err < 0)
		goto err_configure_peer;

	/*
	 * register dev last
	 *
	 * note, that since we've registered new device the dev's name
	 * should be re-allocated
	 */

	if (tb[IFLA_ADDRESS] == NULL)
		eth_hw_addr_random(dev);

	if (tb[IFLA_IFNAME])
		nla_strlcpy(dev->name, tb[IFLA_IFNAME], IFNAMSIZ);
	else
		snprintf(dev->name, IFNAMSIZ, DRV_NAME "%%d");

	err = register_netdevice(dev);
	if (err < 0)
		goto err_register_dev;

	netif_carrier_off(dev);

	/*
	 * tie the devices together
	 */

	priv = netdev_priv(dev);
	rcu_assign_pointer(priv->peer, peer);

	priv = netdev_priv(peer);
	rcu_assign_pointer(priv->peer, dev);

	return 0;

err_register_dev:
	/* nothing to do */
err_configure_peer:
	unregister_netdevice(peer);
	return err;

err_register_peer:
	free_netdev(peer);
	return err;
}

static void veth_dellink(struct net_device *dev, struct list_head *head)
{
	struct veth_priv *priv;
	struct net_device *peer;

	priv = netdev_priv(dev);
	peer = rtnl_dereference(priv->peer);

	/* Note : dellink() is called from default_device_exit_batch(),
	 * before a rcu_synchronize() point. The devices are guaranteed
	 * not being freed before one RCU grace period.
	 */
	RCU_INIT_POINTER(priv->peer, NULL);
	unregister_netdevice_queue(dev, head);

	if (peer) {
		priv = netdev_priv(peer);
		RCU_INIT_POINTER(priv->peer, NULL);
		unregister_netdevice_queue(peer, head);
	}
}

static const struct nla_policy veth_policy[VETH_INFO_MAX + 1] = {
	[VETH_INFO_PEER]	= { .len = sizeof(struct ifinfomsg) },
};

static struct net *veth_get_link_net(const struct net_device *dev)
{
	struct veth_priv *priv = netdev_priv(dev);
	struct net_device *peer = rtnl_dereference(priv->peer);

	return peer ? dev_net(peer) : dev_net(dev);
}

static struct rtnl_link_ops veth_link_ops = {
	.kind		= DRV_NAME,
	.priv_size	= sizeof(struct veth_priv),
	.setup		= veth_setup,
	.validate	= veth_validate,
	.newlink	= veth_newlink,
	.dellink	= veth_dellink,
	.policy		= veth_policy,
	.maxtype	= VETH_INFO_MAX,
	.get_link_net	= veth_get_link_net,
};

/*
 * init/fini
 */

static __init int veth_init(void)
{
	return rtnl_link_register(&veth_link_ops);
}

static __exit void veth_exit(void)
{
	rtnl_link_unregister(&veth_link_ops);
}

module_init(veth_init);
module_exit(veth_exit);

MODULE_DESCRIPTION("Virtual Ethernet Tunnel");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_RTNL_LINK(DRV_NAME);