Sridhar Samudrala | cfc80d9 | 2018-05-24 09:55:15 -0700 | [diff] [blame] | 1 | // SPDX-License-Identifier: GPL-2.0 |
| 2 | /* Copyright (c) 2018, Intel Corporation. */ |
| 3 | |
| 4 | /* This provides a net_failover interface for paravirtual drivers to |
| 5 | * provide an alternate datapath by exporting APIs to create and |
| 6 | * destroy an upper 'net_failover' netdev. The upper dev manages the |
| 7 | * original paravirtual interface as a 'standby' netdev and uses the |
| 8 | * generic failover infrastructure to register and manage a direct |
| 9 | * attached VF as a 'primary' netdev. This enables live migration of |
| 10 | * a VM with direct attached VF by failing over to the paravirtual |
| 11 | * datapath when the VF is unplugged. |
| 12 | * |
| 13 | * Some of the netdev management routines are based on bond/team driver as |
| 14 | * this driver provides active-backup functionality similar to those drivers. |
| 15 | */ |
| 16 | |
| 17 | #include <linux/netdevice.h> |
| 18 | #include <linux/etherdevice.h> |
| 19 | #include <linux/ethtool.h> |
| 20 | #include <linux/module.h> |
| 21 | #include <linux/slab.h> |
Sridhar Samudrala | cfc80d9 | 2018-05-24 09:55:15 -0700 | [diff] [blame] | 22 | #include <linux/netpoll.h> |
| 23 | #include <linux/rtnetlink.h> |
| 24 | #include <linux/if_vlan.h> |
| 25 | #include <linux/pci.h> |
| 26 | #include <net/sch_generic.h> |
| 27 | #include <uapi/linux/if_arp.h> |
| 28 | #include <net/net_failover.h> |
| 29 | |
| 30 | static bool net_failover_xmit_ready(struct net_device *dev) |
| 31 | { |
| 32 | return netif_running(dev) && netif_carrier_ok(dev); |
| 33 | } |
| 34 | |
| 35 | static int net_failover_open(struct net_device *dev) |
| 36 | { |
| 37 | struct net_failover_info *nfo_info = netdev_priv(dev); |
| 38 | struct net_device *primary_dev, *standby_dev; |
| 39 | int err; |
| 40 | |
| 41 | primary_dev = rtnl_dereference(nfo_info->primary_dev); |
| 42 | if (primary_dev) { |
Petr Machata | 00f54e6 | 2018-12-06 17:05:36 +0000 | [diff] [blame] | 43 | err = dev_open(primary_dev, NULL); |
Sridhar Samudrala | cfc80d9 | 2018-05-24 09:55:15 -0700 | [diff] [blame] | 44 | if (err) |
| 45 | goto err_primary_open; |
| 46 | } |
| 47 | |
| 48 | standby_dev = rtnl_dereference(nfo_info->standby_dev); |
| 49 | if (standby_dev) { |
Petr Machata | 00f54e6 | 2018-12-06 17:05:36 +0000 | [diff] [blame] | 50 | err = dev_open(standby_dev, NULL); |
Sridhar Samudrala | cfc80d9 | 2018-05-24 09:55:15 -0700 | [diff] [blame] | 51 | if (err) |
| 52 | goto err_standby_open; |
| 53 | } |
| 54 | |
| 55 | if ((primary_dev && net_failover_xmit_ready(primary_dev)) || |
| 56 | (standby_dev && net_failover_xmit_ready(standby_dev))) { |
| 57 | netif_carrier_on(dev); |
| 58 | netif_tx_wake_all_queues(dev); |
| 59 | } |
| 60 | |
| 61 | return 0; |
| 62 | |
| 63 | err_standby_open: |
| 64 | dev_close(primary_dev); |
| 65 | err_primary_open: |
| 66 | netif_tx_disable(dev); |
| 67 | return err; |
| 68 | } |
| 69 | |
| 70 | static int net_failover_close(struct net_device *dev) |
| 71 | { |
| 72 | struct net_failover_info *nfo_info = netdev_priv(dev); |
| 73 | struct net_device *slave_dev; |
| 74 | |
| 75 | netif_tx_disable(dev); |
| 76 | |
| 77 | slave_dev = rtnl_dereference(nfo_info->primary_dev); |
| 78 | if (slave_dev) |
| 79 | dev_close(slave_dev); |
| 80 | |
| 81 | slave_dev = rtnl_dereference(nfo_info->standby_dev); |
| 82 | if (slave_dev) |
| 83 | dev_close(slave_dev); |
| 84 | |
| 85 | return 0; |
| 86 | } |
| 87 | |
| 88 | static netdev_tx_t net_failover_drop_xmit(struct sk_buff *skb, |
| 89 | struct net_device *dev) |
| 90 | { |
| 91 | atomic_long_inc(&dev->tx_dropped); |
| 92 | dev_kfree_skb_any(skb); |
| 93 | return NETDEV_TX_OK; |
| 94 | } |
| 95 | |
| 96 | static netdev_tx_t net_failover_start_xmit(struct sk_buff *skb, |
| 97 | struct net_device *dev) |
| 98 | { |
| 99 | struct net_failover_info *nfo_info = netdev_priv(dev); |
| 100 | struct net_device *xmit_dev; |
| 101 | |
| 102 | /* Try xmit via primary netdev followed by standby netdev */ |
| 103 | xmit_dev = rcu_dereference_bh(nfo_info->primary_dev); |
| 104 | if (!xmit_dev || !net_failover_xmit_ready(xmit_dev)) { |
| 105 | xmit_dev = rcu_dereference_bh(nfo_info->standby_dev); |
| 106 | if (!xmit_dev || !net_failover_xmit_ready(xmit_dev)) |
| 107 | return net_failover_drop_xmit(skb, dev); |
| 108 | } |
| 109 | |
| 110 | skb->dev = xmit_dev; |
| 111 | skb->queue_mapping = qdisc_skb_cb(skb)->slave_dev_queue_mapping; |
| 112 | |
| 113 | return dev_queue_xmit(skb); |
| 114 | } |
| 115 | |
| 116 | static u16 net_failover_select_queue(struct net_device *dev, |
Alexander Duyck | 4f49dec | 2018-07-09 12:19:59 -0400 | [diff] [blame] | 117 | struct sk_buff *skb, |
| 118 | struct net_device *sb_dev, |
Sridhar Samudrala | cfc80d9 | 2018-05-24 09:55:15 -0700 | [diff] [blame] | 119 | select_queue_fallback_t fallback) |
| 120 | { |
| 121 | struct net_failover_info *nfo_info = netdev_priv(dev); |
| 122 | struct net_device *primary_dev; |
| 123 | u16 txq; |
| 124 | |
| 125 | primary_dev = rcu_dereference(nfo_info->primary_dev); |
| 126 | if (primary_dev) { |
| 127 | const struct net_device_ops *ops = primary_dev->netdev_ops; |
| 128 | |
| 129 | if (ops->ndo_select_queue) |
| 130 | txq = ops->ndo_select_queue(primary_dev, skb, |
Alexander Duyck | 4f49dec | 2018-07-09 12:19:59 -0400 | [diff] [blame] | 131 | sb_dev, fallback); |
Sridhar Samudrala | cfc80d9 | 2018-05-24 09:55:15 -0700 | [diff] [blame] | 132 | else |
Alexander Duyck | 8ec56fc | 2018-07-09 12:20:04 -0400 | [diff] [blame] | 133 | txq = fallback(primary_dev, skb, NULL); |
Sridhar Samudrala | cfc80d9 | 2018-05-24 09:55:15 -0700 | [diff] [blame] | 134 | |
| 135 | qdisc_skb_cb(skb)->slave_dev_queue_mapping = skb->queue_mapping; |
| 136 | |
| 137 | return txq; |
| 138 | } |
| 139 | |
| 140 | txq = skb_rx_queue_recorded(skb) ? skb_get_rx_queue(skb) : 0; |
| 141 | |
| 142 | /* Save the original txq to restore before passing to the driver */ |
| 143 | qdisc_skb_cb(skb)->slave_dev_queue_mapping = skb->queue_mapping; |
| 144 | |
| 145 | if (unlikely(txq >= dev->real_num_tx_queues)) { |
| 146 | do { |
| 147 | txq -= dev->real_num_tx_queues; |
| 148 | } while (txq >= dev->real_num_tx_queues); |
| 149 | } |
| 150 | |
| 151 | return txq; |
| 152 | } |
| 153 | |
| 154 | /* fold stats, assuming all rtnl_link_stats64 fields are u64, but |
| 155 | * that some drivers can provide 32bit values only. |
| 156 | */ |
| 157 | static void net_failover_fold_stats(struct rtnl_link_stats64 *_res, |
| 158 | const struct rtnl_link_stats64 *_new, |
| 159 | const struct rtnl_link_stats64 *_old) |
| 160 | { |
| 161 | const u64 *new = (const u64 *)_new; |
| 162 | const u64 *old = (const u64 *)_old; |
| 163 | u64 *res = (u64 *)_res; |
| 164 | int i; |
| 165 | |
| 166 | for (i = 0; i < sizeof(*_res) / sizeof(u64); i++) { |
| 167 | u64 nv = new[i]; |
| 168 | u64 ov = old[i]; |
| 169 | s64 delta = nv - ov; |
| 170 | |
| 171 | /* detects if this particular field is 32bit only */ |
| 172 | if (((nv | ov) >> 32) == 0) |
| 173 | delta = (s64)(s32)((u32)nv - (u32)ov); |
| 174 | |
| 175 | /* filter anomalies, some drivers reset their stats |
| 176 | * at down/up events. |
| 177 | */ |
| 178 | if (delta > 0) |
| 179 | res[i] += delta; |
| 180 | } |
| 181 | } |
| 182 | |
| 183 | static void net_failover_get_stats(struct net_device *dev, |
| 184 | struct rtnl_link_stats64 *stats) |
| 185 | { |
| 186 | struct net_failover_info *nfo_info = netdev_priv(dev); |
| 187 | const struct rtnl_link_stats64 *new; |
| 188 | struct rtnl_link_stats64 temp; |
| 189 | struct net_device *slave_dev; |
| 190 | |
| 191 | spin_lock(&nfo_info->stats_lock); |
| 192 | memcpy(stats, &nfo_info->failover_stats, sizeof(*stats)); |
| 193 | |
| 194 | rcu_read_lock(); |
| 195 | |
| 196 | slave_dev = rcu_dereference(nfo_info->primary_dev); |
| 197 | if (slave_dev) { |
| 198 | new = dev_get_stats(slave_dev, &temp); |
| 199 | net_failover_fold_stats(stats, new, &nfo_info->primary_stats); |
| 200 | memcpy(&nfo_info->primary_stats, new, sizeof(*new)); |
| 201 | } |
| 202 | |
| 203 | slave_dev = rcu_dereference(nfo_info->standby_dev); |
| 204 | if (slave_dev) { |
| 205 | new = dev_get_stats(slave_dev, &temp); |
| 206 | net_failover_fold_stats(stats, new, &nfo_info->standby_stats); |
| 207 | memcpy(&nfo_info->standby_stats, new, sizeof(*new)); |
| 208 | } |
| 209 | |
| 210 | rcu_read_unlock(); |
| 211 | |
| 212 | memcpy(&nfo_info->failover_stats, stats, sizeof(*stats)); |
| 213 | spin_unlock(&nfo_info->stats_lock); |
| 214 | } |
| 215 | |
| 216 | static int net_failover_change_mtu(struct net_device *dev, int new_mtu) |
| 217 | { |
| 218 | struct net_failover_info *nfo_info = netdev_priv(dev); |
| 219 | struct net_device *primary_dev, *standby_dev; |
| 220 | int ret = 0; |
| 221 | |
Stephen Hemminger | 3260155 | 2018-07-27 13:43:21 -0700 | [diff] [blame] | 222 | primary_dev = rtnl_dereference(nfo_info->primary_dev); |
Sridhar Samudrala | cfc80d9 | 2018-05-24 09:55:15 -0700 | [diff] [blame] | 223 | if (primary_dev) { |
| 224 | ret = dev_set_mtu(primary_dev, new_mtu); |
| 225 | if (ret) |
| 226 | return ret; |
| 227 | } |
| 228 | |
Stephen Hemminger | 3260155 | 2018-07-27 13:43:21 -0700 | [diff] [blame] | 229 | standby_dev = rtnl_dereference(nfo_info->standby_dev); |
Sridhar Samudrala | cfc80d9 | 2018-05-24 09:55:15 -0700 | [diff] [blame] | 230 | if (standby_dev) { |
| 231 | ret = dev_set_mtu(standby_dev, new_mtu); |
| 232 | if (ret) { |
| 233 | if (primary_dev) |
| 234 | dev_set_mtu(primary_dev, dev->mtu); |
| 235 | return ret; |
| 236 | } |
| 237 | } |
| 238 | |
| 239 | dev->mtu = new_mtu; |
| 240 | |
| 241 | return 0; |
| 242 | } |
| 243 | |
| 244 | static void net_failover_set_rx_mode(struct net_device *dev) |
| 245 | { |
| 246 | struct net_failover_info *nfo_info = netdev_priv(dev); |
| 247 | struct net_device *slave_dev; |
| 248 | |
| 249 | rcu_read_lock(); |
| 250 | |
| 251 | slave_dev = rcu_dereference(nfo_info->primary_dev); |
| 252 | if (slave_dev) { |
| 253 | dev_uc_sync_multiple(slave_dev, dev); |
| 254 | dev_mc_sync_multiple(slave_dev, dev); |
| 255 | } |
| 256 | |
| 257 | slave_dev = rcu_dereference(nfo_info->standby_dev); |
| 258 | if (slave_dev) { |
| 259 | dev_uc_sync_multiple(slave_dev, dev); |
| 260 | dev_mc_sync_multiple(slave_dev, dev); |
| 261 | } |
| 262 | |
| 263 | rcu_read_unlock(); |
| 264 | } |
| 265 | |
| 266 | static int net_failover_vlan_rx_add_vid(struct net_device *dev, __be16 proto, |
| 267 | u16 vid) |
| 268 | { |
| 269 | struct net_failover_info *nfo_info = netdev_priv(dev); |
| 270 | struct net_device *primary_dev, *standby_dev; |
| 271 | int ret = 0; |
| 272 | |
| 273 | primary_dev = rcu_dereference(nfo_info->primary_dev); |
| 274 | if (primary_dev) { |
| 275 | ret = vlan_vid_add(primary_dev, proto, vid); |
| 276 | if (ret) |
| 277 | return ret; |
| 278 | } |
| 279 | |
| 280 | standby_dev = rcu_dereference(nfo_info->standby_dev); |
| 281 | if (standby_dev) { |
| 282 | ret = vlan_vid_add(standby_dev, proto, vid); |
| 283 | if (ret) |
| 284 | if (primary_dev) |
| 285 | vlan_vid_del(primary_dev, proto, vid); |
| 286 | } |
| 287 | |
| 288 | return ret; |
| 289 | } |
| 290 | |
| 291 | static int net_failover_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, |
| 292 | u16 vid) |
| 293 | { |
| 294 | struct net_failover_info *nfo_info = netdev_priv(dev); |
| 295 | struct net_device *slave_dev; |
| 296 | |
| 297 | slave_dev = rcu_dereference(nfo_info->primary_dev); |
| 298 | if (slave_dev) |
| 299 | vlan_vid_del(slave_dev, proto, vid); |
| 300 | |
| 301 | slave_dev = rcu_dereference(nfo_info->standby_dev); |
| 302 | if (slave_dev) |
| 303 | vlan_vid_del(slave_dev, proto, vid); |
| 304 | |
| 305 | return 0; |
| 306 | } |
| 307 | |
| 308 | static const struct net_device_ops failover_dev_ops = { |
| 309 | .ndo_open = net_failover_open, |
| 310 | .ndo_stop = net_failover_close, |
| 311 | .ndo_start_xmit = net_failover_start_xmit, |
| 312 | .ndo_select_queue = net_failover_select_queue, |
| 313 | .ndo_get_stats64 = net_failover_get_stats, |
| 314 | .ndo_change_mtu = net_failover_change_mtu, |
| 315 | .ndo_set_rx_mode = net_failover_set_rx_mode, |
| 316 | .ndo_vlan_rx_add_vid = net_failover_vlan_rx_add_vid, |
| 317 | .ndo_vlan_rx_kill_vid = net_failover_vlan_rx_kill_vid, |
| 318 | .ndo_validate_addr = eth_validate_addr, |
| 319 | .ndo_features_check = passthru_features_check, |
| 320 | }; |
| 321 | |
| 322 | #define FAILOVER_NAME "net_failover" |
| 323 | #define FAILOVER_VERSION "0.1" |
| 324 | |
| 325 | static void nfo_ethtool_get_drvinfo(struct net_device *dev, |
| 326 | struct ethtool_drvinfo *drvinfo) |
| 327 | { |
| 328 | strlcpy(drvinfo->driver, FAILOVER_NAME, sizeof(drvinfo->driver)); |
| 329 | strlcpy(drvinfo->version, FAILOVER_VERSION, sizeof(drvinfo->version)); |
| 330 | } |
| 331 | |
| 332 | static int nfo_ethtool_get_link_ksettings(struct net_device *dev, |
| 333 | struct ethtool_link_ksettings *cmd) |
| 334 | { |
| 335 | struct net_failover_info *nfo_info = netdev_priv(dev); |
| 336 | struct net_device *slave_dev; |
| 337 | |
| 338 | slave_dev = rtnl_dereference(nfo_info->primary_dev); |
| 339 | if (!slave_dev || !net_failover_xmit_ready(slave_dev)) { |
| 340 | slave_dev = rtnl_dereference(nfo_info->standby_dev); |
| 341 | if (!slave_dev || !net_failover_xmit_ready(slave_dev)) { |
| 342 | cmd->base.duplex = DUPLEX_UNKNOWN; |
| 343 | cmd->base.port = PORT_OTHER; |
| 344 | cmd->base.speed = SPEED_UNKNOWN; |
| 345 | |
| 346 | return 0; |
| 347 | } |
| 348 | } |
| 349 | |
| 350 | return __ethtool_get_link_ksettings(slave_dev, cmd); |
| 351 | } |
| 352 | |
| 353 | static const struct ethtool_ops failover_ethtool_ops = { |
| 354 | .get_drvinfo = nfo_ethtool_get_drvinfo, |
| 355 | .get_link = ethtool_op_get_link, |
| 356 | .get_link_ksettings = nfo_ethtool_get_link_ksettings, |
| 357 | }; |
| 358 | |
| 359 | /* Called when slave dev is injecting data into network stack. |
| 360 | * Change the associated network device from lower dev to failover dev. |
| 361 | * note: already called with rcu_read_lock |
| 362 | */ |
| 363 | static rx_handler_result_t net_failover_handle_frame(struct sk_buff **pskb) |
| 364 | { |
| 365 | struct sk_buff *skb = *pskb; |
| 366 | struct net_device *dev = rcu_dereference(skb->dev->rx_handler_data); |
| 367 | struct net_failover_info *nfo_info = netdev_priv(dev); |
| 368 | struct net_device *primary_dev, *standby_dev; |
| 369 | |
| 370 | primary_dev = rcu_dereference(nfo_info->primary_dev); |
| 371 | standby_dev = rcu_dereference(nfo_info->standby_dev); |
| 372 | |
| 373 | if (primary_dev && skb->dev == standby_dev) |
| 374 | return RX_HANDLER_EXACT; |
| 375 | |
| 376 | skb->dev = dev; |
| 377 | |
| 378 | return RX_HANDLER_ANOTHER; |
| 379 | } |
| 380 | |
| 381 | static void net_failover_compute_features(struct net_device *dev) |
| 382 | { |
Dan Carpenter | a746407 | 2018-06-04 17:43:21 +0300 | [diff] [blame] | 383 | netdev_features_t vlan_features = FAILOVER_VLAN_FEATURES & |
| 384 | NETIF_F_ALL_FOR_ALL; |
Sridhar Samudrala | cfc80d9 | 2018-05-24 09:55:15 -0700 | [diff] [blame] | 385 | netdev_features_t enc_features = FAILOVER_ENC_FEATURES; |
| 386 | unsigned short max_hard_header_len = ETH_HLEN; |
| 387 | unsigned int dst_release_flag = IFF_XMIT_DST_RELEASE | |
| 388 | IFF_XMIT_DST_RELEASE_PERM; |
| 389 | struct net_failover_info *nfo_info = netdev_priv(dev); |
| 390 | struct net_device *primary_dev, *standby_dev; |
| 391 | |
| 392 | primary_dev = rcu_dereference(nfo_info->primary_dev); |
| 393 | if (primary_dev) { |
| 394 | vlan_features = |
| 395 | netdev_increment_features(vlan_features, |
| 396 | primary_dev->vlan_features, |
| 397 | FAILOVER_VLAN_FEATURES); |
| 398 | enc_features = |
| 399 | netdev_increment_features(enc_features, |
| 400 | primary_dev->hw_enc_features, |
| 401 | FAILOVER_ENC_FEATURES); |
| 402 | |
| 403 | dst_release_flag &= primary_dev->priv_flags; |
| 404 | if (primary_dev->hard_header_len > max_hard_header_len) |
| 405 | max_hard_header_len = primary_dev->hard_header_len; |
| 406 | } |
| 407 | |
| 408 | standby_dev = rcu_dereference(nfo_info->standby_dev); |
| 409 | if (standby_dev) { |
| 410 | vlan_features = |
| 411 | netdev_increment_features(vlan_features, |
| 412 | standby_dev->vlan_features, |
| 413 | FAILOVER_VLAN_FEATURES); |
| 414 | enc_features = |
| 415 | netdev_increment_features(enc_features, |
| 416 | standby_dev->hw_enc_features, |
| 417 | FAILOVER_ENC_FEATURES); |
| 418 | |
| 419 | dst_release_flag &= standby_dev->priv_flags; |
| 420 | if (standby_dev->hard_header_len > max_hard_header_len) |
| 421 | max_hard_header_len = standby_dev->hard_header_len; |
| 422 | } |
| 423 | |
| 424 | dev->vlan_features = vlan_features; |
| 425 | dev->hw_enc_features = enc_features | NETIF_F_GSO_ENCAP_ALL; |
| 426 | dev->hard_header_len = max_hard_header_len; |
| 427 | |
| 428 | dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; |
| 429 | if (dst_release_flag == (IFF_XMIT_DST_RELEASE | |
| 430 | IFF_XMIT_DST_RELEASE_PERM)) |
| 431 | dev->priv_flags |= IFF_XMIT_DST_RELEASE; |
| 432 | |
| 433 | netdev_change_features(dev); |
| 434 | } |
| 435 | |
| 436 | static void net_failover_lower_state_changed(struct net_device *slave_dev, |
| 437 | struct net_device *primary_dev, |
| 438 | struct net_device *standby_dev) |
| 439 | { |
| 440 | struct netdev_lag_lower_state_info info; |
| 441 | |
| 442 | if (netif_carrier_ok(slave_dev)) |
| 443 | info.link_up = true; |
| 444 | else |
| 445 | info.link_up = false; |
| 446 | |
| 447 | if (slave_dev == primary_dev) { |
| 448 | if (netif_running(primary_dev)) |
| 449 | info.tx_enabled = true; |
| 450 | else |
| 451 | info.tx_enabled = false; |
| 452 | } else { |
| 453 | if ((primary_dev && netif_running(primary_dev)) || |
| 454 | (!netif_running(standby_dev))) |
| 455 | info.tx_enabled = false; |
| 456 | else |
| 457 | info.tx_enabled = true; |
| 458 | } |
| 459 | |
| 460 | netdev_lower_state_changed(slave_dev, &info); |
| 461 | } |
| 462 | |
| 463 | static int net_failover_slave_pre_register(struct net_device *slave_dev, |
| 464 | struct net_device *failover_dev) |
| 465 | { |
| 466 | struct net_device *standby_dev, *primary_dev; |
| 467 | struct net_failover_info *nfo_info; |
| 468 | bool slave_is_standby; |
| 469 | |
| 470 | nfo_info = netdev_priv(failover_dev); |
| 471 | standby_dev = rtnl_dereference(nfo_info->standby_dev); |
| 472 | primary_dev = rtnl_dereference(nfo_info->primary_dev); |
| 473 | slave_is_standby = slave_dev->dev.parent == failover_dev->dev.parent; |
| 474 | if (slave_is_standby ? standby_dev : primary_dev) { |
| 475 | netdev_err(failover_dev, "%s attempting to register as slave dev when %s already present\n", |
| 476 | slave_dev->name, |
| 477 | slave_is_standby ? "standby" : "primary"); |
| 478 | return -EINVAL; |
| 479 | } |
| 480 | |
| 481 | /* We want to allow only a direct attached VF device as a primary |
| 482 | * netdev. As there is no easy way to check for a VF device, restrict |
| 483 | * this to a pci device. |
| 484 | */ |
| 485 | if (!slave_is_standby && (!slave_dev->dev.parent || |
| 486 | !dev_is_pci(slave_dev->dev.parent))) |
| 487 | return -EINVAL; |
| 488 | |
| 489 | if (failover_dev->features & NETIF_F_VLAN_CHALLENGED && |
| 490 | vlan_uses_dev(failover_dev)) { |
| 491 | netdev_err(failover_dev, "Device %s is VLAN challenged and failover device has VLAN set up\n", |
| 492 | failover_dev->name); |
| 493 | return -EINVAL; |
| 494 | } |
| 495 | |
| 496 | return 0; |
| 497 | } |
| 498 | |
| 499 | static int net_failover_slave_register(struct net_device *slave_dev, |
| 500 | struct net_device *failover_dev) |
| 501 | { |
| 502 | struct net_device *standby_dev, *primary_dev; |
| 503 | struct net_failover_info *nfo_info; |
| 504 | bool slave_is_standby; |
| 505 | u32 orig_mtu; |
| 506 | int err; |
| 507 | |
| 508 | /* Align MTU of slave with failover dev */ |
| 509 | orig_mtu = slave_dev->mtu; |
| 510 | err = dev_set_mtu(slave_dev, failover_dev->mtu); |
| 511 | if (err) { |
| 512 | netdev_err(failover_dev, "unable to change mtu of %s to %u register failed\n", |
| 513 | slave_dev->name, failover_dev->mtu); |
| 514 | goto done; |
| 515 | } |
| 516 | |
| 517 | dev_hold(slave_dev); |
| 518 | |
| 519 | if (netif_running(failover_dev)) { |
Petr Machata | 00f54e6 | 2018-12-06 17:05:36 +0000 | [diff] [blame] | 520 | err = dev_open(slave_dev, NULL); |
Sridhar Samudrala | cfc80d9 | 2018-05-24 09:55:15 -0700 | [diff] [blame] | 521 | if (err && (err != -EBUSY)) { |
| 522 | netdev_err(failover_dev, "Opening slave %s failed err:%d\n", |
| 523 | slave_dev->name, err); |
| 524 | goto err_dev_open; |
| 525 | } |
| 526 | } |
| 527 | |
| 528 | netif_addr_lock_bh(failover_dev); |
| 529 | dev_uc_sync_multiple(slave_dev, failover_dev); |
Liran Alon | e522343 | 2018-06-18 15:04:05 +0300 | [diff] [blame] | 530 | dev_mc_sync_multiple(slave_dev, failover_dev); |
Sridhar Samudrala | cfc80d9 | 2018-05-24 09:55:15 -0700 | [diff] [blame] | 531 | netif_addr_unlock_bh(failover_dev); |
| 532 | |
| 533 | err = vlan_vids_add_by_dev(slave_dev, failover_dev); |
| 534 | if (err) { |
| 535 | netdev_err(failover_dev, "Failed to add vlan ids to device %s err:%d\n", |
| 536 | slave_dev->name, err); |
| 537 | goto err_vlan_add; |
| 538 | } |
| 539 | |
| 540 | nfo_info = netdev_priv(failover_dev); |
| 541 | standby_dev = rtnl_dereference(nfo_info->standby_dev); |
| 542 | primary_dev = rtnl_dereference(nfo_info->primary_dev); |
| 543 | slave_is_standby = slave_dev->dev.parent == failover_dev->dev.parent; |
| 544 | |
| 545 | if (slave_is_standby) { |
| 546 | rcu_assign_pointer(nfo_info->standby_dev, slave_dev); |
| 547 | standby_dev = slave_dev; |
| 548 | dev_get_stats(standby_dev, &nfo_info->standby_stats); |
| 549 | } else { |
| 550 | rcu_assign_pointer(nfo_info->primary_dev, slave_dev); |
| 551 | primary_dev = slave_dev; |
| 552 | dev_get_stats(primary_dev, &nfo_info->primary_stats); |
| 553 | failover_dev->min_mtu = slave_dev->min_mtu; |
| 554 | failover_dev->max_mtu = slave_dev->max_mtu; |
| 555 | } |
| 556 | |
| 557 | net_failover_lower_state_changed(slave_dev, primary_dev, standby_dev); |
| 558 | net_failover_compute_features(failover_dev); |
| 559 | |
| 560 | call_netdevice_notifiers(NETDEV_JOIN, slave_dev); |
| 561 | |
| 562 | netdev_info(failover_dev, "failover %s slave:%s registered\n", |
| 563 | slave_is_standby ? "standby" : "primary", slave_dev->name); |
| 564 | |
| 565 | return 0; |
| 566 | |
| 567 | err_vlan_add: |
| 568 | dev_uc_unsync(slave_dev, failover_dev); |
| 569 | dev_mc_unsync(slave_dev, failover_dev); |
| 570 | dev_close(slave_dev); |
| 571 | err_dev_open: |
| 572 | dev_put(slave_dev); |
| 573 | dev_set_mtu(slave_dev, orig_mtu); |
| 574 | done: |
| 575 | return err; |
| 576 | } |
| 577 | |
| 578 | static int net_failover_slave_pre_unregister(struct net_device *slave_dev, |
| 579 | struct net_device *failover_dev) |
| 580 | { |
| 581 | struct net_device *standby_dev, *primary_dev; |
| 582 | struct net_failover_info *nfo_info; |
| 583 | |
| 584 | nfo_info = netdev_priv(failover_dev); |
| 585 | primary_dev = rtnl_dereference(nfo_info->primary_dev); |
| 586 | standby_dev = rtnl_dereference(nfo_info->standby_dev); |
| 587 | |
| 588 | if (slave_dev != primary_dev && slave_dev != standby_dev) |
| 589 | return -ENODEV; |
| 590 | |
| 591 | return 0; |
| 592 | } |
| 593 | |
| 594 | static int net_failover_slave_unregister(struct net_device *slave_dev, |
| 595 | struct net_device *failover_dev) |
| 596 | { |
| 597 | struct net_device *standby_dev, *primary_dev; |
| 598 | struct net_failover_info *nfo_info; |
| 599 | bool slave_is_standby; |
| 600 | |
| 601 | nfo_info = netdev_priv(failover_dev); |
| 602 | primary_dev = rtnl_dereference(nfo_info->primary_dev); |
| 603 | standby_dev = rtnl_dereference(nfo_info->standby_dev); |
| 604 | |
YueHaibing | 9e7e6ca | 2018-09-04 02:56:26 +0000 | [diff] [blame] | 605 | if (WARN_ON_ONCE(slave_dev != primary_dev && slave_dev != standby_dev)) |
| 606 | return -ENODEV; |
| 607 | |
Sridhar Samudrala | cfc80d9 | 2018-05-24 09:55:15 -0700 | [diff] [blame] | 608 | vlan_vids_del_by_dev(slave_dev, failover_dev); |
| 609 | dev_uc_unsync(slave_dev, failover_dev); |
| 610 | dev_mc_unsync(slave_dev, failover_dev); |
| 611 | dev_close(slave_dev); |
| 612 | |
| 613 | nfo_info = netdev_priv(failover_dev); |
| 614 | dev_get_stats(failover_dev, &nfo_info->failover_stats); |
| 615 | |
| 616 | slave_is_standby = slave_dev->dev.parent == failover_dev->dev.parent; |
| 617 | if (slave_is_standby) { |
| 618 | RCU_INIT_POINTER(nfo_info->standby_dev, NULL); |
| 619 | } else { |
| 620 | RCU_INIT_POINTER(nfo_info->primary_dev, NULL); |
| 621 | if (standby_dev) { |
| 622 | failover_dev->min_mtu = standby_dev->min_mtu; |
| 623 | failover_dev->max_mtu = standby_dev->max_mtu; |
| 624 | } |
| 625 | } |
| 626 | |
| 627 | dev_put(slave_dev); |
| 628 | |
| 629 | net_failover_compute_features(failover_dev); |
| 630 | |
| 631 | netdev_info(failover_dev, "failover %s slave:%s unregistered\n", |
| 632 | slave_is_standby ? "standby" : "primary", slave_dev->name); |
| 633 | |
| 634 | return 0; |
| 635 | } |
| 636 | |
| 637 | static int net_failover_slave_link_change(struct net_device *slave_dev, |
| 638 | struct net_device *failover_dev) |
| 639 | { |
| 640 | struct net_device *primary_dev, *standby_dev; |
| 641 | struct net_failover_info *nfo_info; |
| 642 | |
| 643 | nfo_info = netdev_priv(failover_dev); |
| 644 | |
| 645 | primary_dev = rtnl_dereference(nfo_info->primary_dev); |
| 646 | standby_dev = rtnl_dereference(nfo_info->standby_dev); |
| 647 | |
| 648 | if (slave_dev != primary_dev && slave_dev != standby_dev) |
| 649 | return -ENODEV; |
| 650 | |
| 651 | if ((primary_dev && net_failover_xmit_ready(primary_dev)) || |
| 652 | (standby_dev && net_failover_xmit_ready(standby_dev))) { |
| 653 | netif_carrier_on(failover_dev); |
| 654 | netif_tx_wake_all_queues(failover_dev); |
| 655 | } else { |
| 656 | dev_get_stats(failover_dev, &nfo_info->failover_stats); |
| 657 | netif_carrier_off(failover_dev); |
| 658 | netif_tx_stop_all_queues(failover_dev); |
| 659 | } |
| 660 | |
| 661 | net_failover_lower_state_changed(slave_dev, primary_dev, standby_dev); |
| 662 | |
| 663 | return 0; |
| 664 | } |
| 665 | |
| 666 | static int net_failover_slave_name_change(struct net_device *slave_dev, |
| 667 | struct net_device *failover_dev) |
| 668 | { |
| 669 | struct net_device *primary_dev, *standby_dev; |
| 670 | struct net_failover_info *nfo_info; |
| 671 | |
| 672 | nfo_info = netdev_priv(failover_dev); |
| 673 | |
| 674 | primary_dev = rtnl_dereference(nfo_info->primary_dev); |
| 675 | standby_dev = rtnl_dereference(nfo_info->standby_dev); |
| 676 | |
| 677 | if (slave_dev != primary_dev && slave_dev != standby_dev) |
| 678 | return -ENODEV; |
| 679 | |
| 680 | /* We need to bring up the slave after the rename by udev in case |
| 681 | * open failed with EBUSY when it was registered. |
| 682 | */ |
Petr Machata | 00f54e6 | 2018-12-06 17:05:36 +0000 | [diff] [blame] | 683 | dev_open(slave_dev, NULL); |
Sridhar Samudrala | cfc80d9 | 2018-05-24 09:55:15 -0700 | [diff] [blame] | 684 | |
| 685 | return 0; |
| 686 | } |
| 687 | |
| 688 | static struct failover_ops net_failover_ops = { |
| 689 | .slave_pre_register = net_failover_slave_pre_register, |
| 690 | .slave_register = net_failover_slave_register, |
| 691 | .slave_pre_unregister = net_failover_slave_pre_unregister, |
| 692 | .slave_unregister = net_failover_slave_unregister, |
| 693 | .slave_link_change = net_failover_slave_link_change, |
| 694 | .slave_name_change = net_failover_slave_name_change, |
| 695 | .slave_handle_frame = net_failover_handle_frame, |
| 696 | }; |
| 697 | |
| 698 | /** |
| 699 | * net_failover_create - Create and register a failover instance |
| 700 | * |
| 701 | * @dev: standby netdev |
| 702 | * |
| 703 | * Creates a failover netdev and registers a failover instance for a standby |
| 704 | * netdev. Used by paravirtual drivers that use 3-netdev model. |
| 705 | * The failover netdev acts as a master device and controls 2 slave devices - |
| 706 | * the original standby netdev and a VF netdev with the same MAC gets |
| 707 | * registered as primary netdev. |
| 708 | * |
| 709 | * Return: pointer to failover instance |
| 710 | */ |
| 711 | struct failover *net_failover_create(struct net_device *standby_dev) |
| 712 | { |
| 713 | struct device *dev = standby_dev->dev.parent; |
| 714 | struct net_device *failover_dev; |
| 715 | struct failover *failover; |
| 716 | int err; |
| 717 | |
| 718 | /* Alloc at least 2 queues, for now we are going with 16 assuming |
| 719 | * that VF devices being enslaved won't have too many queues. |
| 720 | */ |
| 721 | failover_dev = alloc_etherdev_mq(sizeof(struct net_failover_info), 16); |
| 722 | if (!failover_dev) { |
| 723 | dev_err(dev, "Unable to allocate failover_netdev!\n"); |
| 724 | return ERR_PTR(-ENOMEM); |
| 725 | } |
| 726 | |
| 727 | dev_net_set(failover_dev, dev_net(standby_dev)); |
| 728 | SET_NETDEV_DEV(failover_dev, dev); |
| 729 | |
| 730 | failover_dev->netdev_ops = &failover_dev_ops; |
| 731 | failover_dev->ethtool_ops = &failover_ethtool_ops; |
| 732 | |
| 733 | /* Initialize the device options */ |
| 734 | failover_dev->priv_flags |= IFF_UNICAST_FLT | IFF_NO_QUEUE; |
| 735 | failover_dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | |
| 736 | IFF_TX_SKB_SHARING); |
| 737 | |
| 738 | /* don't acquire failover netdev's netif_tx_lock when transmitting */ |
| 739 | failover_dev->features |= NETIF_F_LLTX; |
| 740 | |
| 741 | /* Don't allow failover devices to change network namespaces. */ |
| 742 | failover_dev->features |= NETIF_F_NETNS_LOCAL; |
| 743 | |
| 744 | failover_dev->hw_features = FAILOVER_VLAN_FEATURES | |
| 745 | NETIF_F_HW_VLAN_CTAG_TX | |
| 746 | NETIF_F_HW_VLAN_CTAG_RX | |
| 747 | NETIF_F_HW_VLAN_CTAG_FILTER; |
| 748 | |
| 749 | failover_dev->hw_features |= NETIF_F_GSO_ENCAP_ALL; |
| 750 | failover_dev->features |= failover_dev->hw_features; |
| 751 | |
| 752 | memcpy(failover_dev->dev_addr, standby_dev->dev_addr, |
| 753 | failover_dev->addr_len); |
| 754 | |
| 755 | failover_dev->min_mtu = standby_dev->min_mtu; |
| 756 | failover_dev->max_mtu = standby_dev->max_mtu; |
| 757 | |
| 758 | err = register_netdev(failover_dev); |
| 759 | if (err) { |
| 760 | dev_err(dev, "Unable to register failover_dev!\n"); |
| 761 | goto err_register_netdev; |
| 762 | } |
| 763 | |
| 764 | netif_carrier_off(failover_dev); |
| 765 | |
| 766 | failover = failover_register(failover_dev, &net_failover_ops); |
YueHaibing | 09317da | 2018-09-06 21:04:12 +0800 | [diff] [blame] | 767 | if (IS_ERR(failover)) { |
| 768 | err = PTR_ERR(failover); |
Sridhar Samudrala | cfc80d9 | 2018-05-24 09:55:15 -0700 | [diff] [blame] | 769 | goto err_failover_register; |
YueHaibing | 09317da | 2018-09-06 21:04:12 +0800 | [diff] [blame] | 770 | } |
Sridhar Samudrala | cfc80d9 | 2018-05-24 09:55:15 -0700 | [diff] [blame] | 771 | |
| 772 | return failover; |
| 773 | |
| 774 | err_failover_register: |
| 775 | unregister_netdev(failover_dev); |
| 776 | err_register_netdev: |
| 777 | free_netdev(failover_dev); |
| 778 | |
| 779 | return ERR_PTR(err); |
| 780 | } |
| 781 | EXPORT_SYMBOL_GPL(net_failover_create); |
| 782 | |
| 783 | /** |
| 784 | * net_failover_destroy - Destroy a failover instance |
| 785 | * |
| 786 | * @failover: pointer to failover instance |
| 787 | * |
| 788 | * Unregisters any slave netdevs associated with the failover instance by |
| 789 | * calling failover_slave_unregister(). |
| 790 | * unregisters the failover instance itself and finally frees the failover |
| 791 | * netdev. Used by paravirtual drivers that use 3-netdev model. |
| 792 | * |
| 793 | */ |
| 794 | void net_failover_destroy(struct failover *failover) |
| 795 | { |
| 796 | struct net_failover_info *nfo_info; |
| 797 | struct net_device *failover_dev; |
| 798 | struct net_device *slave_dev; |
| 799 | |
| 800 | if (!failover) |
| 801 | return; |
| 802 | |
| 803 | failover_dev = rcu_dereference(failover->failover_dev); |
| 804 | nfo_info = netdev_priv(failover_dev); |
| 805 | |
| 806 | netif_device_detach(failover_dev); |
| 807 | |
| 808 | rtnl_lock(); |
| 809 | |
| 810 | slave_dev = rtnl_dereference(nfo_info->primary_dev); |
| 811 | if (slave_dev) |
| 812 | failover_slave_unregister(slave_dev); |
| 813 | |
| 814 | slave_dev = rtnl_dereference(nfo_info->standby_dev); |
| 815 | if (slave_dev) |
| 816 | failover_slave_unregister(slave_dev); |
| 817 | |
| 818 | failover_unregister(failover); |
| 819 | |
| 820 | unregister_netdevice(failover_dev); |
| 821 | |
| 822 | rtnl_unlock(); |
| 823 | |
| 824 | free_netdev(failover_dev); |
| 825 | } |
| 826 | EXPORT_SYMBOL_GPL(net_failover_destroy); |
| 827 | |
| 828 | static __init int |
| 829 | net_failover_init(void) |
| 830 | { |
| 831 | return 0; |
| 832 | } |
| 833 | module_init(net_failover_init); |
| 834 | |
| 835 | static __exit |
| 836 | void net_failover_exit(void) |
| 837 | { |
| 838 | } |
| 839 | module_exit(net_failover_exit); |
| 840 | |
| 841 | MODULE_DESCRIPTION("Failover driver for Paravirtual drivers"); |
| 842 | MODULE_LICENSE("GPL v2"); |