Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | /* |
| 2 | * INET An implementation of the TCP/IP protocol suite for the LINUX |
| 3 | * operating system. INET is implemented using the BSD Socket |
| 4 | * interface as the means of communication with the user level. |
| 5 | * |
| 6 | * Pseudo-driver for the loopback interface. |
| 7 | * |
| 8 | * Version: @(#)loopback.c 1.0.4b 08/16/93 |
| 9 | * |
Jesper Juhl | 02c30a8 | 2005-05-05 16:16:16 -0700 | [diff] [blame] | 10 | * Authors: Ross Biro |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 11 | * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> |
| 12 | * Donald Becker, <becker@scyld.com> |
| 13 | * |
| 14 | * Alan Cox : Fixed oddments for NET3.014 |
| 15 | * Alan Cox : Rejig for NET3.029 snap #3 |
| 16 | * Alan Cox : Fixed NET3.029 bugs and sped up |
| 17 | * Larry McVoy : Tiny tweak to double performance |
| 18 | * Alan Cox : Backed out LMV's tweak - the linux mm |
| 19 | * can't take it... |
| 20 | * Michael Griffith: Don't bother computing the checksums |
| 21 | * on packets received on the loopback |
| 22 | * interface. |
| 23 | * Alexey Kuznetsov: Potential hang under some extreme |
| 24 | * cases removed. |
| 25 | * |
| 26 | * This program is free software; you can redistribute it and/or |
| 27 | * modify it under the terms of the GNU General Public License |
| 28 | * as published by the Free Software Foundation; either version |
| 29 | * 2 of the License, or (at your option) any later version. |
| 30 | */ |
| 31 | #include <linux/kernel.h> |
| 32 | #include <linux/jiffies.h> |
| 33 | #include <linux/module.h> |
| 34 | #include <linux/interrupt.h> |
| 35 | #include <linux/fs.h> |
| 36 | #include <linux/types.h> |
| 37 | #include <linux/string.h> |
| 38 | #include <linux/socket.h> |
| 39 | #include <linux/errno.h> |
| 40 | #include <linux/fcntl.h> |
| 41 | #include <linux/in.h> |
| 42 | #include <linux/init.h> |
| 43 | |
| 44 | #include <asm/system.h> |
| 45 | #include <asm/uaccess.h> |
| 46 | #include <asm/io.h> |
| 47 | |
| 48 | #include <linux/inet.h> |
| 49 | #include <linux/netdevice.h> |
| 50 | #include <linux/etherdevice.h> |
| 51 | #include <linux/skbuff.h> |
| 52 | #include <linux/ethtool.h> |
| 53 | #include <net/sock.h> |
| 54 | #include <net/checksum.h> |
| 55 | #include <linux/if_ether.h> /* For the statistics structure. */ |
| 56 | #include <linux/if_arp.h> /* For ARPHRD_ETHER */ |
| 57 | #include <linux/ip.h> |
| 58 | #include <linux/tcp.h> |
| 59 | #include <linux/percpu.h> |
Eric W. Biederman | 2774c7a | 2007-09-26 22:10:56 -0700 | [diff] [blame] | 60 | #include <net/net_namespace.h> |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 61 | |
/*
 * Per-CPU traffic counters for the loopback device.  One instance exists
 * per possible CPU; get_stats() adds them all up on demand.
 */
struct pcpu_lstats {
	unsigned long packets;	/* frames passed through loopback_xmit() */
	unsigned long bytes;	/* sum of skb->len over those frames */
};
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 66 | |
/*
 * KISS: just allocate small chunks and copy bits.
 *
 * This doubles as documentation of what is expected from a device that
 * performs large-send offload, except for the TCP checksum, which is
 * ignored on loopback.
 *
 * The oversized TCP/IPv4 skb is cut into segments carrying at most
 * gso_size bytes of payload.  Every segment gets its own copy of the
 * IP + TCP headers with tot_len, id, the IP header checksum and the TCP
 * sequence number patched up, and is then fed to netif_rx().  The
 * original skb is always freed.  If a segment cannot be allocated the
 * loop stops and the payload that was not yet sent is dropped.
 */

#ifdef LOOPBACK_TSO
static void emulate_large_send_offload(struct sk_buff *skb)
{
	struct iphdr *iph = ip_hdr(skb);
	struct tcphdr *th = (struct tcphdr *)(skb_network_header(skb) +
					      (iph->ihl * 4));
	/* Combined length of the IP and TCP headers, in bytes. */
	unsigned int hdr_len = (iph->ihl + th->doff) * 4;
	/* Largest segment we will build: headers plus gso_size payload. */
	unsigned int seg_max = skb_shinfo(skb)->gso_size + hdr_len;
	/* Payload bytes already handed out in earlier segments. */
	unsigned int consumed = 0;
	u32 next_seq = ntohl(th->seq);
	u16 next_id = ntohs(iph->id);

	while (consumed + hdr_len < skb->len) {
		unsigned int left = skb->len - consumed;
		unsigned int payload = min(seg_max, left) - hdr_len;
		struct sk_buff *seg;

		seg = alloc_skb(seg_max + 32, GFP_ATOMIC);
		if (!seg)
			break;

		/* 32 bytes of headroom; the MAC header sits just below data. */
		skb_reserve(seg, 32);
		skb_set_mac_header(seg, -ETH_HLEN);
		skb_reset_network_header(seg);

		/* Headers first, then this segment's slice of the payload. */
		skb_copy_to_linear_data(seg, skb_network_header(skb), hdr_len);
		if (skb_copy_bits(skb, hdr_len + consumed,
				  seg->data + hdr_len, payload))
			BUG();
		skb_put(seg, hdr_len + payload);

		/* Inherit the metadata of the original packet. */
		seg->ip_summed = CHECKSUM_UNNECESSARY;
		seg->dev = skb->dev;
		seg->priority = skb->priority;
		seg->protocol = skb->protocol;
		seg->dst = dst_clone(skb->dst);
		memcpy(seg->cb, skb->cb, sizeof(skb->cb));
		seg->pkt_type = skb->pkt_type;

		/* Fix up the copied headers for this particular segment. */
		iph = ip_hdr(seg);
		th = (struct tcphdr *)(skb_network_header(seg) + iph->ihl * 4);
		iph->tot_len = htons(payload + hdr_len);
		iph->id = htons(next_id);
		iph->check = 0;
		iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
		th->seq = htonl(next_seq);

		/* FIN and PSH may only survive on the final segment. */
		if (consumed + hdr_len + payload < skb->len)
			th->fin = th->psh = 0;

		netif_rx(seg);

		consumed += payload;
		next_seq += payload;
		next_id++;
	}

	dev_kfree_skb(skb);
}
#endif /* LOOPBACK_TSO */
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 128 | |
| 129 | /* |
| 130 | * The higher levels take care of making this non-reentrant (it's |
| 131 | * called with bh's disabled). |
| 132 | */ |
| 133 | static int loopback_xmit(struct sk_buff *skb, struct net_device *dev) |
| 134 | { |
Eric W. Biederman | 5f6d88b | 2007-09-26 22:08:12 -0700 | [diff] [blame] | 135 | struct pcpu_lstats *pcpu_lstats, *lb_stats; |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 136 | |
| 137 | skb_orphan(skb); |
| 138 | |
Chuck Ebbert | 0e920bf | 2005-07-02 21:28:23 -0400 | [diff] [blame] | 139 | skb->protocol = eth_type_trans(skb,dev); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 140 | #ifndef LOOPBACK_MUST_CHECKSUM |
| 141 | skb->ip_summed = CHECKSUM_UNNECESSARY; |
| 142 | #endif |
| 143 | |
Chuck Ebbert | d2ae1d2 | 2005-07-02 21:28:21 -0400 | [diff] [blame] | 144 | #ifdef LOOPBACK_TSO |
Herbert Xu | 89114af | 2006-07-08 13:34:32 -0700 | [diff] [blame] | 145 | if (skb_is_gso(skb)) { |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 146 | BUG_ON(skb->protocol != htons(ETH_P_IP)); |
Arnaldo Carvalho de Melo | eddc9ec | 2007-04-20 22:47:35 -0700 | [diff] [blame] | 147 | BUG_ON(ip_hdr(skb)->protocol != IPPROTO_TCP); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 148 | |
| 149 | emulate_large_send_offload(skb); |
| 150 | return 0; |
| 151 | } |
Chuck Ebbert | d2ae1d2 | 2005-07-02 21:28:21 -0400 | [diff] [blame] | 152 | #endif |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 153 | dev->last_rx = jiffies; |
| 154 | |
Eric W. Biederman | 9e0db4b | 2007-09-27 17:09:39 -0700 | [diff] [blame] | 155 | /* it's OK to use per_cpu_ptr() because BHs are off */ |
Eric W. Biederman | 5f6d88b | 2007-09-26 22:08:12 -0700 | [diff] [blame] | 156 | pcpu_lstats = netdev_priv(dev); |
| 157 | lb_stats = per_cpu_ptr(pcpu_lstats, smp_processor_id()); |
Eric Dumazet | 5175c37 | 2006-10-18 20:51:57 -0700 | [diff] [blame] | 158 | lb_stats->bytes += skb->len; |
| 159 | lb_stats->packets++; |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 160 | |
| 161 | netif_rx(skb); |
| 162 | |
Eric Dumazet | 58f5397 | 2006-10-20 00:32:41 -0700 | [diff] [blame] | 163 | return 0; |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 164 | } |
| 165 | |
| 166 | static struct net_device_stats *get_stats(struct net_device *dev) |
| 167 | { |
Eric W. Biederman | 5f6d88b | 2007-09-26 22:08:12 -0700 | [diff] [blame] | 168 | const struct pcpu_lstats *pcpu_lstats; |
Eric Dumazet | 3303680 | 2007-04-10 13:25:40 -0700 | [diff] [blame] | 169 | struct net_device_stats *stats = &dev->stats; |
Eric Dumazet | 5175c37 | 2006-10-18 20:51:57 -0700 | [diff] [blame] | 170 | unsigned long bytes = 0; |
| 171 | unsigned long packets = 0; |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 172 | int i; |
| 173 | |
Eric W. Biederman | 5f6d88b | 2007-09-26 22:08:12 -0700 | [diff] [blame] | 174 | pcpu_lstats = netdev_priv(dev); |
KAMEZAWA Hiroyuki | 0fed484 | 2006-03-28 01:56:37 -0800 | [diff] [blame] | 175 | for_each_possible_cpu(i) { |
Eric Dumazet | 5175c37 | 2006-10-18 20:51:57 -0700 | [diff] [blame] | 176 | const struct pcpu_lstats *lb_stats; |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 177 | |
Eric W. Biederman | 5f6d88b | 2007-09-26 22:08:12 -0700 | [diff] [blame] | 178 | lb_stats = per_cpu_ptr(pcpu_lstats, i); |
Eric Dumazet | 5175c37 | 2006-10-18 20:51:57 -0700 | [diff] [blame] | 179 | bytes += lb_stats->bytes; |
| 180 | packets += lb_stats->packets; |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 181 | } |
Eric Dumazet | 5175c37 | 2006-10-18 20:51:57 -0700 | [diff] [blame] | 182 | stats->rx_packets = packets; |
| 183 | stats->tx_packets = packets; |
| 184 | stats->rx_bytes = bytes; |
| 185 | stats->tx_bytes = bytes; |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 186 | return stats; |
| 187 | } |
| 188 | |
Stephen Hemminger | 7fa6b06 | 2006-09-27 20:33:34 -0700 | [diff] [blame] | 189 | static u32 always_on(struct net_device *dev) |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 190 | { |
| 191 | return 1; |
| 192 | } |
| 193 | |
Jeff Garzik | 7282d49 | 2006-09-13 14:30:00 -0400 | [diff] [blame] | 194 | static const struct ethtool_ops loopback_ethtool_ops = { |
Stephen Hemminger | 7fa6b06 | 2006-09-27 20:33:34 -0700 | [diff] [blame] | 195 | .get_link = always_on, |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 196 | .set_tso = ethtool_op_set_tso, |
Stephen Hemminger | 7fa6b06 | 2006-09-27 20:33:34 -0700 | [diff] [blame] | 197 | .get_tx_csum = always_on, |
| 198 | .get_sg = always_on, |
| 199 | .get_rx_csum = always_on, |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 200 | }; |
| 201 | |
Eric W. Biederman | 5f6d88b | 2007-09-26 22:08:12 -0700 | [diff] [blame] | 202 | static int loopback_dev_init(struct net_device *dev) |
| 203 | { |
| 204 | struct pcpu_lstats *lstats; |
| 205 | |
| 206 | lstats = alloc_percpu(struct pcpu_lstats); |
| 207 | if (!lstats) |
| 208 | return -ENOMEM; |
| 209 | |
| 210 | dev->priv = lstats; |
| 211 | return 0; |
| 212 | } |
| 213 | |
/*
 * dev->destructor hook: release the per-CPU counters obtained through
 * netdev_priv(), then the net_device itself.
 */
static void loopback_dev_free(struct net_device *dev)
{
	free_percpu(netdev_priv(dev));
	free_netdev(dev);
}
| 221 | |
Stephen Hemminger | 7fa6b06 | 2006-09-27 20:33:34 -0700 | [diff] [blame] | 222 | /* |
Eric W. Biederman | 9e0db4b | 2007-09-27 17:09:39 -0700 | [diff] [blame] | 223 | * The loopback device is special. There is only one instance |
| 224 | * per network namespace. |
Stephen Hemminger | 7fa6b06 | 2006-09-27 20:33:34 -0700 | [diff] [blame] | 225 | */ |
Daniel Lezcano | 854d836 | 2007-09-25 19:18:04 -0700 | [diff] [blame] | 226 | static void loopback_setup(struct net_device *dev) |
| 227 | { |
| 228 | dev->get_stats = &get_stats; |
| 229 | dev->mtu = (16 * 1024) + 20 + 20 + 12; |
| 230 | dev->hard_start_xmit = loopback_xmit; |
Daniel Lezcano | 854d836 | 2007-09-25 19:18:04 -0700 | [diff] [blame] | 231 | dev->hard_header_len = ETH_HLEN; /* 14 */ |
| 232 | dev->addr_len = ETH_ALEN; /* 6 */ |
| 233 | dev->tx_queue_len = 0; |
| 234 | dev->type = ARPHRD_LOOPBACK; /* 0x0001*/ |
Daniel Lezcano | 854d836 | 2007-09-25 19:18:04 -0700 | [diff] [blame] | 235 | dev->flags = IFF_LOOPBACK; |
| 236 | dev->features = NETIF_F_SG | NETIF_F_FRAGLIST |
Chuck Ebbert | d2ae1d2 | 2005-07-02 21:28:21 -0400 | [diff] [blame] | 237 | #ifdef LOOPBACK_TSO |
Daniel Lezcano | 854d836 | 2007-09-25 19:18:04 -0700 | [diff] [blame] | 238 | | NETIF_F_TSO |
Chuck Ebbert | d2ae1d2 | 2005-07-02 21:28:21 -0400 | [diff] [blame] | 239 | #endif |
Daniel Lezcano | 854d836 | 2007-09-25 19:18:04 -0700 | [diff] [blame] | 240 | | NETIF_F_NO_CSUM |
| 241 | | NETIF_F_HIGHDMA |
| 242 | | NETIF_F_LLTX |
Emil Medve | 2d2c54e | 2007-12-27 08:17:22 -0600 | [diff] [blame] | 243 | | NETIF_F_NETNS_LOCAL; |
Daniel Lezcano | 854d836 | 2007-09-25 19:18:04 -0700 | [diff] [blame] | 244 | dev->ethtool_ops = &loopback_ethtool_ops; |
Stephen Hemminger | 3b04ddd | 2007-10-09 01:40:57 -0700 | [diff] [blame] | 245 | dev->header_ops = ð_header_ops; |
Eric W. Biederman | 5f6d88b | 2007-09-26 22:08:12 -0700 | [diff] [blame] | 246 | dev->init = loopback_dev_init; |
| 247 | dev->destructor = loopback_dev_free; |
Daniel Lezcano | 854d836 | 2007-09-25 19:18:04 -0700 | [diff] [blame] | 248 | } |
Daniel Lezcano | de3cb74 | 2007-09-25 19:16:28 -0700 | [diff] [blame] | 249 | |
Ralf Baechle | 2278364 | 2005-08-18 14:05:18 -0700 | [diff] [blame] | 250 | /* Setup and register the loopback device. */ |
Pavel Emelyanov | 4665079 | 2007-10-08 20:38:39 -0700 | [diff] [blame] | 251 | static __net_init int loopback_net_init(struct net *net) |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 252 | { |
Daniel Lezcano | 854d836 | 2007-09-25 19:18:04 -0700 | [diff] [blame] | 253 | struct net_device *dev; |
| 254 | int err; |
Herbert Xu | aeed9e8 | 2007-07-30 16:37:19 -0700 | [diff] [blame] | 255 | |
Daniel Lezcano | 854d836 | 2007-09-25 19:18:04 -0700 | [diff] [blame] | 256 | err = -ENOMEM; |
| 257 | dev = alloc_netdev(0, "lo", loopback_setup); |
| 258 | if (!dev) |
| 259 | goto out; |
| 260 | |
YOSHIFUJI Hideaki | c346dca | 2008-03-25 21:47:49 +0900 | [diff] [blame^] | 261 | dev_net_set(dev, net); |
Daniel Lezcano | 854d836 | 2007-09-25 19:18:04 -0700 | [diff] [blame] | 262 | err = register_netdev(dev); |
| 263 | if (err) |
| 264 | goto out_free_netdev; |
| 265 | |
Eric W. Biederman | 2774c7a | 2007-09-26 22:10:56 -0700 | [diff] [blame] | 266 | net->loopback_dev = dev; |
Pavel Emelyanov | 9d6dda32 | 2007-10-15 12:55:33 -0700 | [diff] [blame] | 267 | return 0; |
Daniel Lezcano | 854d836 | 2007-09-25 19:18:04 -0700 | [diff] [blame] | 268 | |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 269 | |
Daniel Lezcano | 854d836 | 2007-09-25 19:18:04 -0700 | [diff] [blame] | 270 | out_free_netdev: |
| 271 | free_netdev(dev); |
Pavel Emelyanov | 9d6dda32 | 2007-10-15 12:55:33 -0700 | [diff] [blame] | 272 | out: |
| 273 | if (net == &init_net) |
| 274 | panic("loopback: Failed to register netdevice: %d\n", err); |
| 275 | return err; |
Daniel Lezcano | 854d836 | 2007-09-25 19:18:04 -0700 | [diff] [blame] | 276 | } |
Adrian Bunk | 60903f2 | 2007-01-02 00:35:48 -0800 | [diff] [blame] | 277 | |
Pavel Emelyanov | 4665079 | 2007-10-08 20:38:39 -0700 | [diff] [blame] | 278 | static __net_exit void loopback_net_exit(struct net *net) |
Eric W. Biederman | 2774c7a | 2007-09-26 22:10:56 -0700 | [diff] [blame] | 279 | { |
| 280 | struct net_device *dev = net->loopback_dev; |
Daniel Lezcano | 854d836 | 2007-09-25 19:18:04 -0700 | [diff] [blame] | 281 | |
Eric W. Biederman | 2774c7a | 2007-09-26 22:10:56 -0700 | [diff] [blame] | 282 | unregister_netdev(dev); |
| 283 | } |
| 284 | |
Denis V. Lunev | 022cbae | 2007-11-13 03:23:50 -0800 | [diff] [blame] | 285 | static struct pernet_operations __net_initdata loopback_net_ops = { |
Eric W. Biederman | 2774c7a | 2007-09-26 22:10:56 -0700 | [diff] [blame] | 286 | .init = loopback_net_init, |
| 287 | .exit = loopback_net_exit, |
| 288 | }; |
| 289 | |
| 290 | static int __init loopback_init(void) |
| 291 | { |
| 292 | return register_pernet_device(&loopback_net_ops); |
| 293 | } |
| 294 | |
Denis V. Lunev | 070ac3a | 2007-09-27 12:04:19 -0700 | [diff] [blame] | 295 | /* Loopback is special. It should be initialized before any other network |
| 296 | * device and network subsystem. |
| 297 | */ |
Eric W. Biederman | 2774c7a | 2007-09-26 22:10:56 -0700 | [diff] [blame] | 298 | fs_initcall(loopback_init); |