[PACKET]: Add optional checksum computation for recvmsg
This patch is needed to make ISC's DHCP server (and probably other
DHCP servers/clients using AF_PACKET) able to serve another
client on the same Xen host.
The problem is that packets between different domains on the same
Xen host only have partial checksums. Unfortunately this piece of
information is not passed along in AF_PACKET unless you're using
the mmap interface. Since dhcpd doesn't support packet-mmap, UDP
packets from the same host come out with apparently bogus checksums.
This patch adds a mechanism for AF_PACKET recvmsg(2) to return the
status along with the packet. It does so by adding a new cmsg that
contains this information along with some other relevant data such
as the original packet length.
I didn't include the time stamp information since there is already
a cmsg for that.
This patch also changes the mmap code to set the CSUMNOTREADY flag
on all packets instead of just outgoing packets on cooked sockets.
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 6dc01bd..8973ea7 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -200,7 +200,8 @@
#endif
struct packet_type prot_hook;
spinlock_t bind_lock;
- char running; /* prot_hook is attached*/
+ unsigned int running:1, /* prot_hook is attached*/
+ auxdata:1;
int ifindex; /* bound device */
__be16 num;
#ifdef CONFIG_PACKET_MULTICAST
@@ -214,6 +215,8 @@
#endif
};
+#define PACKET_SKB_CB(__skb) ((struct tpacket_auxdata *)((__skb)->cb))
+
#ifdef CONFIG_PACKET_MMAP
static inline char *packet_lookup_frame(struct packet_sock *po, unsigned int position)
@@ -462,6 +465,7 @@
u8 * skb_head = skb->data;
int skb_len = skb->len;
unsigned int snaplen, res;
+ struct tpacket_auxdata *aux;
if (skb->pkt_type == PACKET_LOOPBACK)
goto drop;
@@ -523,6 +527,15 @@
if (dev->hard_header_parse)
sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr);
+ aux = PACKET_SKB_CB(skb);
+ aux->tp_status = TP_STATUS_USER;
+ if (skb->ip_summed == CHECKSUM_PARTIAL)
+ aux->tp_status |= TP_STATUS_CSUMNOTREADY;
+ aux->tp_len = skb->len;
+ aux->tp_snaplen = snaplen;
+ aux->tp_mac = 0;
+ aux->tp_net = skb->nh.raw - skb->data;
+
if (pskb_trim(skb, snaplen))
goto drop_n_acct;
@@ -582,11 +595,12 @@
else if (skb->pkt_type == PACKET_OUTGOING) {
/* Special case: outgoing packets have ll header at head */
skb_pull(skb, skb->nh.raw - skb->data);
- if (skb->ip_summed == CHECKSUM_PARTIAL)
- status |= TP_STATUS_CSUMNOTREADY;
}
}
+ if (skb->ip_summed == CHECKSUM_PARTIAL)
+ status |= TP_STATUS_CSUMNOTREADY;
+
snaplen = skb->len;
res = run_filter(skb, sk, snaplen);
@@ -1119,6 +1133,11 @@
if (msg->msg_name)
memcpy(msg->msg_name, skb->cb, msg->msg_namelen);
+ if (pkt_sk(sk)->auxdata) {
+ struct tpacket_auxdata *aux = PACKET_SKB_CB(skb);
+ put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(*aux), aux);
+ }
+
/*
* Free or return the buffer as appropriate. Again this
* hides all the races and re-entrancy issues from us.
@@ -1317,6 +1336,7 @@
packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen)
{
struct sock *sk = sock->sk;
+ struct packet_sock *po = pkt_sk(sk);
int ret;
if (level != SOL_PACKET)
@@ -1369,6 +1389,18 @@
return 0;
}
#endif
+ case PACKET_AUXDATA:
+ {
+ int val;
+
+ if (optlen < sizeof(val))
+ return -EINVAL;
+ if (copy_from_user(&val, optval, sizeof(val)))
+ return -EFAULT;
+
+ po->auxdata = !!val;
+ return 0;
+ }
default:
return -ENOPROTOOPT;
}
@@ -1378,8 +1410,11 @@
char __user *optval, int __user *optlen)
{
int len;
+ int val;
struct sock *sk = sock->sk;
struct packet_sock *po = pkt_sk(sk);
+ void *data;
+ struct tpacket_stats st;
if (level != SOL_PACKET)
return -ENOPROTOOPT;
@@ -1392,9 +1427,6 @@
switch(optname) {
case PACKET_STATISTICS:
- {
- struct tpacket_stats st;
-
if (len > sizeof(struct tpacket_stats))
len = sizeof(struct tpacket_stats);
spin_lock_bh(&sk->sk_receive_queue.lock);
@@ -1403,16 +1435,23 @@
spin_unlock_bh(&sk->sk_receive_queue.lock);
st.tp_packets += st.tp_drops;
- if (copy_to_user(optval, &st, len))
- return -EFAULT;
+ data = &st;
break;
- }
+ case PACKET_AUXDATA:
+ if (len > sizeof(int))
+ len = sizeof(int);
+ val = po->auxdata;
+
+ data = &val;
+ break;
default:
return -ENOPROTOOPT;
}
if (put_user(len, optlen))
return -EFAULT;
+ if (copy_to_user(optval, data, len))
+ return -EFAULT;
return 0;
}