sock: enable timestamping using control messages
Currently, SOL_TIMESTAMPING can only be enabled using setsockopt.
This is very costly when users want to sample writes to gather
tx timestamps.
Add support for enabling SO_TIMESTAMPING via control messages by
using tsflags added in `struct sockcm_cookie` (added in the previous
patches in this series) to set the tx_flags of the last skb created in
a sendmsg. With this patch, the timestamp recording bits in tx_flags
of the skbuff is overridden if SO_TIMESTAMPING is passed in a cmsg.
Please note that this is only effective for overriding the recording
timestamps flags. Users should enable timestamp reporting (e.g.,
SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_OPT_ID) using
socket options and then should ask for SOF_TIMESTAMPING_TX_*
using control messages per sendmsg to sample timestamps for each
write.
Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 510e90a..9abc36b 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -861,7 +861,8 @@
goto drop;
if (skb->sk && sk_fullsock(skb->sk)) {
- sock_tx_timestamp(skb->sk, &skb_shinfo(skb)->tx_flags);
+ sock_tx_timestamp(skb->sk, skb->sk->sk_tsflags,
+ &skb_shinfo(skb)->tx_flags);
sw_tx_timestamp(skb);
}
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index d0aeb97..55ee1eb 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -867,7 +867,8 @@
int odd, struct sk_buff *skb),
void *from, int length, int transhdrlen, int hlimit,
int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
- struct rt6_info *rt, unsigned int flags, int dontfrag);
+ struct rt6_info *rt, unsigned int flags, int dontfrag,
+ const struct sockcm_cookie *sockc);
int ip6_push_pending_frames(struct sock *sk);
@@ -884,7 +885,8 @@
void *from, int length, int transhdrlen,
int hlimit, int tclass, struct ipv6_txoptions *opt,
struct flowi6 *fl6, struct rt6_info *rt,
- unsigned int flags, int dontfrag);
+ unsigned int flags, int dontfrag,
+ const struct sockcm_cookie *sockc);
static inline struct sk_buff *ip6_finish_skb(struct sock *sk)
{
diff --git a/include/net/sock.h b/include/net/sock.h
index af012da..e91b87f 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2057,19 +2057,21 @@
sk->sk_stamp = skb->tstamp;
}
-void __sock_tx_timestamp(const struct sock *sk, __u8 *tx_flags);
+void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags);
/**
* sock_tx_timestamp - checks whether the outgoing packet is to be time stamped
* @sk: socket sending this packet
+ * @tsflags: timestamping flags to use
* @tx_flags: completed with instructions for time stamping
*
* Note : callers should take care of initial *tx_flags value (usually 0)
*/
-static inline void sock_tx_timestamp(const struct sock *sk, __u8 *tx_flags)
+static inline void sock_tx_timestamp(const struct sock *sk, __u16 tsflags,
+ __u8 *tx_flags)
{
- if (unlikely(sk->sk_tsflags))
- __sock_tx_timestamp(sk, tx_flags);
+ if (unlikely(tsflags))
+ __sock_tx_timestamp(tsflags, tx_flags);
if (unlikely(sock_flag(sk, SOCK_WIFI_STATUS)))
*tx_flags |= SKBTX_WIFI_STATUS;
}
diff --git a/net/can/raw.c b/net/can/raw.c
index 2e67b14..972c187 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -755,7 +755,7 @@
if (err < 0)
goto free_skb;
- sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
+ sock_tx_timestamp(sk, sk->sk_tsflags, &skb_shinfo(skb)->tx_flags);
skb->dev = dev;
skb->sk = sk;
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index 670639b..66ddcb6 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -737,6 +737,7 @@
/* no remote port */
}
+ ipc.sockc.tsflags = sk->sk_tsflags;
ipc.addr = inet->inet_saddr;
ipc.opt = NULL;
ipc.oif = sk->sk_bound_dev_if;
@@ -744,8 +745,6 @@
ipc.ttl = 0;
ipc.tos = -1;
- sock_tx_timestamp(sk, &ipc.tx_flags);
-
if (msg->msg_controllen) {
err = ip_cmsg_send(sk, msg, &ipc, false);
if (unlikely(err)) {
@@ -768,6 +767,8 @@
rcu_read_unlock();
}
+ sock_tx_timestamp(sk, ipc.sockc.tsflags, &ipc.tx_flags);
+
saddr = ipc.addr;
ipc.addr = faddr = daddr;
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 088ce66..438f50c 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -339,8 +339,8 @@
static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
struct msghdr *msg, size_t length,
- struct rtable **rtp,
- unsigned int flags)
+ struct rtable **rtp, unsigned int flags,
+ const struct sockcm_cookie *sockc)
{
struct inet_sock *inet = inet_sk(sk);
struct net *net = sock_net(sk);
@@ -379,7 +379,7 @@
skb->ip_summed = CHECKSUM_NONE;
- sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
+ sock_tx_timestamp(sk, sockc->tsflags, &skb_shinfo(skb)->tx_flags);
skb->transport_header = skb->network_header;
err = -EFAULT;
@@ -540,6 +540,7 @@
daddr = inet->inet_daddr;
}
+ ipc.sockc.tsflags = sk->sk_tsflags;
ipc.addr = inet->inet_saddr;
ipc.opt = NULL;
ipc.tx_flags = 0;
@@ -638,10 +639,10 @@
if (inet->hdrincl)
err = raw_send_hdrinc(sk, &fl4, msg, len,
- &rt, msg->msg_flags);
+ &rt, msg->msg_flags, &ipc.sockc);
else {
- sock_tx_timestamp(sk, &ipc.tx_flags);
+ sock_tx_timestamp(sk, ipc.sockc.tsflags, &ipc.tx_flags);
if (!ipc.addr)
ipc.addr = fl4.daddr;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index ce3c9eb..4d73858 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -428,13 +428,13 @@
}
EXPORT_SYMBOL(tcp_init_sock);
-static void tcp_tx_timestamp(struct sock *sk, struct sk_buff *skb)
+static void tcp_tx_timestamp(struct sock *sk, u16 tsflags, struct sk_buff *skb)
{
- if (sk->sk_tsflags) {
+ if (sk->sk_tsflags || tsflags) {
struct skb_shared_info *shinfo = skb_shinfo(skb);
struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
- sock_tx_timestamp(sk, &shinfo->tx_flags);
+ sock_tx_timestamp(sk, tsflags, &shinfo->tx_flags);
if (shinfo->tx_flags & SKBTX_ANY_TSTAMP)
shinfo->tskey = TCP_SKB_CB(skb)->seq + skb->len - 1;
tcb->txstamp_ack = !!(shinfo->tx_flags & SKBTX_ACK_TSTAMP);
@@ -959,7 +959,7 @@
offset += copy;
size -= copy;
if (!size) {
- tcp_tx_timestamp(sk, skb);
+ tcp_tx_timestamp(sk, sk->sk_tsflags, skb);
goto out;
}
@@ -1079,6 +1079,7 @@
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
+ struct sockcm_cookie sockc;
int flags, err, copied = 0;
int mss_now = 0, size_goal, copied_syn = 0;
bool sg;
@@ -1121,6 +1122,15 @@
/* 'common' sending to sendq */
}
+ sockc.tsflags = sk->sk_tsflags;
+ if (msg->msg_controllen) {
+ err = sock_cmsg_send(sk, msg, &sockc);
+ if (unlikely(err)) {
+ err = -EINVAL;
+ goto out_err;
+ }
+ }
+
/* This should be in poll */
sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
@@ -1239,7 +1249,7 @@
copied += copy;
if (!msg_data_left(msg)) {
- tcp_tx_timestamp(sk, skb);
+ tcp_tx_timestamp(sk, sockc.tsflags, skb);
goto out;
}
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index bccb4e1..45ff590 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1027,12 +1027,11 @@
*/
connected = 1;
}
+
+ ipc.sockc.tsflags = sk->sk_tsflags;
ipc.addr = inet->inet_saddr;
-
ipc.oif = sk->sk_bound_dev_if;
- sock_tx_timestamp(sk, &ipc.tx_flags);
-
if (msg->msg_controllen) {
err = ip_cmsg_send(sk, msg, &ipc, sk->sk_family == AF_INET6);
if (unlikely(err)) {
@@ -1059,6 +1058,8 @@
saddr = ipc.addr;
ipc.addr = faddr = daddr;
+ sock_tx_timestamp(sk, ipc.sockc.tsflags, &ipc.tx_flags);
+
if (ipc.opt && ipc.opt->opt.srr) {
if (!daddr)
return -EINVAL;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 0a37ddc..6b573eb 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -400,6 +400,7 @@
struct icmp6hdr tmp_hdr;
struct flowi6 fl6;
struct icmpv6_msg msg;
+ struct sockcm_cookie sockc_unused = {0};
int iif = 0;
int addr_type = 0;
int len;
@@ -527,7 +528,7 @@
len + sizeof(struct icmp6hdr),
sizeof(struct icmp6hdr), hlimit,
np->tclass, NULL, &fl6, (struct rt6_info *)dst,
- MSG_DONTWAIT, np->dontfrag);
+ MSG_DONTWAIT, np->dontfrag, &sockc_unused);
if (err) {
ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
ip6_flush_pending_frames(sk);
@@ -566,6 +567,7 @@
int hlimit;
u8 tclass;
u32 mark = IP6_REPLY_MARK(net, skb->mark);
+ struct sockcm_cookie sockc_unused = {0};
saddr = &ipv6_hdr(skb)->daddr;
@@ -617,7 +619,7 @@
err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr),
sizeof(struct icmp6hdr), hlimit, tclass, NULL, &fl6,
(struct rt6_info *)dst, MSG_DONTWAIT,
- np->dontfrag);
+ np->dontfrag, &sockc_unused);
if (err) {
ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS);
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 9428345d..612f3d1 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1258,7 +1258,8 @@
int getfrag(void *from, char *to, int offset,
int len, int odd, struct sk_buff *skb),
void *from, int length, int transhdrlen,
- unsigned int flags, int dontfrag)
+ unsigned int flags, int dontfrag,
+ const struct sockcm_cookie *sockc)
{
struct sk_buff *skb, *skb_prev = NULL;
unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu;
@@ -1329,7 +1330,7 @@
csummode = CHECKSUM_PARTIAL;
if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) {
- sock_tx_timestamp(sk, &tx_flags);
+ sock_tx_timestamp(sk, sockc->tsflags, &tx_flags);
if (tx_flags & SKBTX_ANY_SW_TSTAMP &&
sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
tskey = sk->sk_tskey++;
@@ -1565,7 +1566,8 @@
int odd, struct sk_buff *skb),
void *from, int length, int transhdrlen, int hlimit,
int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
- struct rt6_info *rt, unsigned int flags, int dontfrag)
+ struct rt6_info *rt, unsigned int flags, int dontfrag,
+ const struct sockcm_cookie *sockc)
{
struct inet_sock *inet = inet_sk(sk);
struct ipv6_pinfo *np = inet6_sk(sk);
@@ -1593,7 +1595,8 @@
return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
&np->cork, sk_page_frag(sk), getfrag,
- from, length, transhdrlen, flags, dontfrag);
+ from, length, transhdrlen, flags, dontfrag,
+ sockc);
}
EXPORT_SYMBOL_GPL(ip6_append_data);
@@ -1752,7 +1755,7 @@
int hlimit, int tclass,
struct ipv6_txoptions *opt, struct flowi6 *fl6,
struct rt6_info *rt, unsigned int flags,
- int dontfrag)
+ int dontfrag, const struct sockcm_cookie *sockc)
{
struct inet_cork_full cork;
struct inet6_cork v6_cork;
@@ -1779,7 +1782,7 @@
err = __ip6_append_data(sk, fl6, &queue, &cork.base, &v6_cork,
¤t->task_frag, getfrag, from,
length + exthdrlen, transhdrlen + exthdrlen,
- flags, dontfrag);
+ flags, dontfrag, sockc);
if (err) {
__ip6_flush_pending_frames(sk, &queue, &cork, &v6_cork);
return ERR_PTR(err);
diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c
index c382db7..da1cff7 100644
--- a/net/ipv6/ping.c
+++ b/net/ipv6/ping.c
@@ -62,6 +62,7 @@
struct dst_entry *dst;
struct rt6_info *rt;
struct pingfakehdr pfh;
+ struct sockcm_cookie junk = {0};
pr_debug("ping_v6_sendmsg(sk=%p,sk->num=%u)\n", inet, inet->inet_num);
@@ -144,7 +145,7 @@
err = ip6_append_data(sk, ping_getfrag, &pfh, len,
0, hlimit,
np->tclass, NULL, &fl6, rt,
- MSG_DONTWAIT, np->dontfrag);
+ MSG_DONTWAIT, np->dontfrag, &junk);
if (err) {
ICMP6_INC_STATS(sock_net(sk), rt->rt6i_idev,
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index f175ec0..b07ce21 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -822,8 +822,7 @@
if (fl6.flowi6_oif == 0)
fl6.flowi6_oif = sk->sk_bound_dev_if;
- sockc.tsflags = 0;
-
+ sockc.tsflags = sk->sk_tsflags;
if (msg->msg_controllen) {
opt = &opt_space;
memset(opt, 0, sizeof(struct ipv6_txoptions));
@@ -901,7 +900,7 @@
lock_sock(sk);
err = ip6_append_data(sk, raw6_getfrag, &rfv,
len, 0, hlimit, tclass, opt, &fl6, (struct rt6_info *)dst,
- msg->msg_flags, dontfrag);
+ msg->msg_flags, dontfrag, &sockc);
if (err)
ip6_flush_pending_frames(sk);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 2a787af..b772a76 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1248,7 +1248,7 @@
fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex;
fl6.flowi6_mark = sk->sk_mark;
- sockc.tsflags = 0;
+ sockc.tsflags = sk->sk_tsflags;
if (msg->msg_controllen) {
opt = &opt_space;
@@ -1324,7 +1324,7 @@
skb = ip6_make_skb(sk, getfrag, msg, ulen,
sizeof(struct udphdr), hlimit, tclass, opt,
&fl6, (struct rt6_info *)dst,
- msg->msg_flags, dontfrag);
+ msg->msg_flags, dontfrag, &sockc);
err = PTR_ERR(skb);
if (!IS_ERR_OR_NULL(skb))
err = udp_v6_send_skb(skb, &fl6);
@@ -1351,7 +1351,8 @@
err = ip6_append_data(sk, getfrag, msg, ulen,
sizeof(struct udphdr), hlimit, tclass, opt, &fl6,
(struct rt6_info *)dst,
- corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags, dontfrag);
+ corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags, dontfrag,
+ &sockc);
if (err)
udp_v6_flush_pending_frames(sk);
else if (!corkreq)
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 4f29a4a..1a38f20 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -627,7 +627,7 @@
err = ip6_append_data(sk, ip_generic_getfrag, msg,
ulen, transhdrlen, hlimit, tclass, opt,
&fl6, (struct rt6_info *)dst,
- msg->msg_flags, dontfrag);
+ msg->msg_flags, dontfrag, &sockc_unused);
if (err)
ip6_flush_pending_frames(sk);
else if (!(msg->msg_flags & MSG_MORE))
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 1ecfa71..0007e23 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1837,6 +1837,7 @@
DECLARE_SOCKADDR(struct sockaddr_pkt *, saddr, msg->msg_name);
struct sk_buff *skb = NULL;
struct net_device *dev;
+ struct sockcm_cookie sockc;
__be16 proto = 0;
int err;
int extra_len = 0;
@@ -1925,12 +1926,21 @@
goto out_unlock;
}
+ sockc.tsflags = 0;
+ if (msg->msg_controllen) {
+ err = sock_cmsg_send(sk, msg, &sockc);
+ if (unlikely(err)) {
+ err = -EINVAL;
+ goto out_unlock;
+ }
+ }
+
skb->protocol = proto;
skb->dev = dev;
skb->priority = sk->sk_priority;
skb->mark = sk->sk_mark;
- sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
+ sock_tx_timestamp(sk, sockc.tsflags, &skb_shinfo(skb)->tx_flags);
if (unlikely(extra_len == 4))
skb->no_fcs = 1;
@@ -2486,7 +2496,8 @@
static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
void *frame, struct net_device *dev, void *data, int tp_len,
- __be16 proto, unsigned char *addr, int hlen, int copylen)
+ __be16 proto, unsigned char *addr, int hlen, int copylen,
+ const struct sockcm_cookie *sockc)
{
union tpacket_uhdr ph;
int to_write, offset, len, nr_frags, len_max;
@@ -2500,7 +2511,7 @@
skb->dev = dev;
skb->priority = po->sk.sk_priority;
skb->mark = po->sk.sk_mark;
- sock_tx_timestamp(&po->sk, &skb_shinfo(skb)->tx_flags);
+ sock_tx_timestamp(&po->sk, sockc->tsflags, &skb_shinfo(skb)->tx_flags);
skb_shinfo(skb)->destructor_arg = ph.raw;
skb_reserve(skb, hlen);
@@ -2624,6 +2635,7 @@
struct sk_buff *skb;
struct net_device *dev;
struct virtio_net_hdr *vnet_hdr = NULL;
+ struct sockcm_cookie sockc;
__be16 proto;
int err, reserve = 0;
void *ph;
@@ -2655,6 +2667,13 @@
dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex);
}
+ sockc.tsflags = 0;
+ if (msg->msg_controllen) {
+ err = sock_cmsg_send(&po->sk, msg, &sockc);
+ if (unlikely(err))
+ goto out;
+ }
+
err = -ENXIO;
if (unlikely(dev == NULL))
goto out;
@@ -2712,7 +2731,7 @@
goto out_status;
}
tp_len = tpacket_fill_skb(po, skb, ph, dev, data, tp_len, proto,
- addr, hlen, copylen);
+ addr, hlen, copylen, &sockc);
if (likely(tp_len >= 0) &&
tp_len > dev->mtu + reserve &&
!po->has_vnet_hdr &&
@@ -2851,6 +2870,7 @@
if (unlikely(!(dev->flags & IFF_UP)))
goto out_unlock;
+ sockc.tsflags = 0;
sockc.mark = sk->sk_mark;
if (msg->msg_controllen) {
err = sock_cmsg_send(sk, msg, &sockc);
@@ -2908,7 +2928,7 @@
goto out_free;
}
- sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
+ sock_tx_timestamp(sk, sockc.tsflags, &skb_shinfo(skb)->tx_flags);
if (!vnet_hdr.gso_type && (len > dev->mtu + reserve + extra_len) &&
!packet_extra_vlan_len_allowed(dev, skb)) {
diff --git a/net/socket.c b/net/socket.c
index 5f77a8e..979d314 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -587,20 +587,20 @@
}
EXPORT_SYMBOL(sock_release);
-void __sock_tx_timestamp(const struct sock *sk, __u8 *tx_flags)
+void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
{
u8 flags = *tx_flags;
- if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
+ if (tsflags & SOF_TIMESTAMPING_TX_HARDWARE)
flags |= SKBTX_HW_TSTAMP;
- if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
+ if (tsflags & SOF_TIMESTAMPING_TX_SOFTWARE)
flags |= SKBTX_SW_TSTAMP;
- if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_SCHED)
+ if (tsflags & SOF_TIMESTAMPING_TX_SCHED)
flags |= SKBTX_SCHED_TSTAMP;
- if (sk->sk_tsflags & SOF_TIMESTAMPING_TX_ACK)
+ if (tsflags & SOF_TIMESTAMPING_TX_ACK)
flags |= SKBTX_ACK_TSTAMP;
*tx_flags = flags;