net: Introduce recvmmsg socket syscall
Meaning receive multiple messages, reducing the number of syscalls and
net stack entry/exit operations.
Next patches will introduce mechanisms where protocols that want to
optimize this operation will provide an unlocked_recvmsg operation.
This takes into account comments made by:
. Paul Moore: sock_recvmsg is called only for the first datagram,
sock_recvmsg_nosec is used for the rest.
. Caitlin Bestler: recvmmsg now has a struct timespec timeout, that
works in the same fashion as the ppoll one.
If the underlying protocol returns a datagram with MSG_OOB set, this
will make recvmmsg return right away with as many datagrams (+ the OOB
one) it has received so far.
. RĂ©mi Denis-Courmont & Steven Whitehouse: If we receive N < vlen
datagrams and then recvmsg returns an error, recvmmsg will return
the successfully received datagrams, store the error and return it
in the next call.
This paves the way for a subsequent optimization, sk_prot->unlocked_recvmsg,
where we will be able to acquire the lock only at batch start and end, not at
every underlying recvmsg call.
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/net/compat.c b/net/compat.c
index a407c3a..e13f525 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -727,10 +727,10 @@
/* Argument list sizes for compat_sys_socketcall */
#define AL(x) ((x) * sizeof(u32))
-static unsigned char nas[19]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
+static unsigned char nas[20]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3),
AL(3),AL(3),AL(4),AL(4),AL(4),AL(6),
AL(6),AL(2),AL(5),AL(5),AL(3),AL(3),
- AL(4)};
+ AL(4),AL(5)};
#undef AL
asmlinkage long compat_sys_sendmsg(int fd, struct compat_msghdr __user *msg, unsigned flags)
@@ -755,13 +755,36 @@
return sys_recvfrom(fd, buf, len, flags | MSG_CMSG_COMPAT, addr, addrlen);
}
+asmlinkage long compat_sys_recvmmsg(int fd, struct compat_mmsghdr __user *mmsg,
+ unsigned vlen, unsigned int flags,
+ struct timespec __user *timeout)
+{
+ int datagrams;
+ struct timespec ktspec;
+ struct compat_timespec __user *utspec =
+ (struct compat_timespec __user *)timeout;
+
+ if (get_user(ktspec.tv_sec, &utspec->tv_sec) ||
+ get_user(ktspec.tv_nsec, &utspec->tv_nsec))
+ return -EFAULT;
+
+ datagrams = __sys_recvmmsg(fd, (struct mmsghdr __user *)mmsg, vlen,
+ flags | MSG_CMSG_COMPAT, &ktspec);
+ if (datagrams > 0 &&
+ (put_user(ktspec.tv_sec, &utspec->tv_sec) ||
+ put_user(ktspec.tv_nsec, &utspec->tv_nsec)))
+ datagrams = -EFAULT;
+
+ return datagrams;
+}
+
asmlinkage long compat_sys_socketcall(int call, u32 __user *args)
{
int ret;
u32 a[6];
u32 a0, a1;
- if (call < SYS_SOCKET || call > SYS_ACCEPT4)
+ if (call < SYS_SOCKET || call > SYS_RECVMMSG)
return -EINVAL;
if (copy_from_user(a, args, nas[call]))
return -EFAULT;
@@ -823,6 +846,10 @@
case SYS_RECVMSG:
ret = compat_sys_recvmsg(a0, compat_ptr(a1), a[2]);
break;
+ case SYS_RECVMMSG:
+ ret = compat_sys_recvmmsg(a0, compat_ptr(a1), a[2], a[3],
+ compat_ptr(a[4]));
+ break;
case SYS_ACCEPT4:
ret = sys_accept4(a0, compat_ptr(a1), compat_ptr(a[2]), a[3]);
break;