Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next
Pablo Neira Ayuso says:
====================
Netfilter updates for net-next
The following patchset contains the first batch of Netfilter updates for
the upcoming 4.5 kernel. This batch contains userspace netfilter header
compilation fixes, support for packet mangling in nf_tables, the new
tracing infrastructure for nf_tables and cgroup2 support for iptables.
More specifically, they are:
1) Two patches to include dependencies in our netfilter userspace
headers to resolve compilation problems, from Mikko Rapeli.
2) Four comestic cleanup patches for the ebtables codebase, from Ian Morris.
3) Remove duplicate include in the netfilter reject infrastructure,
from Stephen Hemminger.
4) Two patches to simplify the netfilter defragmentation code for IPv6,
patch from Florian Westphal.
5) Fix root ownership of /proc/net netfilter for unpriviledged net
namespaces, from Philip Whineray.
6) Get rid of unused fields in struct nft_pktinfo, from Florian Westphal.
7) Add mangling support to our nf_tables payload expression, from
Patrick McHardy.
8) Introduce a new netlink-based tracing infrastructure for nf_tables,
from Florian Westphal.
9) Change setter functions in nfnetlink_log to be void, from
Rami Rosen.
10) Add netns support to the cttimeout infrastructure.
11) Add cgroup2 support to iptables, from Tejun Heo.
12) Introduce nfnl_dereference_protected() in nfnetlink, from Florian.
13) Add support for mangling pkttype in the nf_tables meta expression,
also from Florian.
BTW, I need that you pull net into net-next, I have another batch that
requires changes that I don't yet see in net.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/include/linux/netfilter/nf_conntrack_sctp.h b/include/linux/netfilter/nf_conntrack_sctp.h
new file mode 100644
index 0000000..22a16a2
--- /dev/null
+++ b/include/linux/netfilter/nf_conntrack_sctp.h
@@ -0,0 +1,13 @@
+#ifndef _NF_CONNTRACK_SCTP_H
+#define _NF_CONNTRACK_SCTP_H
+/* SCTP tracking. */
+
+#include <uapi/linux/netfilter/nf_conntrack_sctp.h>
+
+struct ip_ct_sctp {
+ enum sctp_conntrack state;
+
+ __be32 vtag[IP_CT_DIR_MAX];
+};
+
+#endif /* _NF_CONNTRACK_SCTP_H */
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 2dcea63..4089abc 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -121,6 +121,9 @@
#if IS_ENABLED(CONFIG_NETFILTER_NETLINK_ACCT)
struct list_head nfnl_acct_list;
#endif
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
+ struct list_head nfct_timeout_list;
+#endif
#endif
#ifdef CONFIG_WEXT_CORE
struct sk_buff_head wext_nlevents;
diff --git a/include/net/netfilter/ipv6/nf_defrag_ipv6.h b/include/net/netfilter/ipv6/nf_defrag_ipv6.h
index fb7da5b..ddf162f 100644
--- a/include/net/netfilter/ipv6/nf_defrag_ipv6.h
+++ b/include/net/netfilter/ipv6/nf_defrag_ipv6.h
@@ -5,8 +5,7 @@
int nf_ct_frag6_init(void);
void nf_ct_frag6_cleanup(void);
-struct sk_buff *nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user);
-void nf_ct_frag6_consume_orig(struct sk_buff *skb);
+int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user);
struct inet_frags_ctl;
diff --git a/include/net/netfilter/nf_conntrack_timeout.h b/include/net/netfilter/nf_conntrack_timeout.h
index f72be38..5cc5e9e 100644
--- a/include/net/netfilter/nf_conntrack_timeout.h
+++ b/include/net/netfilter/nf_conntrack_timeout.h
@@ -104,7 +104,7 @@
#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
-extern struct ctnl_timeout *(*nf_ct_timeout_find_get_hook)(const char *name);
+extern struct ctnl_timeout *(*nf_ct_timeout_find_get_hook)(struct net *net, const char *name);
extern void (*nf_ct_timeout_put_hook)(struct ctnl_timeout *timeout);
#endif
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 4bd7508..b313cda 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -19,8 +19,6 @@
const struct net_device *out;
u8 pf;
u8 hook;
- u8 nhoff;
- u8 thoff;
u8 tprot;
/* for x_tables compatibility */
struct xt_action_param xt;
@@ -890,6 +888,38 @@
int nft_register_expr(struct nft_expr_type *);
void nft_unregister_expr(struct nft_expr_type *);
+int nft_verdict_dump(struct sk_buff *skb, int type,
+ const struct nft_verdict *v);
+
+/**
+ * struct nft_traceinfo - nft tracing information and state
+ *
+ * @pkt: pktinfo currently processed
+ * @basechain: base chain currently processed
+ * @chain: chain currently processed
+ * @rule: rule that was evaluated
+ * @verdict: verdict given by rule
+ * @type: event type (enum nft_trace_types)
+ * @packet_dumped: packet headers sent in a previous traceinfo message
+ * @trace: other struct members are initialised
+ */
+struct nft_traceinfo {
+ const struct nft_pktinfo *pkt;
+ const struct nft_base_chain *basechain;
+ const struct nft_chain *chain;
+ const struct nft_rule *rule;
+ const struct nft_verdict *verdict;
+ enum nft_trace_types type;
+ bool packet_dumped;
+ bool trace;
+};
+
+void nft_trace_init(struct nft_traceinfo *info, const struct nft_pktinfo *pkt,
+ const struct nft_verdict *verdict,
+ const struct nft_chain *basechain);
+
+void nft_trace_notify(struct nft_traceinfo *info);
+
#define nft_dereference(p) \
nfnl_dereference(p, NFNL_SUBSYS_NFTABLES)
diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h
index c6f400c..a9060dd 100644
--- a/include/net/netfilter/nf_tables_core.h
+++ b/include/net/netfilter/nf_tables_core.h
@@ -47,7 +47,17 @@
enum nft_registers dreg:8;
};
+struct nft_payload_set {
+ enum nft_payload_bases base:8;
+ u8 offset;
+ u8 len;
+ enum nft_registers sreg:8;
+ u8 csum_type;
+ u8 csum_offset;
+};
+
extern const struct nft_expr_ops nft_payload_fast_ops;
+extern struct static_key_false nft_trace_enabled;
int nft_payload_module_init(void);
void nft_payload_module_exit(void);
diff --git a/include/net/netfilter/nft_meta.h b/include/net/netfilter/nft_meta.h
index 711887a..d27588c 100644
--- a/include/net/netfilter/nft_meta.h
+++ b/include/net/netfilter/nft_meta.h
@@ -33,4 +33,7 @@
struct nft_regs *regs,
const struct nft_pktinfo *pkt);
+void nft_meta_set_destroy(const struct nft_ctx *ctx,
+ const struct nft_expr *expr);
+
#endif
diff --git a/include/uapi/linux/netfilter/ipset/ip_set_bitmap.h b/include/uapi/linux/netfilter/ipset/ip_set_bitmap.h
index 6a2c038..fd5024d 100644
--- a/include/uapi/linux/netfilter/ipset/ip_set_bitmap.h
+++ b/include/uapi/linux/netfilter/ipset/ip_set_bitmap.h
@@ -1,6 +1,8 @@
#ifndef _UAPI__IP_SET_BITMAP_H
#define _UAPI__IP_SET_BITMAP_H
+#include <linux/netfilter/ipset/ip_set.h>
+
/* Bitmap type specific error codes */
enum {
/* The element is out of the range of the set */
diff --git a/include/uapi/linux/netfilter/ipset/ip_set_hash.h b/include/uapi/linux/netfilter/ipset/ip_set_hash.h
index 352eecc..82deeb8 100644
--- a/include/uapi/linux/netfilter/ipset/ip_set_hash.h
+++ b/include/uapi/linux/netfilter/ipset/ip_set_hash.h
@@ -1,6 +1,8 @@
#ifndef _UAPI__IP_SET_HASH_H
#define _UAPI__IP_SET_HASH_H
+#include <linux/netfilter/ipset/ip_set.h>
+
/* Hash type specific error codes */
enum {
/* Hash is full */
diff --git a/include/uapi/linux/netfilter/ipset/ip_set_list.h b/include/uapi/linux/netfilter/ipset/ip_set_list.h
index a44efaa..84d4303 100644
--- a/include/uapi/linux/netfilter/ipset/ip_set_list.h
+++ b/include/uapi/linux/netfilter/ipset/ip_set_list.h
@@ -1,6 +1,8 @@
#ifndef _UAPI__IP_SET_LIST_H
#define _UAPI__IP_SET_LIST_H
+#include <linux/netfilter/ipset/ip_set.h>
+
/* List type specific error codes */
enum {
/* Set name to be added/deleted/tested does not exist. */
diff --git a/include/uapi/linux/netfilter/nf_conntrack_sctp.h b/include/uapi/linux/netfilter/nf_conntrack_sctp.h
index ed4e776..2cbc366 100644
--- a/include/uapi/linux/netfilter/nf_conntrack_sctp.h
+++ b/include/uapi/linux/netfilter/nf_conntrack_sctp.h
@@ -1,5 +1,5 @@
-#ifndef _NF_CONNTRACK_SCTP_H
-#define _NF_CONNTRACK_SCTP_H
+#ifndef _UAPI_NF_CONNTRACK_SCTP_H
+#define _UAPI_NF_CONNTRACK_SCTP_H
/* SCTP tracking. */
#include <linux/netfilter/nf_conntrack_tuple_common.h>
@@ -18,10 +18,4 @@
SCTP_CONNTRACK_MAX
};
-struct ip_ct_sctp {
- enum sctp_conntrack state;
-
- __be32 vtag[IP_CT_DIR_MAX];
-};
-
-#endif /* _NF_CONNTRACK_SCTP_H */
+#endif /* _UAPI_NF_CONNTRACK_SCTP_H */
diff --git a/include/uapi/linux/netfilter/nf_conntrack_tuple_common.h b/include/uapi/linux/netfilter/nf_conntrack_tuple_common.h
index 2f6bbc5..a9c3834 100644
--- a/include/uapi/linux/netfilter/nf_conntrack_tuple_common.h
+++ b/include/uapi/linux/netfilter/nf_conntrack_tuple_common.h
@@ -1,6 +1,9 @@
#ifndef _NF_CONNTRACK_TUPLE_COMMON_H
#define _NF_CONNTRACK_TUPLE_COMMON_H
+#include <linux/types.h>
+#include <linux/netfilter.h>
+
enum ip_conntrack_dir {
IP_CT_DIR_ORIGINAL,
IP_CT_DIR_REPLY,
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index d8c8a7c..b48a3ab 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -83,6 +83,7 @@
* @NFT_MSG_DELSETELEM: delete a set element (enum nft_set_elem_attributes)
* @NFT_MSG_NEWGEN: announce a new generation, only for events (enum nft_gen_attributes)
* @NFT_MSG_GETGEN: get the rule-set generation (enum nft_gen_attributes)
+ * @NFT_MSG_TRACE: trace event (enum nft_trace_attributes)
*/
enum nf_tables_msg_types {
NFT_MSG_NEWTABLE,
@@ -102,6 +103,7 @@
NFT_MSG_DELSETELEM,
NFT_MSG_NEWGEN,
NFT_MSG_GETGEN,
+ NFT_MSG_TRACE,
NFT_MSG_MAX,
};
@@ -598,12 +600,26 @@
};
/**
+ * enum nft_payload_csum_types - nf_tables payload expression checksum types
+ *
+ * @NFT_PAYLOAD_CSUM_NONE: no checksumming
+ * @NFT_PAYLOAD_CSUM_INET: internet checksum (RFC 791)
+ */
+enum nft_payload_csum_types {
+ NFT_PAYLOAD_CSUM_NONE,
+ NFT_PAYLOAD_CSUM_INET,
+};
+
+/**
* enum nft_payload_attributes - nf_tables payload expression netlink attributes
*
* @NFTA_PAYLOAD_DREG: destination register to load data into (NLA_U32: nft_registers)
* @NFTA_PAYLOAD_BASE: payload base (NLA_U32: nft_payload_bases)
* @NFTA_PAYLOAD_OFFSET: payload offset relative to base (NLA_U32)
* @NFTA_PAYLOAD_LEN: payload length (NLA_U32)
+ * @NFTA_PAYLOAD_SREG: source register to load data from (NLA_U32: nft_registers)
+ * @NFTA_PAYLOAD_CSUM_TYPE: checksum type (NLA_U32)
+ * @NFTA_PAYLOAD_CSUM_OFFSET: checksum offset relative to base (NLA_U32)
*/
enum nft_payload_attributes {
NFTA_PAYLOAD_UNSPEC,
@@ -611,6 +627,9 @@
NFTA_PAYLOAD_BASE,
NFTA_PAYLOAD_OFFSET,
NFTA_PAYLOAD_LEN,
+ NFTA_PAYLOAD_SREG,
+ NFTA_PAYLOAD_CSUM_TYPE,
+ NFTA_PAYLOAD_CSUM_OFFSET,
__NFTA_PAYLOAD_MAX
};
#define NFTA_PAYLOAD_MAX (__NFTA_PAYLOAD_MAX - 1)
@@ -970,4 +989,54 @@
};
#define NFTA_GEN_MAX (__NFTA_GEN_MAX - 1)
+/**
+ * enum nft_trace_attributes - nf_tables trace netlink attributes
+ *
+ * @NFTA_TRACE_TABLE: name of the table (NLA_STRING)
+ * @NFTA_TRACE_CHAIN: name of the chain (NLA_STRING)
+ * @NFTA_TRACE_RULE_HANDLE: numeric handle of the rule (NLA_U64)
+ * @NFTA_TRACE_TYPE: type of the event (NLA_U32: nft_trace_types)
+ * @NFTA_TRACE_VERDICT: verdict returned by hook (NLA_NESTED: nft_verdicts)
+ * @NFTA_TRACE_ID: pseudo-id, same for each skb traced (NLA_U32)
+ * @NFTA_TRACE_LL_HEADER: linklayer header (NLA_BINARY)
+ * @NFTA_TRACE_NETWORK_HEADER: network header (NLA_BINARY)
+ * @NFTA_TRACE_TRANSPORT_HEADER: transport header (NLA_BINARY)
+ * @NFTA_TRACE_IIF: indev ifindex (NLA_U32)
+ * @NFTA_TRACE_IIFTYPE: netdev->type of indev (NLA_U16)
+ * @NFTA_TRACE_OIF: outdev ifindex (NLA_U32)
+ * @NFTA_TRACE_OIFTYPE: netdev->type of outdev (NLA_U16)
+ * @NFTA_TRACE_MARK: nfmark (NLA_U32)
+ * @NFTA_TRACE_NFPROTO: nf protocol processed (NLA_U32)
+ * @NFTA_TRACE_POLICY: policy that decided fate of packet (NLA_U32)
+ */
+enum nft_trace_attibutes {
+ NFTA_TRACE_UNSPEC,
+ NFTA_TRACE_TABLE,
+ NFTA_TRACE_CHAIN,
+ NFTA_TRACE_RULE_HANDLE,
+ NFTA_TRACE_TYPE,
+ NFTA_TRACE_VERDICT,
+ NFTA_TRACE_ID,
+ NFTA_TRACE_LL_HEADER,
+ NFTA_TRACE_NETWORK_HEADER,
+ NFTA_TRACE_TRANSPORT_HEADER,
+ NFTA_TRACE_IIF,
+ NFTA_TRACE_IIFTYPE,
+ NFTA_TRACE_OIF,
+ NFTA_TRACE_OIFTYPE,
+ NFTA_TRACE_MARK,
+ NFTA_TRACE_NFPROTO,
+ NFTA_TRACE_POLICY,
+ __NFTA_TRACE_MAX
+};
+#define NFTA_TRACE_MAX (__NFTA_TRACE_MAX - 1)
+
+enum nft_trace_types {
+ NFT_TRACETYPE_UNSPEC,
+ NFT_TRACETYPE_POLICY,
+ NFT_TRACETYPE_RETURN,
+ NFT_TRACETYPE_RULE,
+ __NFT_TRACETYPE_MAX
+};
+#define NFT_TRACETYPE_MAX (__NFT_TRACETYPE_MAX - 1)
#endif /* _LINUX_NF_TABLES_H */
diff --git a/include/uapi/linux/netfilter/nfnetlink.h b/include/uapi/linux/netfilter/nfnetlink.h
index 354a7e5..4bb8cb7 100644
--- a/include/uapi/linux/netfilter/nfnetlink.h
+++ b/include/uapi/linux/netfilter/nfnetlink.h
@@ -22,6 +22,8 @@
#define NFNLGRP_NFTABLES NFNLGRP_NFTABLES
NFNLGRP_ACCT_QUOTA,
#define NFNLGRP_ACCT_QUOTA NFNLGRP_ACCT_QUOTA
+ NFNLGRP_NFTRACE,
+#define NFNLGRP_NFTRACE NFNLGRP_NFTRACE
__NFNLGRP_MAX,
};
#define NFNLGRP_MAX (__NFNLGRP_MAX - 1)
diff --git a/include/uapi/linux/netfilter/xt_HMARK.h b/include/uapi/linux/netfilter/xt_HMARK.h
index 826fc58..3fb48c8 100644
--- a/include/uapi/linux/netfilter/xt_HMARK.h
+++ b/include/uapi/linux/netfilter/xt_HMARK.h
@@ -2,6 +2,7 @@
#define XT_HMARK_H_
#include <linux/types.h>
+#include <linux/netfilter.h>
enum {
XT_HMARK_SADDR_MASK,
diff --git a/include/uapi/linux/netfilter/xt_RATEEST.h b/include/uapi/linux/netfilter/xt_RATEEST.h
index 6605e20..ec1b570 100644
--- a/include/uapi/linux/netfilter/xt_RATEEST.h
+++ b/include/uapi/linux/netfilter/xt_RATEEST.h
@@ -2,6 +2,7 @@
#define _XT_RATEEST_TARGET_H
#include <linux/types.h>
+#include <linux/if.h>
struct xt_rateest_target_info {
char name[IFNAMSIZ];
diff --git a/include/uapi/linux/netfilter/xt_TEE.h b/include/uapi/linux/netfilter/xt_TEE.h
index 5c21d5c..0109202 100644
--- a/include/uapi/linux/netfilter/xt_TEE.h
+++ b/include/uapi/linux/netfilter/xt_TEE.h
@@ -1,6 +1,8 @@
#ifndef _XT_TEE_TARGET_H
#define _XT_TEE_TARGET_H
+#include <linux/netfilter.h>
+
struct xt_tee_tginfo {
union nf_inet_addr gw;
char oif[16];
diff --git a/include/uapi/linux/netfilter/xt_TPROXY.h b/include/uapi/linux/netfilter/xt_TPROXY.h
index 902043c..8d693ee 100644
--- a/include/uapi/linux/netfilter/xt_TPROXY.h
+++ b/include/uapi/linux/netfilter/xt_TPROXY.h
@@ -2,6 +2,7 @@
#define _XT_TPROXY_H
#include <linux/types.h>
+#include <linux/netfilter.h>
/* TPROXY target is capable of marking the packet to perform
* redirection. We can get rid of that whenever we get support for
diff --git a/include/uapi/linux/netfilter/xt_cgroup.h b/include/uapi/linux/netfilter/xt_cgroup.h
index 43acb7e..1e4b37b 100644
--- a/include/uapi/linux/netfilter/xt_cgroup.h
+++ b/include/uapi/linux/netfilter/xt_cgroup.h
@@ -2,10 +2,23 @@
#define _UAPI_XT_CGROUP_H
#include <linux/types.h>
+#include <linux/limits.h>
-struct xt_cgroup_info {
+struct xt_cgroup_info_v0 {
__u32 id;
__u32 invert;
};
+struct xt_cgroup_info_v1 {
+ __u8 has_path;
+ __u8 has_classid;
+ __u8 invert_path;
+ __u8 invert_classid;
+ char path[PATH_MAX];
+ __u32 classid;
+
+ /* kernel internal data */
+ void *priv __attribute__((aligned(8)));
+};
+
#endif /* _UAPI_XT_CGROUP_H */
diff --git a/include/uapi/linux/netfilter/xt_hashlimit.h b/include/uapi/linux/netfilter/xt_hashlimit.h
index cbfc43d..6db9037 100644
--- a/include/uapi/linux/netfilter/xt_hashlimit.h
+++ b/include/uapi/linux/netfilter/xt_hashlimit.h
@@ -2,6 +2,7 @@
#define _UAPI_XT_HASHLIMIT_H
#include <linux/types.h>
+#include <linux/if.h>
/* timings are in milliseconds. */
#define XT_HASHLIMIT_SCALE 10000
diff --git a/include/uapi/linux/netfilter/xt_ipvs.h b/include/uapi/linux/netfilter/xt_ipvs.h
index eff34ac..e03b9c3 100644
--- a/include/uapi/linux/netfilter/xt_ipvs.h
+++ b/include/uapi/linux/netfilter/xt_ipvs.h
@@ -2,6 +2,7 @@
#define _XT_IPVS_H
#include <linux/types.h>
+#include <linux/netfilter.h>
enum {
XT_IPVS_IPVS_PROPERTY = 1 << 0, /* all other options imply this one */
diff --git a/include/uapi/linux/netfilter/xt_mac.h b/include/uapi/linux/netfilter/xt_mac.h
index b892cdc..9a19a08 100644
--- a/include/uapi/linux/netfilter/xt_mac.h
+++ b/include/uapi/linux/netfilter/xt_mac.h
@@ -1,6 +1,8 @@
#ifndef _XT_MAC_H
#define _XT_MAC_H
+#include <linux/if_ether.h>
+
struct xt_mac_info {
unsigned char srcaddr[ETH_ALEN];
int invert;
diff --git a/include/uapi/linux/netfilter/xt_osf.h b/include/uapi/linux/netfilter/xt_osf.h
index 5d66cae..e615995 100644
--- a/include/uapi/linux/netfilter/xt_osf.h
+++ b/include/uapi/linux/netfilter/xt_osf.h
@@ -20,6 +20,8 @@
#define _XT_OSF_H
#include <linux/types.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
#define MAXGENRELEN 32
diff --git a/include/uapi/linux/netfilter/xt_physdev.h b/include/uapi/linux/netfilter/xt_physdev.h
index db7a298..ccdde87 100644
--- a/include/uapi/linux/netfilter/xt_physdev.h
+++ b/include/uapi/linux/netfilter/xt_physdev.h
@@ -2,7 +2,7 @@
#define _UAPI_XT_PHYSDEV_H
#include <linux/types.h>
-
+#include <linux/if.h>
#define XT_PHYSDEV_OP_IN 0x01
#define XT_PHYSDEV_OP_OUT 0x02
diff --git a/include/uapi/linux/netfilter/xt_policy.h b/include/uapi/linux/netfilter/xt_policy.h
index be8ead0..d8a9800 100644
--- a/include/uapi/linux/netfilter/xt_policy.h
+++ b/include/uapi/linux/netfilter/xt_policy.h
@@ -2,6 +2,8 @@
#define _XT_POLICY_H
#include <linux/types.h>
+#include <linux/in.h>
+#include <linux/in6.h>
#define XT_POLICY_MAX_ELEM 4
diff --git a/include/uapi/linux/netfilter/xt_rateest.h b/include/uapi/linux/netfilter/xt_rateest.h
index d40a619..13fe50d 100644
--- a/include/uapi/linux/netfilter/xt_rateest.h
+++ b/include/uapi/linux/netfilter/xt_rateest.h
@@ -2,6 +2,7 @@
#define _XT_RATEEST_MATCH_H
#include <linux/types.h>
+#include <linux/if.h>
enum xt_rateest_match_flags {
XT_RATEEST_MATCH_INVERT = 1<<0,
diff --git a/include/uapi/linux/netfilter/xt_recent.h b/include/uapi/linux/netfilter/xt_recent.h
index 6ef36c1..955d562 100644
--- a/include/uapi/linux/netfilter/xt_recent.h
+++ b/include/uapi/linux/netfilter/xt_recent.h
@@ -2,6 +2,7 @@
#define _LINUX_NETFILTER_XT_RECENT_H 1
#include <linux/types.h>
+#include <linux/netfilter.h>
enum {
XT_RECENT_CHECK = 1 << 0,
diff --git a/include/uapi/linux/netfilter/xt_sctp.h b/include/uapi/linux/netfilter/xt_sctp.h
index 29287be..58ffcfb 100644
--- a/include/uapi/linux/netfilter/xt_sctp.h
+++ b/include/uapi/linux/netfilter/xt_sctp.h
@@ -66,26 +66,26 @@
#define SCTP_CHUNKMAP_IS_CLEAR(chunkmap) \
__sctp_chunkmap_is_clear((chunkmap), ARRAY_SIZE(chunkmap))
-static inline bool
+static inline _Bool
__sctp_chunkmap_is_clear(const __u32 *chunkmap, unsigned int n)
{
unsigned int i;
for (i = 0; i < n; ++i)
if (chunkmap[i])
- return false;
- return true;
+ return 0;
+ return 1;
}
#define SCTP_CHUNKMAP_IS_ALL_SET(chunkmap) \
__sctp_chunkmap_is_all_set((chunkmap), ARRAY_SIZE(chunkmap))
-static inline bool
+static inline _Bool
__sctp_chunkmap_is_all_set(const __u32 *chunkmap, unsigned int n)
{
unsigned int i;
for (i = 0; i < n; ++i)
if (chunkmap[i] != ~0U)
- return false;
- return true;
+ return 0;
+ return 1;
}
#endif /* _XT_SCTP_H_ */
diff --git a/include/uapi/linux/netfilter_arp/arp_tables.h b/include/uapi/linux/netfilter_arp/arp_tables.h
index a5a86a4..ece3ad4 100644
--- a/include/uapi/linux/netfilter_arp/arp_tables.h
+++ b/include/uapi/linux/netfilter_arp/arp_tables.h
@@ -11,6 +11,7 @@
#include <linux/types.h>
#include <linux/compiler.h>
+#include <linux/if.h>
#include <linux/netfilter_arp.h>
#include <linux/netfilter/x_tables.h>
diff --git a/include/uapi/linux/netfilter_bridge.h b/include/uapi/linux/netfilter_bridge.h
index a5eda6d..514519b 100644
--- a/include/uapi/linux/netfilter_bridge.h
+++ b/include/uapi/linux/netfilter_bridge.h
@@ -4,6 +4,7 @@
/* bridge-specific defines for netfilter.
*/
+#include <linux/in.h>
#include <linux/netfilter.h>
#include <linux/if_ether.h>
#include <linux/if_vlan.h>
diff --git a/include/uapi/linux/netfilter_bridge/ebt_arp.h b/include/uapi/linux/netfilter_bridge/ebt_arp.h
index 522f3e4..dd4df25 100644
--- a/include/uapi/linux/netfilter_bridge/ebt_arp.h
+++ b/include/uapi/linux/netfilter_bridge/ebt_arp.h
@@ -2,6 +2,7 @@
#define __LINUX_BRIDGE_EBT_ARP_H
#include <linux/types.h>
+#include <linux/if_ether.h>
#define EBT_ARP_OPCODE 0x01
#define EBT_ARP_HTYPE 0x02
diff --git a/include/uapi/linux/netfilter_bridge/ebt_arpreply.h b/include/uapi/linux/netfilter_bridge/ebt_arpreply.h
index 7e77896..6fee340 100644
--- a/include/uapi/linux/netfilter_bridge/ebt_arpreply.h
+++ b/include/uapi/linux/netfilter_bridge/ebt_arpreply.h
@@ -1,6 +1,8 @@
#ifndef __LINUX_BRIDGE_EBT_ARPREPLY_H
#define __LINUX_BRIDGE_EBT_ARPREPLY_H
+#include <linux/if_ether.h>
+
struct ebt_arpreply_info {
unsigned char mac[ETH_ALEN];
int target;
diff --git a/include/uapi/linux/netfilter_bridge/ebt_ip6.h b/include/uapi/linux/netfilter_bridge/ebt_ip6.h
index 42b8896..a062f0c 100644
--- a/include/uapi/linux/netfilter_bridge/ebt_ip6.h
+++ b/include/uapi/linux/netfilter_bridge/ebt_ip6.h
@@ -13,6 +13,7 @@
#define __LINUX_BRIDGE_EBT_IP6_H
#include <linux/types.h>
+#include <linux/in6.h>
#define EBT_IP6_SOURCE 0x01
#define EBT_IP6_DEST 0x02
diff --git a/include/uapi/linux/netfilter_bridge/ebt_nat.h b/include/uapi/linux/netfilter_bridge/ebt_nat.h
index 5e74e3b..c990d74 100644
--- a/include/uapi/linux/netfilter_bridge/ebt_nat.h
+++ b/include/uapi/linux/netfilter_bridge/ebt_nat.h
@@ -1,6 +1,8 @@
#ifndef __LINUX_BRIDGE_EBT_NAT_H
#define __LINUX_BRIDGE_EBT_NAT_H
+#include <linux/if_ether.h>
+
#define NAT_ARP_BIT (0x00000010)
struct ebt_nat_info {
unsigned char mac[ETH_ALEN];
diff --git a/include/uapi/linux/netfilter_bridge/ebtables.h b/include/uapi/linux/netfilter_bridge/ebtables.h
index fd2ee50..e3cdf9f 100644
--- a/include/uapi/linux/netfilter_bridge/ebtables.h
+++ b/include/uapi/linux/netfilter_bridge/ebtables.h
@@ -12,6 +12,8 @@
#ifndef _UAPI__LINUX_BRIDGE_EFF_H
#define _UAPI__LINUX_BRIDGE_EFF_H
+#include <linux/types.h>
+#include <linux/if.h>
#include <linux/netfilter_bridge.h>
#define EBT_TABLE_MAXNAMELEN 32
@@ -33,8 +35,8 @@
struct xt_target;
struct ebt_counter {
- uint64_t pcnt;
- uint64_t bcnt;
+ __u64 pcnt;
+ __u64 bcnt;
};
struct ebt_replace {
diff --git a/include/uapi/linux/netfilter_ipv4/ip_tables.h b/include/uapi/linux/netfilter_ipv4/ip_tables.h
index f1e6ef2..d0da53d 100644
--- a/include/uapi/linux/netfilter_ipv4/ip_tables.h
+++ b/include/uapi/linux/netfilter_ipv4/ip_tables.h
@@ -17,6 +17,7 @@
#include <linux/types.h>
#include <linux/compiler.h>
+#include <linux/if.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter/x_tables.h>
diff --git a/include/uapi/linux/netfilter_ipv6/ip6_tables.h b/include/uapi/linux/netfilter_ipv6/ip6_tables.h
index 649c680..d1b2265 100644
--- a/include/uapi/linux/netfilter_ipv6/ip6_tables.h
+++ b/include/uapi/linux/netfilter_ipv6/ip6_tables.h
@@ -17,6 +17,7 @@
#include <linux/types.h>
#include <linux/compiler.h>
+#include <linux/if.h>
#include <linux/netfilter_ipv6.h>
#include <linux/netfilter/x_tables.h>
diff --git a/include/uapi/linux/netfilter_ipv6/ip6t_rt.h b/include/uapi/linux/netfilter_ipv6/ip6t_rt.h
index 7605a5f..558f81e 100644
--- a/include/uapi/linux/netfilter_ipv6/ip6t_rt.h
+++ b/include/uapi/linux/netfilter_ipv6/ip6t_rt.h
@@ -2,7 +2,7 @@
#define _IP6T_RT_H
#include <linux/types.h>
-/*#include <linux/in6.h>*/
+#include <linux/in6.h>
#define IP6T_RT_HOPS 16
diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c
index 17fd5f2..98de6e7 100644
--- a/net/bridge/netfilter/ebt_ip6.c
+++ b/net/bridge/netfilter/ebt_ip6.c
@@ -65,8 +65,8 @@
return false;
if (FWINV(info->protocol != nexthdr, EBT_IP6_PROTO))
return false;
- if (!(info->bitmask & ( EBT_IP6_DPORT |
- EBT_IP6_SPORT | EBT_IP6_ICMP6)))
+ if (!(info->bitmask & (EBT_IP6_DPORT |
+ EBT_IP6_SPORT | EBT_IP6_ICMP6)))
return true;
/* min icmpv6 headersize is 4, so sizeof(_pkthdr) is ok. */
diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c
index 0ad639a..152300d 100644
--- a/net/bridge/netfilter/ebt_log.c
+++ b/net/bridge/netfilter/ebt_log.c
@@ -36,14 +36,12 @@
return 0;
}
-struct tcpudphdr
-{
+struct tcpudphdr {
__be16 src;
__be16 dst;
};
-struct arppayload
-{
+struct arppayload {
unsigned char mac_src[ETH_ALEN];
unsigned char ip_src[4];
unsigned char mac_dst[ETH_ALEN];
@@ -152,7 +150,8 @@
ntohs(ah->ar_op));
/* If it's for Ethernet and the lengths are OK,
- * then log the ARP payload */
+ * then log the ARP payload
+ */
if (ah->ar_hrd == htons(1) &&
ah->ar_hln == ETH_ALEN &&
ah->ar_pln == sizeof(__be32)) {
diff --git a/net/bridge/netfilter/ebt_stp.c b/net/bridge/netfilter/ebt_stp.c
index 0c40570..6b731e1 100644
--- a/net/bridge/netfilter/ebt_stp.c
+++ b/net/bridge/netfilter/ebt_stp.c
@@ -41,7 +41,7 @@
#define NR32(p) ((p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3])
static bool ebt_filter_config(const struct ebt_stp_info *info,
- const struct stp_config_pdu *stpc)
+ const struct stp_config_pdu *stpc)
{
const struct ebt_stp_config_info *c;
uint16_t v16;
diff --git a/net/bridge/netfilter/ebt_vlan.c b/net/bridge/netfilter/ebt_vlan.c
index 6185688..98c221d 100644
--- a/net/bridge/netfilter/ebt_vlan.c
+++ b/net/bridge/netfilter/ebt_vlan.c
@@ -66,7 +66,8 @@
* - Canonical Format Indicator (CFI). The Canonical Format Indicator
* (CFI) is a single bit flag value. Currently ignored.
* - VLAN Identifier (VID). The VID is encoded as
- * an unsigned binary number. */
+ * an unsigned binary number.
+ */
id = TCI & VLAN_VID_MASK;
prio = (TCI >> 13) & 0x7;
@@ -98,7 +99,8 @@
}
/* Check for bitmask range
- * True if even one bit is out of mask */
+ * True if even one bit is out of mask
+ */
if (info->bitmask & ~EBT_VLAN_MASK) {
pr_debug("bitmask %2X is out of mask (%2X)\n",
info->bitmask, EBT_VLAN_MASK);
@@ -117,7 +119,8 @@
* 0 - The null VLAN ID.
* 1 - The default Port VID (PVID)
* 0x0FFF - Reserved for implementation use.
- * if_vlan.h: VLAN_N_VID 4096. */
+ * if_vlan.h: VLAN_N_VID 4096.
+ */
if (GET_BITMASK(EBT_VLAN_ID)) {
if (!!info->id) { /* if id!=0 => check vid range */
if (info->id > VLAN_N_VID) {
@@ -128,7 +131,8 @@
/* Note: This is valid VLAN-tagged frame point.
* Any value of user_priority are acceptable,
* but should be ignored according to 802.1Q Std.
- * So we just drop the prio flag. */
+ * So we just drop the prio flag.
+ */
info->bitmask &= ~EBT_VLAN_PRIO;
}
/* Else, id=0 (null VLAN ID) => user_priority range (any?) */
@@ -143,7 +147,8 @@
}
/* Check for encapsulated proto range - it is possible to be
* any value for u_short range.
- * if_ether.h: ETH_ZLEN 60 - Min. octets in frame sans FCS */
+ * if_ether.h: ETH_ZLEN 60 - Min. octets in frame sans FCS
+ */
if (GET_BITMASK(EBT_VLAN_ENCAP)) {
if ((unsigned short) ntohs(info->encap) < ETH_ZLEN) {
pr_debug("encap frame length %d is less than "
diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c
index 32eccd1..593a1bdc 100644
--- a/net/bridge/netfilter/ebtable_filter.c
+++ b/net/bridge/netfilter/ebtable_filter.c
@@ -12,7 +12,7 @@
#include <linux/module.h>
#define FILTER_VALID_HOOKS ((1 << NF_BR_LOCAL_IN) | (1 << NF_BR_FORWARD) | \
- (1 << NF_BR_LOCAL_OUT))
+ (1 << NF_BR_LOCAL_OUT))
static struct ebt_entries initial_chains[] = {
{
diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c
index ec55358..eb33919 100644
--- a/net/bridge/netfilter/ebtable_nat.c
+++ b/net/bridge/netfilter/ebtable_nat.c
@@ -12,7 +12,7 @@
#include <linux/module.h>
#define NAT_VALID_HOOKS ((1 << NF_BR_PRE_ROUTING) | (1 << NF_BR_LOCAL_OUT) | \
- (1 << NF_BR_POST_ROUTING))
+ (1 << NF_BR_POST_ROUTING))
static struct ebt_entries initial_chains[] = {
{
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index f46ca41..67b2e27 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -35,8 +35,7 @@
"report to author: "format, ## args)
/* #define BUGPRINT(format, args...) */
-/*
- * Each cpu has its own set of counters, so there is no need for write_lock in
+/* Each cpu has its own set of counters, so there is no need for write_lock in
* the softirq
* For reading or updating the counters, the user context needs to
* get a write_lock
@@ -46,7 +45,7 @@
#define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
#define COUNTER_OFFSET(n) (SMP_ALIGN(n * sizeof(struct ebt_counter)))
#define COUNTER_BASE(c, n, cpu) ((struct ebt_counter *)(((char *)c) + \
- COUNTER_OFFSET(n) * cpu))
+ COUNTER_OFFSET(n) * cpu))
@@ -126,7 +125,7 @@
/* process standard matches */
static inline int
ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb,
- const struct net_device *in, const struct net_device *out)
+ const struct net_device *in, const struct net_device *out)
{
const struct ethhdr *h = eth_hdr(skb);
const struct net_bridge_port *p;
@@ -162,7 +161,7 @@
for (i = 0; i < 6; i++)
verdict |= (h->h_source[i] ^ e->sourcemac[i]) &
e->sourcemsk[i];
- if (FWINV2(verdict != 0, EBT_ISOURCE) )
+ if (FWINV2(verdict != 0, EBT_ISOURCE))
return 1;
}
if (e->bitmask & EBT_DESTMAC) {
@@ -170,7 +169,7 @@
for (i = 0; i < 6; i++)
verdict |= (h->h_dest[i] ^ e->destmac[i]) &
e->destmsk[i];
- if (FWINV2(verdict != 0, EBT_IDEST) )
+ if (FWINV2(verdict != 0, EBT_IDEST))
return 1;
}
return 0;
@@ -237,7 +236,8 @@
(*(counter_base + i)).bcnt += skb->len;
/* these should only watch: not modify, nor tell us
- what to do with the packet */
+ * what to do with the packet
+ */
EBT_WATCHER_ITERATE(point, ebt_do_watcher, skb, &acpar);
t = (struct ebt_entry_target *)
@@ -323,7 +323,7 @@
/* If it succeeds, returns element and locks mutex */
static inline void *
find_inlist_lock_noload(struct list_head *head, const char *name, int *error,
- struct mutex *mutex)
+ struct mutex *mutex)
{
struct {
struct list_head list;
@@ -342,7 +342,7 @@
static void *
find_inlist_lock(struct list_head *head, const char *name, const char *prefix,
- int *error, struct mutex *mutex)
+ int *error, struct mutex *mutex)
{
return try_then_request_module(
find_inlist_lock_noload(head, name, error, mutex),
@@ -451,7 +451,8 @@
if (i != NF_BR_NUMHOOKS || !(e->bitmask & EBT_ENTRY_OR_ENTRIES)) {
if (e->bitmask != 0) {
/* we make userspace set this right,
- so there is no misunderstanding */
+ * so there is no misunderstanding
+ */
BUGPRINT("EBT_ENTRY_OR_ENTRIES shouldn't be set "
"in distinguisher\n");
return -EINVAL;
@@ -487,15 +488,14 @@
return 0;
}
-/*
- * this one is very careful, as it is the first function
+/* this one is very careful, as it is the first function
* to parse the userspace data
*/
static inline int
ebt_check_entry_size_and_hooks(const struct ebt_entry *e,
- const struct ebt_table_info *newinfo,
- unsigned int *n, unsigned int *cnt,
- unsigned int *totalcnt, unsigned int *udc_cnt)
+ const struct ebt_table_info *newinfo,
+ unsigned int *n, unsigned int *cnt,
+ unsigned int *totalcnt, unsigned int *udc_cnt)
{
int i;
@@ -504,10 +504,12 @@
break;
}
/* beginning of a new chain
- if i == NF_BR_NUMHOOKS it must be a user defined chain */
+ * if i == NF_BR_NUMHOOKS it must be a user defined chain
+ */
if (i != NF_BR_NUMHOOKS || !e->bitmask) {
/* this checks if the previous chain has as many entries
- as it said it has */
+ * as it said it has
+ */
if (*n != *cnt) {
BUGPRINT("nentries does not equal the nr of entries "
"in the chain\n");
@@ -549,20 +551,18 @@
return 0;
}
-struct ebt_cl_stack
-{
+struct ebt_cl_stack {
struct ebt_chainstack cs;
int from;
unsigned int hookmask;
};
-/*
- * we need these positions to check that the jumps to a different part of the
+/* We need these positions to check that the jumps to a different part of the
* entries is a jump to the beginning of a new chain.
*/
static inline int
ebt_get_udc_positions(struct ebt_entry *e, struct ebt_table_info *newinfo,
- unsigned int *n, struct ebt_cl_stack *udc)
+ unsigned int *n, struct ebt_cl_stack *udc)
{
int i;
@@ -649,9 +649,9 @@
static inline int
ebt_check_entry(struct ebt_entry *e, struct net *net,
- const struct ebt_table_info *newinfo,
- const char *name, unsigned int *cnt,
- struct ebt_cl_stack *cl_s, unsigned int udc_cnt)
+ const struct ebt_table_info *newinfo,
+ const char *name, unsigned int *cnt,
+ struct ebt_cl_stack *cl_s, unsigned int udc_cnt)
{
struct ebt_entry_target *t;
struct xt_target *target;
@@ -673,7 +673,7 @@
BUGPRINT("Unknown flag for inv bitmask\n");
return -EINVAL;
}
- if ( (e->bitmask & EBT_NOPROTO) && (e->bitmask & EBT_802_3) ) {
+ if ((e->bitmask & EBT_NOPROTO) && (e->bitmask & EBT_802_3)) {
BUGPRINT("NOPROTO & 802_3 not allowed\n");
return -EINVAL;
}
@@ -687,7 +687,8 @@
break;
}
/* (1 << NF_BR_NUMHOOKS) tells the check functions the rule is on
- a base chain */
+ * a base chain
+ */
if (i < NF_BR_NUMHOOKS)
hookmask = (1 << hook) | (1 << NF_BR_NUMHOOKS);
else {
@@ -758,13 +759,12 @@
return ret;
}
-/*
- * checks for loops and sets the hook mask for udc
+/* checks for loops and sets the hook mask for udc
* the hook mask for udc tells us from which base chains the udc can be
* accessed. This mask is a parameter to the check() functions of the extensions
*/
static int check_chainloops(const struct ebt_entries *chain, struct ebt_cl_stack *cl_s,
- unsigned int udc_cnt, unsigned int hooknr, char *base)
+ unsigned int udc_cnt, unsigned int hooknr, char *base)
{
int i, chain_nr = -1, pos = 0, nentries = chain->nentries, verdict;
const struct ebt_entry *e = (struct ebt_entry *)chain->data;
@@ -853,7 +853,8 @@
return -EINVAL;
}
/* make sure chains are ordered after each other in same order
- as their corresponding hooks */
+ * as their corresponding hooks
+ */
for (j = i + 1; j < NF_BR_NUMHOOKS; j++) {
if (!newinfo->hook_entry[j])
continue;
@@ -868,7 +869,8 @@
i = 0; /* holds the expected nr. of entries for the chain */
j = 0; /* holds the up to now counted entries for the chain */
k = 0; /* holds the total nr. of entries, should equal
- newinfo->nentries afterwards */
+ * newinfo->nentries afterwards
+ */
udc_cnt = 0; /* will hold the nr. of user defined chains (udc) */
ret = EBT_ENTRY_ITERATE(newinfo->entries, newinfo->entries_size,
ebt_check_entry_size_and_hooks, newinfo,
@@ -888,10 +890,12 @@
}
/* get the location of the udc, put them in an array
- while we're at it, allocate the chainstack */
+ * while we're at it, allocate the chainstack
+ */
if (udc_cnt) {
/* this will get free'd in do_replace()/ebt_register_table()
- if an error occurs */
+ * if an error occurs
+ */
newinfo->chainstack =
vmalloc(nr_cpu_ids * sizeof(*(newinfo->chainstack)));
if (!newinfo->chainstack)
@@ -932,14 +936,15 @@
}
/* we now know the following (along with E=mc²):
- - the nr of entries in each chain is right
- - the size of the allocated space is right
- - all valid hooks have a corresponding chain
- - there are no loops
- - wrong data can still be on the level of a single entry
- - could be there are jumps to places that are not the
- beginning of a chain. This can only occur in chains that
- are not accessible from any base chains, so we don't care. */
+ * - the nr of entries in each chain is right
+ * - the size of the allocated space is right
+ * - all valid hooks have a corresponding chain
+ * - there are no loops
+ * - wrong data can still be on the level of a single entry
+ * - could be there are jumps to places that are not the
+ * beginning of a chain. This can only occur in chains that
+ * are not accessible from any base chains, so we don't care.
+ */
/* used to know what we need to clean up if something goes wrong */
i = 0;
@@ -955,7 +960,7 @@
/* called under write_lock */
static void get_counters(const struct ebt_counter *oldcounters,
- struct ebt_counter *counters, unsigned int nentries)
+ struct ebt_counter *counters, unsigned int nentries)
{
int i, cpu;
struct ebt_counter *counter_base;
@@ -986,7 +991,8 @@
struct ebt_table *t;
/* the user wants counters back
- the check on the size is done later, when we have the lock */
+ * the check on the size is done later, when we have the lock
+ */
if (repl->num_counters) {
unsigned long size = repl->num_counters * sizeof(*counterstmp);
counterstmp = vmalloc(size);
@@ -1038,9 +1044,10 @@
write_unlock_bh(&t->lock);
mutex_unlock(&ebt_mutex);
/* so, a user can change the chains while having messed up her counter
- allocation. Only reason why this is done is because this way the lock
- is held only once, while this doesn't bring the kernel into a
- dangerous state. */
+ * allocation. Only reason why this is done is because this way the lock
+ * is held only once, while this doesn't bring the kernel into a
+ * dangerous state.
+ */
if (repl->num_counters &&
copy_to_user(repl->counters, counterstmp,
repl->num_counters * sizeof(struct ebt_counter))) {
@@ -1342,13 +1349,14 @@
}
static inline int ebt_make_matchname(const struct ebt_entry_match *m,
- const char *base, char __user *ubase)
+ const char *base, char __user *ubase)
{
char __user *hlp = ubase + ((char *)m - base);
char name[EBT_FUNCTION_MAXNAMELEN] = {};
/* ebtables expects 32 bytes long names but xt_match names are 29 bytes
- long. Copy 29 bytes and fill remaining bytes with zeroes. */
+ * long. Copy 29 bytes and fill remaining bytes with zeroes.
+ */
strlcpy(name, m->u.match->name, sizeof(name));
if (copy_to_user(hlp, name, EBT_FUNCTION_MAXNAMELEN))
return -EFAULT;
@@ -1356,19 +1364,19 @@
}
static inline int ebt_make_watchername(const struct ebt_entry_watcher *w,
- const char *base, char __user *ubase)
+ const char *base, char __user *ubase)
{
char __user *hlp = ubase + ((char *)w - base);
char name[EBT_FUNCTION_MAXNAMELEN] = {};
strlcpy(name, w->u.watcher->name, sizeof(name));
- if (copy_to_user(hlp , name, EBT_FUNCTION_MAXNAMELEN))
+ if (copy_to_user(hlp, name, EBT_FUNCTION_MAXNAMELEN))
return -EFAULT;
return 0;
}
-static inline int
-ebt_make_names(struct ebt_entry *e, const char *base, char __user *ubase)
+static inline int ebt_make_names(struct ebt_entry *e, const char *base,
+ char __user *ubase)
{
int ret;
char __user *hlp;
@@ -1394,9 +1402,9 @@
}
static int copy_counters_to_user(struct ebt_table *t,
- const struct ebt_counter *oldcounters,
- void __user *user, unsigned int num_counters,
- unsigned int nentries)
+ const struct ebt_counter *oldcounters,
+ void __user *user, unsigned int num_counters,
+ unsigned int nentries)
{
struct ebt_counter *counterstmp;
int ret = 0;
@@ -1427,7 +1435,7 @@
/* called with ebt_mutex locked */
static int copy_everything_to_user(struct ebt_table *t, void __user *user,
- const int *len, int cmd)
+ const int *len, int cmd)
{
struct ebt_replace tmp;
const struct ebt_counter *oldcounters;
@@ -1595,8 +1603,7 @@
static int ebt_compat_match_offset(const struct xt_match *match,
unsigned int userlen)
{
- /*
- * ebt_among needs special handling. The kernel .matchsize is
+ /* ebt_among needs special handling. The kernel .matchsize is
* set to -1 at registration time; at runtime an EBT_ALIGN()ed
* value is expected.
* Example: userspace sends 4500, ebt_among.c wants 4504.
@@ -1966,8 +1973,7 @@
return off + match_size;
}
-/*
- * return size of all matches, watchers or target, including necessary
+/* return size of all matches, watchers or target, including necessary
* alignment and padding.
*/
static int ebt_size_mwt(struct compat_ebt_entry_mwt *match32,
@@ -2070,8 +2076,7 @@
if (ret < 0)
return ret;
buf_start = (char *) entry;
- /*
- * 0: matches offset, always follows ebt_entry.
+ /* 0: matches offset, always follows ebt_entry.
* 1: watchers offset, from ebt_entry structure
* 2: target offset, from ebt_entry structure
* 3: next ebt_entry offset, from ebt_entry structure
@@ -2115,8 +2120,7 @@
return 0;
}
-/*
- * repl->entries_size is the size of the ebt_entry blob in userspace.
+/* repl->entries_size is the size of the ebt_entry blob in userspace.
* It might need more memory when copied to a 64 bit kernel in case
* userspace is 32-bit. So, first task: find out how much memory is needed.
*
@@ -2305,7 +2309,7 @@
break;
default:
ret = -EINVAL;
- }
+ }
return ret;
}
@@ -2360,8 +2364,7 @@
break;
case EBT_SO_GET_ENTRIES:
case EBT_SO_GET_INIT_ENTRIES:
- /*
- * try real handler first in case of userland-side padding.
+ /* try real handler first in case of userland-side padding.
* in case we are dealing with an 'ordinary' 32 bit binary
* without 64bit compatibility padding, this will fail right
* after copy_from_user when the *len argument is validated.
diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c
index a21269b..4b901d9 100644
--- a/net/bridge/netfilter/nft_meta_bridge.c
+++ b/net/bridge/netfilter/nft_meta_bridge.c
@@ -84,6 +84,7 @@
.size = NFT_EXPR_SIZE(sizeof(struct nft_meta)),
.eval = nft_meta_set_eval,
.init = nft_meta_set_init,
+ .destroy = nft_meta_set_destroy,
.dump = nft_meta_set_dump,
};
diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
index c747b2d..b6ea57e 100644
--- a/net/ipv4/netfilter/nf_reject_ipv4.c
+++ b/net/ipv4/netfilter/nf_reject_ipv4.c
@@ -14,7 +14,6 @@
#include <net/netfilter/ipv4/nf_reject.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_bridge.h>
-#include <net/netfilter/ipv4/nf_reject.h>
const struct tcphdr *nf_reject_ip_tcphdr_get(struct sk_buff *oldskb,
struct tcphdr *_oth, int hook)
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index bab4441..e4347ae 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -56,7 +56,6 @@
{
struct inet6_skb_parm h;
int offset;
- struct sk_buff *orig;
};
#define NFCT_FRAG6_CB(skb) ((struct nf_ct_frag6_skb_cb *)((skb)->cb))
@@ -170,12 +169,6 @@
return nf_hash_frag(nq->id, &nq->saddr, &nq->daddr);
}
-static void nf_skb_free(struct sk_buff *skb)
-{
- if (NFCT_FRAG6_CB(skb)->orig)
- kfree_skb(NFCT_FRAG6_CB(skb)->orig);
-}
-
static void nf_ct_frag6_expire(unsigned long data)
{
struct frag_queue *fq;
@@ -369,17 +362,18 @@
/*
* Check if this packet is complete.
- * Returns NULL on failure by any reason, and pointer
- * to current nexthdr field in reassembled frame.
*
* It is called with locked fq, and caller must check that
* queue is eligible for reassembly i.e. it is not COMPLETE,
* the last and the first frames arrived and all the bits are here.
+ *
+ * returns true if *prev skb has been transformed into the reassembled
+ * skb, false otherwise.
*/
-static struct sk_buff *
-nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
+static bool
+nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_device *dev)
{
- struct sk_buff *fp, *op, *head = fq->q.fragments;
+ struct sk_buff *fp, *head = fq->q.fragments;
int payload_len;
u8 ecn;
@@ -390,22 +384,21 @@
ecn = ip_frag_ecn_table[fq->ecn];
if (unlikely(ecn == 0xff))
- goto out_fail;
+ return false;
/* Unfragmented part is taken from the first segment. */
payload_len = ((head->data - skb_network_header(head)) -
sizeof(struct ipv6hdr) + fq->q.len -
sizeof(struct frag_hdr));
if (payload_len > IPV6_MAXPLEN) {
- pr_debug("payload len is too large.\n");
- goto out_oversize;
+ net_dbg_ratelimited("nf_ct_frag6_reasm: payload len = %d\n",
+ payload_len);
+ return false;
}
/* Head of list must not be cloned. */
- if (skb_unclone(head, GFP_ATOMIC)) {
- pr_debug("skb is cloned but can't expand head");
- goto out_oom;
- }
+ if (skb_unclone(head, GFP_ATOMIC))
+ return false;
/* If the first fragment is fragmented itself, we split
* it to two chunks: the first with data and paged part
@@ -416,7 +409,7 @@
clone = alloc_skb(0, GFP_ATOMIC);
if (clone == NULL)
- goto out_oom;
+ return false;
clone->next = head->next;
head->next = clone;
@@ -430,10 +423,41 @@
clone->csum = 0;
clone->ip_summed = head->ip_summed;
- NFCT_FRAG6_CB(clone)->orig = NULL;
add_frag_mem_limit(fq->q.net, clone->truesize);
}
+ /* morph head into last received skb: prev.
+ *
+ * This allows callers of ipv6 conntrack defrag to continue
+ * to use the last skb(frag) passed into the reasm engine.
+ * The last skb frag 'silently' turns into the full reassembled skb.
+ *
+ * Since prev is also part of q->fragments we have to clone it first.
+ */
+ if (head != prev) {
+ struct sk_buff *iter;
+
+ fp = skb_clone(prev, GFP_ATOMIC);
+ if (!fp)
+ return false;
+
+ fp->next = prev->next;
+
+ iter = head;
+ while (iter) {
+ if (iter->next == prev) {
+ iter->next = fp;
+ break;
+ }
+ iter = iter->next;
+ }
+
+ skb_morph(prev, head);
+ prev->next = head->next;
+ consume_skb(head);
+ head = prev;
+ }
+
/* We have to remove fragment header from datagram and to relocate
* header in order to calculate ICV correctly. */
skb_network_header(head)[fq->nhoffset] = skb_transport_header(head)[0];
@@ -474,31 +498,7 @@
fq->q.fragments = NULL;
fq->q.fragments_tail = NULL;
- /* all original skbs are linked into the NFCT_FRAG6_CB(head).orig */
- fp = skb_shinfo(head)->frag_list;
- if (fp && NFCT_FRAG6_CB(fp)->orig == NULL)
- /* at above code, head skb is divided into two skbs. */
- fp = fp->next;
-
- op = NFCT_FRAG6_CB(head)->orig;
- for (; fp; fp = fp->next) {
- struct sk_buff *orig = NFCT_FRAG6_CB(fp)->orig;
-
- op->next = orig;
- op = orig;
- NFCT_FRAG6_CB(fp)->orig = NULL;
- }
-
- return head;
-
-out_oversize:
- net_dbg_ratelimited("nf_ct_frag6_reasm: payload len = %d\n",
- payload_len);
- goto out_fail;
-out_oom:
- net_dbg_ratelimited("nf_ct_frag6_reasm: no memory for reassembly\n");
-out_fail:
- return NULL;
+ return true;
}
/*
@@ -564,89 +564,61 @@
return 0;
}
-struct sk_buff *nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
+int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
{
- struct sk_buff *clone;
struct net_device *dev = skb->dev;
+ int fhoff, nhoff, ret;
struct frag_hdr *fhdr;
struct frag_queue *fq;
struct ipv6hdr *hdr;
- int fhoff, nhoff;
u8 prevhdr;
- struct sk_buff *ret_skb = NULL;
/* Jumbo payload inhibits frag. header */
if (ipv6_hdr(skb)->payload_len == 0) {
pr_debug("payload len = 0\n");
- return skb;
+ return -EINVAL;
}
if (find_prev_fhdr(skb, &prevhdr, &nhoff, &fhoff) < 0)
- return skb;
+ return -EINVAL;
- clone = skb_clone(skb, GFP_ATOMIC);
- if (clone == NULL) {
- pr_debug("Can't clone skb\n");
- return skb;
- }
+ if (!pskb_may_pull(skb, fhoff + sizeof(*fhdr)))
+ return -ENOMEM;
- NFCT_FRAG6_CB(clone)->orig = skb;
-
- if (!pskb_may_pull(clone, fhoff + sizeof(*fhdr))) {
- pr_debug("message is too short.\n");
- goto ret_orig;
- }
-
- skb_set_transport_header(clone, fhoff);
- hdr = ipv6_hdr(clone);
- fhdr = (struct frag_hdr *)skb_transport_header(clone);
+ skb_set_transport_header(skb, fhoff);
+ hdr = ipv6_hdr(skb);
+ fhdr = (struct frag_hdr *)skb_transport_header(skb);
fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr,
skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr));
if (fq == NULL) {
pr_debug("Can't find and can't create new queue\n");
- goto ret_orig;
+ return -ENOMEM;
}
spin_lock_bh(&fq->q.lock);
- if (nf_ct_frag6_queue(fq, clone, fhdr, nhoff) < 0) {
- spin_unlock_bh(&fq->q.lock);
- pr_debug("Can't insert skb to queue\n");
- inet_frag_put(&fq->q, &nf_frags);
- goto ret_orig;
+ if (nf_ct_frag6_queue(fq, skb, fhdr, nhoff) < 0) {
+ ret = -EINVAL;
+ goto out_unlock;
}
+ /* after queue has assumed skb ownership, only 0 or -EINPROGRESS
+ * must be returned.
+ */
+ ret = -EINPROGRESS;
if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
- fq->q.meat == fq->q.len) {
- ret_skb = nf_ct_frag6_reasm(fq, dev);
- if (ret_skb == NULL)
- pr_debug("Can't reassemble fragmented packets\n");
- }
+ fq->q.meat == fq->q.len &&
+ nf_ct_frag6_reasm(fq, skb, dev))
+ ret = 0;
+
+out_unlock:
spin_unlock_bh(&fq->q.lock);
-
inet_frag_put(&fq->q, &nf_frags);
- return ret_skb;
-
-ret_orig:
- kfree_skb(clone);
- return skb;
+ return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_frag6_gather);
-void nf_ct_frag6_consume_orig(struct sk_buff *skb)
-{
- struct sk_buff *s, *s2;
-
- for (s = NFCT_FRAG6_CB(skb)->orig; s;) {
- s2 = s->next;
- s->next = NULL;
- consume_skb(s);
- s = s2;
- }
-}
-EXPORT_SYMBOL_GPL(nf_ct_frag6_consume_orig);
-
static int nf_ct_net_init(struct net *net)
{
int res;
@@ -681,7 +653,6 @@
nf_frags.hashfn = nf_hashfn;
nf_frags.constructor = ip6_frag_init;
nf_frags.destructor = NULL;
- nf_frags.skb_free = nf_skb_free;
nf_frags.qsize = sizeof(struct frag_queue);
nf_frags.match = ip6_frag_match;
nf_frags.frag_expire = nf_ct_frag6_expire;
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index 4fdbed5e..f7aab5a 100644
--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -55,7 +55,7 @@
struct sk_buff *skb,
const struct nf_hook_state *state)
{
- struct sk_buff *reasm;
+ int err;
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
/* Previously seen (loopback)? */
@@ -63,23 +63,13 @@
return NF_ACCEPT;
#endif
- reasm = nf_ct_frag6_gather(state->net, skb,
- nf_ct6_defrag_user(state->hook, skb));
+ err = nf_ct_frag6_gather(state->net, skb,
+ nf_ct6_defrag_user(state->hook, skb));
/* queued */
- if (reasm == NULL)
+ if (err == -EINPROGRESS)
return NF_STOLEN;
- /* error occurred or not fragmented */
- if (reasm == skb)
- return NF_ACCEPT;
-
- nf_ct_frag6_consume_orig(reasm);
-
- NF_HOOK_THRESH(NFPROTO_IPV6, state->hook, state->net, state->sk, reasm,
- state->in, state->out,
- state->okfn, NF_IP6_PRI_CONNTRACK_DEFRAG + 1);
-
- return NF_STOLEN;
+ return NF_ACCEPT;
}
static struct nf_hook_ops ipv6_defrag_ops[] = {
diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c
index e0f922b..4709f65 100644
--- a/net/ipv6/netfilter/nf_reject_ipv6.c
+++ b/net/ipv6/netfilter/nf_reject_ipv6.c
@@ -14,7 +14,6 @@
#include <net/netfilter/ipv6/nf_reject.h>
#include <linux/netfilter_ipv6.h>
#include <linux/netfilter_bridge.h>
-#include <net/netfilter/ipv6/nf_reject.h>
const struct tcphdr *nf_reject_ip6_tcphdr_get(struct sk_buff *oldskb,
struct tcphdr *otcph,
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 7638c36..2293484 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -67,7 +67,7 @@
obj-$(CONFIG_NETFILTER_SYNPROXY) += nf_synproxy_core.o
# nf_tables
-nf_tables-objs += nf_tables_core.o nf_tables_api.o
+nf_tables-objs += nf_tables_core.o nf_tables_api.o nf_tables_trace.o
nf_tables-objs += nft_immediate.o nft_cmp.o nft_lookup.o nft_dynset.o
nf_tables-objs += nft_bitwise.o nft_byteorder.o nft_payload.o
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index acf5c7b..278927a 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -596,11 +596,18 @@
{
#ifdef CONFIG_NF_CONNTRACK_PROCFS
struct proc_dir_entry *proc;
+ kuid_t root_uid;
+ kgid_t root_gid;
proc = proc_create("nf_conntrack_expect", 0440, net->proc_net,
&exp_file_ops);
if (!proc)
return -ENOMEM;
+
+ root_uid = make_kuid(net->user_ns, 0);
+ root_gid = make_kgid(net->user_ns, 0);
+ if (uid_valid(root_uid) && gid_valid(root_gid))
+ proc_set_user(proc, root_uid, root_gid);
#endif /* CONFIG_NF_CONNTRACK_PROCFS */
return 0;
}
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 1fb3cac..0f1a45b 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -392,11 +392,18 @@
static int nf_conntrack_standalone_init_proc(struct net *net)
{
struct proc_dir_entry *pde;
+ kuid_t root_uid;
+ kgid_t root_gid;
pde = proc_create("nf_conntrack", 0440, net->proc_net, &ct_file_ops);
if (!pde)
goto out_nf_conntrack;
+ root_uid = make_kuid(net->user_ns, 0);
+ root_gid = make_kgid(net->user_ns, 0);
+ if (uid_valid(root_uid) && gid_valid(root_gid))
+ proc_set_user(pde, root_uid, root_gid);
+
pde = proc_create("nf_conntrack", S_IRUGO, net->proc_net_stat,
&ct_cpu_seq_fops);
if (!pde)
diff --git a/net/netfilter/nf_conntrack_timeout.c b/net/netfilter/nf_conntrack_timeout.c
index 93da609..26e7420 100644
--- a/net/netfilter/nf_conntrack_timeout.c
+++ b/net/netfilter/nf_conntrack_timeout.c
@@ -25,7 +25,7 @@
#include <net/netfilter/nf_conntrack_timeout.h>
struct ctnl_timeout *
-(*nf_ct_timeout_find_get_hook)(const char *name) __read_mostly;
+(*nf_ct_timeout_find_get_hook)(struct net *net, const char *name) __read_mostly;
EXPORT_SYMBOL_GPL(nf_ct_timeout_find_get_hook);
void (*nf_ct_timeout_put_hook)(struct ctnl_timeout *timeout) __read_mostly;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 2cb429d..4a23f77 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -4441,22 +4441,22 @@
}
}
-static int nft_verdict_dump(struct sk_buff *skb, const struct nft_data *data)
+int nft_verdict_dump(struct sk_buff *skb, int type, const struct nft_verdict *v)
{
struct nlattr *nest;
- nest = nla_nest_start(skb, NFTA_DATA_VERDICT);
+ nest = nla_nest_start(skb, type);
if (!nest)
goto nla_put_failure;
- if (nla_put_be32(skb, NFTA_VERDICT_CODE, htonl(data->verdict.code)))
+ if (nla_put_be32(skb, NFTA_VERDICT_CODE, htonl(v->code)))
goto nla_put_failure;
- switch (data->verdict.code) {
+ switch (v->code) {
case NFT_JUMP:
case NFT_GOTO:
if (nla_put_string(skb, NFTA_VERDICT_CHAIN,
- data->verdict.chain->name))
+ v->chain->name))
goto nla_put_failure;
}
nla_nest_end(skb, nest);
@@ -4567,7 +4567,7 @@
err = nft_value_dump(skb, data, len);
break;
case NFT_DATA_VERDICT:
- err = nft_verdict_dump(skb, data);
+ err = nft_verdict_dump(skb, NFTA_DATA_VERDICT, &data->verdict);
break;
default:
err = -EINVAL;
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index f3695a4..e9f8dff 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -16,22 +16,17 @@
#include <linux/skbuff.h>
#include <linux/netlink.h>
#include <linux/netfilter.h>
+#include <linux/static_key.h>
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables_core.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_log.h>
-enum nft_trace {
- NFT_TRACE_RULE,
- NFT_TRACE_RETURN,
- NFT_TRACE_POLICY,
-};
-
-static const char *const comments[] = {
- [NFT_TRACE_RULE] = "rule",
- [NFT_TRACE_RETURN] = "return",
- [NFT_TRACE_POLICY] = "policy",
+static const char *const comments[__NFT_TRACETYPE_MAX] = {
+ [NFT_TRACETYPE_POLICY] = "policy",
+ [NFT_TRACETYPE_RETURN] = "return",
+ [NFT_TRACETYPE_RULE] = "rule",
};
static struct nf_loginfo trace_loginfo = {
@@ -44,22 +39,36 @@
},
};
-static void __nft_trace_packet(const struct nft_pktinfo *pkt,
- const struct nft_chain *chain,
- int rulenum, enum nft_trace type)
+static noinline void __nft_trace_packet(struct nft_traceinfo *info,
+ const struct nft_chain *chain,
+ int rulenum, enum nft_trace_types type)
{
+ const struct nft_pktinfo *pkt = info->pkt;
+
+ if (!info->trace || !pkt->skb->nf_trace)
+ return;
+
+ info->chain = chain;
+ info->type = type;
+
+ nft_trace_notify(info);
+
nf_log_trace(pkt->net, pkt->pf, pkt->hook, pkt->skb, pkt->in,
pkt->out, &trace_loginfo, "TRACE: %s:%s:%s:%u ",
chain->table->name, chain->name, comments[type],
rulenum);
}
-static inline void nft_trace_packet(const struct nft_pktinfo *pkt,
+static inline void nft_trace_packet(struct nft_traceinfo *info,
const struct nft_chain *chain,
- int rulenum, enum nft_trace type)
+ const struct nft_rule *rule,
+ int rulenum,
+ enum nft_trace_types type)
{
- if (unlikely(pkt->skb->nf_trace))
- __nft_trace_packet(pkt, chain, rulenum, type);
+ if (static_branch_unlikely(&nft_trace_enabled)) {
+ info->rule = rule;
+ __nft_trace_packet(info, chain, rulenum, type);
+ }
}
static void nft_cmp_fast_eval(const struct nft_expr *expr,
@@ -121,7 +130,11 @@
struct nft_stats *stats;
int rulenum;
unsigned int gencursor = nft_genmask_cur(net);
+ struct nft_traceinfo info;
+ info.trace = false;
+ if (static_branch_unlikely(&nft_trace_enabled))
+ nft_trace_init(&info, pkt, ®s.verdict, basechain);
do_chain:
rulenum = 0;
rule = list_entry(&chain->rules, struct nft_rule, list);
@@ -151,7 +164,8 @@
regs.verdict.code = NFT_CONTINUE;
continue;
case NFT_CONTINUE:
- nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
+ nft_trace_packet(&info, chain, rule,
+ rulenum, NFT_TRACETYPE_RULE);
continue;
}
break;
@@ -161,7 +175,8 @@
case NF_ACCEPT:
case NF_DROP:
case NF_QUEUE:
- nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
+ nft_trace_packet(&info, chain, rule,
+ rulenum, NFT_TRACETYPE_RULE);
return regs.verdict.code;
}
@@ -174,7 +189,8 @@
stackptr++;
/* fall through */
case NFT_GOTO:
- nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RULE);
+ nft_trace_packet(&info, chain, rule,
+ rulenum, NFT_TRACETYPE_RULE);
chain = regs.verdict.chain;
goto do_chain;
@@ -182,7 +198,8 @@
rulenum++;
/* fall through */
case NFT_RETURN:
- nft_trace_packet(pkt, chain, rulenum, NFT_TRACE_RETURN);
+ nft_trace_packet(&info, chain, rule,
+ rulenum, NFT_TRACETYPE_RETURN);
break;
default:
WARN_ON(1);
@@ -196,7 +213,8 @@
goto next_rule;
}
- nft_trace_packet(pkt, basechain, -1, NFT_TRACE_POLICY);
+ nft_trace_packet(&info, basechain, NULL, -1,
+ NFT_TRACETYPE_POLICY);
rcu_read_lock_bh();
stats = this_cpu_ptr(rcu_dereference(nft_base_chain(basechain)->stats));
diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c
new file mode 100644
index 0000000..e9e959f
--- /dev/null
+++ b/net/netfilter/nf_tables_trace.c
@@ -0,0 +1,275 @@
+/*
+ * (C) 2015 Red Hat GmbH
+ * Author: Florian Westphal <fw@strlen.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/static_key.h>
+#include <linux/hash.h>
+#include <linux/jhash.h>
+#include <linux/if_vlan.h>
+#include <linux/init.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+
+#define NFT_TRACETYPE_LL_HSIZE 20
+#define NFT_TRACETYPE_NETWORK_HSIZE 40
+#define NFT_TRACETYPE_TRANSPORT_HSIZE 20
+
+DEFINE_STATIC_KEY_FALSE(nft_trace_enabled);
+EXPORT_SYMBOL_GPL(nft_trace_enabled);
+
+static int trace_fill_id(struct sk_buff *nlskb, struct sk_buff *skb)
+{
+ __be32 id;
+
+ /* using skb address as ID results in a limited number of
+ * values (and quick reuse).
+ *
+ * So we attempt to use as many skb members that will not
+ * change while skb is with netfilter.
+ */
+ id = (__be32)jhash_2words(hash32_ptr(skb), skb_get_hash(skb),
+ skb->skb_iif);
+
+ return nla_put_be32(nlskb, NFTA_TRACE_ID, id);
+}
+
+static int trace_fill_header(struct sk_buff *nlskb, u16 type,
+ const struct sk_buff *skb,
+ int off, unsigned int len)
+{
+ struct nlattr *nla;
+
+ if (len == 0)
+ return 0;
+
+ nla = nla_reserve(nlskb, type, len);
+ if (!nla || skb_copy_bits(skb, off, nla_data(nla), len))
+ return -1;
+
+ return 0;
+}
+
+static int nf_trace_fill_ll_header(struct sk_buff *nlskb,
+ const struct sk_buff *skb)
+{
+ struct vlan_ethhdr veth;
+ int off;
+
+ BUILD_BUG_ON(sizeof(veth) > NFT_TRACETYPE_LL_HSIZE);
+
+ off = skb_mac_header(skb) - skb->data;
+ if (off != -ETH_HLEN)
+ return -1;
+
+ if (skb_copy_bits(skb, off, &veth, ETH_HLEN))
+ return -1;
+
+ veth.h_vlan_proto = skb->vlan_proto;
+ veth.h_vlan_TCI = htons(skb_vlan_tag_get(skb));
+ veth.h_vlan_encapsulated_proto = skb->protocol;
+
+ return nla_put(nlskb, NFTA_TRACE_LL_HEADER, sizeof(veth), &veth);
+}
+
+static int nf_trace_fill_dev_info(struct sk_buff *nlskb,
+ const struct net_device *indev,
+ const struct net_device *outdev)
+{
+ if (indev) {
+ if (nla_put_be32(nlskb, NFTA_TRACE_IIF,
+ htonl(indev->ifindex)))
+ return -1;
+
+ if (nla_put_be16(nlskb, NFTA_TRACE_IIFTYPE,
+ htons(indev->type)))
+ return -1;
+ }
+
+ if (outdev) {
+ if (nla_put_be32(nlskb, NFTA_TRACE_OIF,
+ htonl(outdev->ifindex)))
+ return -1;
+
+ if (nla_put_be16(nlskb, NFTA_TRACE_OIFTYPE,
+ htons(outdev->type)))
+ return -1;
+ }
+
+ return 0;
+}
+
+static int nf_trace_fill_pkt_info(struct sk_buff *nlskb,
+ const struct nft_pktinfo *pkt)
+{
+ const struct sk_buff *skb = pkt->skb;
+ unsigned int len = min_t(unsigned int,
+ pkt->xt.thoff - skb_network_offset(skb),
+ NFT_TRACETYPE_NETWORK_HSIZE);
+ int off = skb_network_offset(skb);
+
+ if (trace_fill_header(nlskb, NFTA_TRACE_NETWORK_HEADER, skb, off, len))
+ return -1;
+
+ len = min_t(unsigned int, skb->len - pkt->xt.thoff,
+ NFT_TRACETYPE_TRANSPORT_HSIZE);
+
+ if (trace_fill_header(nlskb, NFTA_TRACE_TRANSPORT_HEADER, skb,
+ pkt->xt.thoff, len))
+ return -1;
+
+ if (!skb_mac_header_was_set(skb))
+ return 0;
+
+ if (skb_vlan_tag_get(skb))
+ return nf_trace_fill_ll_header(nlskb, skb);
+
+ off = skb_mac_header(skb) - skb->data;
+ len = min_t(unsigned int, -off, NFT_TRACETYPE_LL_HSIZE);
+ return trace_fill_header(nlskb, NFTA_TRACE_LL_HEADER,
+ skb, off, len);
+}
+
+static int nf_trace_fill_rule_info(struct sk_buff *nlskb,
+ const struct nft_traceinfo *info)
+{
+ if (!info->rule)
+ return 0;
+
+ /* a continue verdict with ->type == RETURN means that this is
+ * an implicit return (end of chain reached).
+ *
+ * Since no rule matched, the ->rule pointer is invalid.
+ */
+ if (info->type == NFT_TRACETYPE_RETURN &&
+ info->verdict->code == NFT_CONTINUE)
+ return 0;
+
+ return nla_put_be64(nlskb, NFTA_TRACE_RULE_HANDLE,
+ cpu_to_be64(info->rule->handle));
+}
+
+void nft_trace_notify(struct nft_traceinfo *info)
+{
+ const struct nft_pktinfo *pkt = info->pkt;
+ struct nfgenmsg *nfmsg;
+ struct nlmsghdr *nlh;
+ struct sk_buff *skb;
+ unsigned int size;
+ int event = (NFNL_SUBSYS_NFTABLES << 8) | NFT_MSG_TRACE;
+
+ if (!nfnetlink_has_listeners(pkt->net, NFNLGRP_NFTRACE))
+ return;
+
+ size = nlmsg_total_size(sizeof(struct nfgenmsg)) +
+ nla_total_size(NFT_TABLE_MAXNAMELEN) +
+ nla_total_size(NFT_CHAIN_MAXNAMELEN) +
+ nla_total_size(sizeof(__be64)) + /* rule handle */
+ nla_total_size(sizeof(__be32)) + /* trace type */
+ nla_total_size(0) + /* VERDICT, nested */
+ nla_total_size(sizeof(u32)) + /* verdict code */
+ nla_total_size(NFT_CHAIN_MAXNAMELEN) + /* jump target */
+ nla_total_size(sizeof(u32)) + /* id */
+ nla_total_size(NFT_TRACETYPE_LL_HSIZE) +
+ nla_total_size(NFT_TRACETYPE_NETWORK_HSIZE) +
+ nla_total_size(NFT_TRACETYPE_TRANSPORT_HSIZE) +
+ nla_total_size(sizeof(u32)) + /* iif */
+ nla_total_size(sizeof(__be16)) + /* iiftype */
+ nla_total_size(sizeof(u32)) + /* oif */
+ nla_total_size(sizeof(__be16)) + /* oiftype */
+ nla_total_size(sizeof(u32)) + /* mark */
+ nla_total_size(sizeof(u32)) + /* nfproto */
+ nla_total_size(sizeof(u32)); /* policy */
+
+ skb = nlmsg_new(size, GFP_ATOMIC);
+ if (!skb)
+ return;
+
+ nlh = nlmsg_put(skb, 0, 0, event, sizeof(struct nfgenmsg), 0);
+ if (!nlh)
+ goto nla_put_failure;
+
+ nfmsg = nlmsg_data(nlh);
+ nfmsg->nfgen_family = info->basechain->type->family;
+ nfmsg->version = NFNETLINK_V0;
+ nfmsg->res_id = 0;
+
+ if (nla_put_be32(skb, NFTA_TRACE_NFPROTO, htonl(pkt->pf)))
+ goto nla_put_failure;
+
+ if (nla_put_be32(skb, NFTA_TRACE_TYPE, htonl(info->type)))
+ goto nla_put_failure;
+
+ if (trace_fill_id(skb, pkt->skb))
+ goto nla_put_failure;
+
+ if (info->chain) {
+ if (nla_put_string(skb, NFTA_TRACE_CHAIN,
+ info->chain->name))
+ goto nla_put_failure;
+ if (nla_put_string(skb, NFTA_TRACE_TABLE,
+ info->chain->table->name))
+ goto nla_put_failure;
+ }
+
+ if (nf_trace_fill_rule_info(skb, info))
+ goto nla_put_failure;
+
+ switch (info->type) {
+ case NFT_TRACETYPE_UNSPEC:
+ case __NFT_TRACETYPE_MAX:
+ break;
+ case NFT_TRACETYPE_RETURN:
+ case NFT_TRACETYPE_RULE:
+ if (nft_verdict_dump(skb, NFTA_TRACE_VERDICT, info->verdict))
+ goto nla_put_failure;
+ break;
+ case NFT_TRACETYPE_POLICY:
+ if (nla_put_be32(skb, NFTA_TRACE_POLICY,
+ info->basechain->policy))
+ goto nla_put_failure;
+ break;
+ }
+
+ if (pkt->skb->mark &&
+ nla_put_be32(skb, NFTA_TRACE_MARK, htonl(pkt->skb->mark)))
+ goto nla_put_failure;
+
+ if (!info->packet_dumped) {
+ if (nf_trace_fill_dev_info(skb, pkt->in, pkt->out))
+ goto nla_put_failure;
+
+ if (nf_trace_fill_pkt_info(skb, pkt))
+ goto nla_put_failure;
+ info->packet_dumped = true;
+ }
+
+ nlmsg_end(skb, nlh);
+ nfnetlink_send(skb, pkt->net, 0, NFNLGRP_NFTRACE, 0, GFP_ATOMIC);
+ return;
+
+ nla_put_failure:
+ WARN_ON_ONCE(1);
+ kfree_skb(skb);
+}
+
+void nft_trace_init(struct nft_traceinfo *info, const struct nft_pktinfo *pkt,
+ const struct nft_verdict *verdict,
+ const struct nft_chain *chain)
+{
+ info->basechain = nft_base_chain(chain);
+ info->trace = true;
+ info->packet_dumped = false;
+ info->pkt = pkt;
+ info->verdict = verdict;
+}
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 77afe91..9ed4534 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -33,6 +33,10 @@
MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NETFILTER);
+#define nfnl_dereference_protected(id) \
+ rcu_dereference_protected(table[(id)].subsys, \
+ lockdep_nfnl_is_held((id)))
+
static char __initdata nfversion[] = "0.30";
static struct {
@@ -49,6 +53,7 @@
[NFNLGRP_CONNTRACK_EXP_DESTROY] = NFNL_SUBSYS_CTNETLINK_EXP,
[NFNLGRP_NFTABLES] = NFNL_SUBSYS_NFTABLES,
[NFNLGRP_ACCT_QUOTA] = NFNL_SUBSYS_ACCT,
+ [NFNLGRP_NFTRACE] = NFNL_SUBSYS_NFTABLES,
};
void nfnl_lock(__u8 subsys_id)
@@ -207,8 +212,7 @@
} else {
rcu_read_unlock();
nfnl_lock(subsys_id);
- if (rcu_dereference_protected(table[subsys_id].subsys,
- lockdep_is_held(&table[subsys_id].mutex)) != ss ||
+ if (nfnl_dereference_protected(subsys_id) != ss ||
nfnetlink_find_client(type, ss) != nc)
err = -EAGAIN;
else if (nc->call)
@@ -296,15 +300,13 @@
return netlink_ack(oskb, nlh, -ENOMEM);
nfnl_lock(subsys_id);
- ss = rcu_dereference_protected(table[subsys_id].subsys,
- lockdep_is_held(&table[subsys_id].mutex));
+ ss = nfnl_dereference_protected(subsys_id);
if (!ss) {
#ifdef CONFIG_MODULES
nfnl_unlock(subsys_id);
request_module("nfnetlink-subsys-%d", subsys_id);
nfnl_lock(subsys_id);
- ss = rcu_dereference_protected(table[subsys_id].subsys,
- lockdep_is_held(&table[subsys_id].mutex));
+ ss = nfnl_dereference_protected(subsys_id);
if (!ss)
#endif
{
diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c
index c7a2d0e..3921d54 100644
--- a/net/netfilter/nfnetlink_cttimeout.c
+++ b/net/netfilter/nfnetlink_cttimeout.c
@@ -38,8 +38,6 @@
MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
MODULE_DESCRIPTION("cttimeout: Extended Netfilter Connection Tracking timeout tuning");
-static LIST_HEAD(cttimeout_list);
-
static const struct nla_policy cttimeout_nla_policy[CTA_TIMEOUT_MAX+1] = {
[CTA_TIMEOUT_NAME] = { .type = NLA_NUL_STRING,
.len = CTNL_TIMEOUT_NAME_MAX - 1},
@@ -90,7 +88,7 @@
l3num = ntohs(nla_get_be16(cda[CTA_TIMEOUT_L3PROTO]));
l4num = nla_get_u8(cda[CTA_TIMEOUT_L4PROTO]);
- list_for_each_entry(timeout, &cttimeout_list, head) {
+ list_for_each_entry(timeout, &net->nfct_timeout_list, head) {
if (strncmp(timeout->name, name, CTNL_TIMEOUT_NAME_MAX) != 0)
continue;
@@ -145,7 +143,7 @@
timeout->l3num = l3num;
timeout->l4proto = l4proto;
atomic_set(&timeout->refcnt, 1);
- list_add_tail_rcu(&timeout->head, &cttimeout_list);
+ list_add_tail_rcu(&timeout->head, &net->nfct_timeout_list);
return 0;
err:
@@ -209,6 +207,7 @@
static int
ctnl_timeout_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
+ struct net *net = sock_net(skb->sk);
struct ctnl_timeout *cur, *last;
if (cb->args[2])
@@ -219,7 +218,7 @@
cb->args[1] = 0;
rcu_read_lock();
- list_for_each_entry_rcu(cur, &cttimeout_list, head) {
+ list_for_each_entry_rcu(cur, &net->nfct_timeout_list, head) {
if (last) {
if (cur != last)
continue;
@@ -245,6 +244,7 @@
const struct nlmsghdr *nlh,
const struct nlattr * const cda[])
{
+ struct net *net = sock_net(skb->sk);
int ret = -ENOENT;
char *name;
struct ctnl_timeout *cur;
@@ -260,7 +260,7 @@
return -EINVAL;
name = nla_data(cda[CTA_TIMEOUT_NAME]);
- list_for_each_entry(cur, &cttimeout_list, head) {
+ list_for_each_entry(cur, &net->nfct_timeout_list, head) {
struct sk_buff *skb2;
if (strncmp(cur->name, name, CTNL_TIMEOUT_NAME_MAX) != 0)
@@ -301,17 +301,17 @@
RCU_INIT_POINTER(timeout_ext->timeout, NULL);
}
-static void ctnl_untimeout(struct ctnl_timeout *timeout)
+static void ctnl_untimeout(struct net *net, struct ctnl_timeout *timeout)
{
struct nf_conntrack_tuple_hash *h;
const struct hlist_nulls_node *nn;
int i;
local_bh_disable();
- for (i = 0; i < init_net.ct.htable_size; i++) {
+ for (i = 0; i < net->ct.htable_size; i++) {
spin_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
- if (i < init_net.ct.htable_size) {
- hlist_nulls_for_each_entry(h, nn, &init_net.ct.hash[i], hnnode)
+ if (i < net->ct.htable_size) {
+ hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode)
untimeout(h, timeout);
}
spin_unlock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
@@ -320,7 +320,7 @@
}
/* try to delete object, fail if it is still in use. */
-static int ctnl_timeout_try_del(struct ctnl_timeout *timeout)
+static int ctnl_timeout_try_del(struct net *net, struct ctnl_timeout *timeout)
{
int ret = 0;
@@ -329,7 +329,7 @@
/* We are protected by nfnl mutex. */
list_del_rcu(&timeout->head);
nf_ct_l4proto_put(timeout->l4proto);
- ctnl_untimeout(timeout);
+ ctnl_untimeout(net, timeout);
kfree_rcu(timeout, rcu_head);
} else {
/* still in use, restore reference counter. */
@@ -344,23 +344,24 @@
const struct nlmsghdr *nlh,
const struct nlattr * const cda[])
{
+ struct net *net = sock_net(skb->sk);
char *name;
struct ctnl_timeout *cur;
int ret = -ENOENT;
if (!cda[CTA_TIMEOUT_NAME]) {
- list_for_each_entry(cur, &cttimeout_list, head)
- ctnl_timeout_try_del(cur);
+ list_for_each_entry(cur, &net->nfct_timeout_list, head)
+ ctnl_timeout_try_del(net, cur);
return 0;
}
name = nla_data(cda[CTA_TIMEOUT_NAME]);
- list_for_each_entry(cur, &cttimeout_list, head) {
+ list_for_each_entry(cur, &net->nfct_timeout_list, head) {
if (strncmp(cur->name, name, CTNL_TIMEOUT_NAME_MAX) != 0)
continue;
- ret = ctnl_timeout_try_del(cur);
+ ret = ctnl_timeout_try_del(net, cur);
if (ret < 0)
return ret;
@@ -511,12 +512,13 @@
}
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
-static struct ctnl_timeout *ctnl_timeout_find_get(const char *name)
+static struct ctnl_timeout *
+ctnl_timeout_find_get(struct net *net, const char *name)
{
struct ctnl_timeout *timeout, *matching = NULL;
rcu_read_lock();
- list_for_each_entry_rcu(timeout, &cttimeout_list, head) {
+ list_for_each_entry_rcu(timeout, &net->nfct_timeout_list, head) {
if (strncmp(timeout->name, name, CTNL_TIMEOUT_NAME_MAX) != 0)
continue;
@@ -569,10 +571,39 @@
MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK_TIMEOUT);
+static int __net_init cttimeout_net_init(struct net *net)
+{
+ INIT_LIST_HEAD(&net->nfct_timeout_list);
+
+ return 0;
+}
+
+static void __net_exit cttimeout_net_exit(struct net *net)
+{
+ struct ctnl_timeout *cur, *tmp;
+
+ ctnl_untimeout(net, NULL);
+
+ list_for_each_entry_safe(cur, tmp, &net->nfct_timeout_list, head) {
+ list_del_rcu(&cur->head);
+ nf_ct_l4proto_put(cur->l4proto);
+ kfree_rcu(cur, rcu_head);
+ }
+}
+
+static struct pernet_operations cttimeout_ops = {
+ .init = cttimeout_net_init,
+ .exit = cttimeout_net_exit,
+};
+
static int __init cttimeout_init(void)
{
int ret;
+ ret = register_pernet_subsys(&cttimeout_ops);
+ if (ret < 0)
+ return ret;
+
ret = nfnetlink_subsys_register(&cttimeout_subsys);
if (ret < 0) {
pr_err("cttimeout_init: cannot register cttimeout with "
@@ -586,28 +617,17 @@
return 0;
err_out:
+ unregister_pernet_subsys(&cttimeout_ops);
return ret;
}
static void __exit cttimeout_exit(void)
{
- struct ctnl_timeout *cur, *tmp;
-
pr_info("cttimeout: unregistering from nfnetlink.\n");
nfnetlink_subsys_unregister(&cttimeout_subsys);
- /* Make sure no conntrack objects refer to custom timeouts anymore. */
- ctnl_untimeout(NULL);
-
- list_for_each_entry_safe(cur, tmp, &cttimeout_list, head) {
- list_del_rcu(&cur->head);
- /* We are sure that our objects have no clients at this point,
- * it's safe to release them all without checking refcnt.
- */
- nf_ct_l4proto_put(cur->l4proto);
- kfree_rcu(cur, rcu_head);
- }
+ unregister_pernet_subsys(&cttimeout_ops);
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
RCU_INIT_POINTER(nf_ct_timeout_find_get_hook, NULL);
RCU_INIT_POINTER(nf_ct_timeout_put_hook, NULL);
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 740cce4..70b6bd3 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -293,24 +293,20 @@
return status;
}
-static int
+static void
nfulnl_set_timeout(struct nfulnl_instance *inst, u_int32_t timeout)
{
spin_lock_bh(&inst->lock);
inst->flushtimeout = timeout;
spin_unlock_bh(&inst->lock);
-
- return 0;
}
-static int
+static void
nfulnl_set_qthresh(struct nfulnl_instance *inst, u_int32_t qthresh)
{
spin_lock_bh(&inst->lock);
inst->qthreshold = qthresh;
spin_unlock_bh(&inst->lock);
-
- return 0;
}
static int
@@ -1064,15 +1060,26 @@
{
unsigned int i;
struct nfnl_log_net *log = nfnl_log_pernet(net);
+#ifdef CONFIG_PROC_FS
+ struct proc_dir_entry *proc;
+ kuid_t root_uid;
+ kgid_t root_gid;
+#endif
for (i = 0; i < INSTANCE_BUCKETS; i++)
INIT_HLIST_HEAD(&log->instance_table[i]);
spin_lock_init(&log->instances_lock);
#ifdef CONFIG_PROC_FS
- if (!proc_create("nfnetlink_log", 0440,
- net->nf.proc_netfilter, &nful_file_ops))
+ proc = proc_create("nfnetlink_log", 0440,
+ net->nf.proc_netfilter, &nful_file_ops);
+ if (!proc)
return -ENOMEM;
+
+ root_uid = make_kuid(net->user_ns, 0);
+ root_gid = make_kgid(net->user_ns, 0);
+ if (uid_valid(root_uid) && gid_valid(root_gid))
+ proc_set_user(proc, root_uid, root_gid);
#endif
return 0;
}
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 1915cab..fe885bf 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -18,12 +18,16 @@
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/smp.h>
+#include <linux/static_key.h>
#include <net/dst.h>
#include <net/sock.h>
#include <net/tcp_states.h> /* for TCP_TIME_WAIT */
#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
#include <net/netfilter/nft_meta.h>
+#include <uapi/linux/netfilter_bridge.h> /* NF_BR_PRE_ROUTING */
+
void nft_meta_get_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
@@ -188,6 +192,13 @@
}
EXPORT_SYMBOL_GPL(nft_meta_get_eval);
+/* don't change or set _LOOPBACK, _USER, etc. */
+static bool pkt_type_ok(u32 p)
+{
+ return p == PACKET_HOST || p == PACKET_BROADCAST ||
+ p == PACKET_MULTICAST || p == PACKET_OTHERHOST;
+}
+
void nft_meta_set_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt)
@@ -203,6 +214,11 @@
case NFT_META_PRIORITY:
skb->priority = value;
break;
+ case NFT_META_PKTTYPE:
+ if (skb->pkt_type != value &&
+ pkt_type_ok(value) && pkt_type_ok(skb->pkt_type))
+ skb->pkt_type = value;
+ break;
case NFT_META_NFTRACE:
skb->nf_trace = 1;
break;
@@ -271,6 +287,24 @@
}
EXPORT_SYMBOL_GPL(nft_meta_get_init);
+static int nft_meta_set_init_pkttype(const struct nft_ctx *ctx)
+{
+ unsigned int hooks;
+
+ switch (ctx->afi->family) {
+ case NFPROTO_BRIDGE:
+ hooks = 1 << NF_BR_PRE_ROUTING;
+ break;
+ case NFPROTO_NETDEV:
+ hooks = 1 << NF_NETDEV_INGRESS;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return nft_chain_validate_hooks(ctx->chain, hooks);
+}
+
int nft_meta_set_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[])
@@ -288,6 +322,12 @@
case NFT_META_NFTRACE:
len = sizeof(u8);
break;
+ case NFT_META_PKTTYPE:
+ err = nft_meta_set_init_pkttype(ctx);
+ if (err)
+ return err;
+ len = sizeof(u8);
+ break;
default:
return -EOPNOTSUPP;
}
@@ -297,6 +337,9 @@
if (err < 0)
return err;
+ if (priv->key == NFT_META_NFTRACE)
+ static_branch_inc(&nft_trace_enabled);
+
return 0;
}
EXPORT_SYMBOL_GPL(nft_meta_set_init);
@@ -334,6 +377,16 @@
}
EXPORT_SYMBOL_GPL(nft_meta_set_dump);
+void nft_meta_set_destroy(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ const struct nft_meta *priv = nft_expr_priv(expr);
+
+ if (priv->key == NFT_META_NFTRACE)
+ static_branch_dec(&nft_trace_enabled);
+}
+EXPORT_SYMBOL_GPL(nft_meta_set_destroy);
+
static struct nft_expr_type nft_meta_type;
static const struct nft_expr_ops nft_meta_get_ops = {
.type = &nft_meta_type,
@@ -348,6 +401,7 @@
.size = NFT_EXPR_SIZE(sizeof(struct nft_meta)),
.eval = nft_meta_set_eval,
.init = nft_meta_set_init,
+ .destroy = nft_meta_set_destroy,
.dump = nft_meta_set_dump,
};
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index 09b4b07..12cd4bf 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -107,10 +107,13 @@
}
static const struct nla_policy nft_payload_policy[NFTA_PAYLOAD_MAX + 1] = {
- [NFTA_PAYLOAD_DREG] = { .type = NLA_U32 },
- [NFTA_PAYLOAD_BASE] = { .type = NLA_U32 },
- [NFTA_PAYLOAD_OFFSET] = { .type = NLA_U32 },
- [NFTA_PAYLOAD_LEN] = { .type = NLA_U32 },
+ [NFTA_PAYLOAD_SREG] = { .type = NLA_U32 },
+ [NFTA_PAYLOAD_DREG] = { .type = NLA_U32 },
+ [NFTA_PAYLOAD_BASE] = { .type = NLA_U32 },
+ [NFTA_PAYLOAD_OFFSET] = { .type = NLA_U32 },
+ [NFTA_PAYLOAD_LEN] = { .type = NLA_U32 },
+ [NFTA_PAYLOAD_CSUM_TYPE] = { .type = NLA_U32 },
+ [NFTA_PAYLOAD_CSUM_OFFSET] = { .type = NLA_U32 },
};
static int nft_payload_init(const struct nft_ctx *ctx,
@@ -160,6 +163,118 @@
.dump = nft_payload_dump,
};
+static void nft_payload_set_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_payload_set *priv = nft_expr_priv(expr);
+ struct sk_buff *skb = pkt->skb;
+ const u32 *src = ®s->data[priv->sreg];
+ int offset, csum_offset;
+ __wsum fsum, tsum;
+ __sum16 sum;
+
+ switch (priv->base) {
+ case NFT_PAYLOAD_LL_HEADER:
+ if (!skb_mac_header_was_set(skb))
+ goto err;
+ offset = skb_mac_header(skb) - skb->data;
+ break;
+ case NFT_PAYLOAD_NETWORK_HEADER:
+ offset = skb_network_offset(skb);
+ break;
+ case NFT_PAYLOAD_TRANSPORT_HEADER:
+ offset = pkt->xt.thoff;
+ break;
+ default:
+ BUG();
+ }
+
+ csum_offset = offset + priv->csum_offset;
+ offset += priv->offset;
+
+ if (priv->csum_type == NFT_PAYLOAD_CSUM_INET &&
+ (priv->base != NFT_PAYLOAD_TRANSPORT_HEADER ||
+ skb->ip_summed != CHECKSUM_PARTIAL)) {
+ if (skb_copy_bits(skb, csum_offset, &sum, sizeof(sum)) < 0)
+ goto err;
+
+ fsum = skb_checksum(skb, offset, priv->len, 0);
+ tsum = csum_partial(src, priv->len, 0);
+ sum = csum_fold(csum_add(csum_sub(~csum_unfold(sum), fsum),
+ tsum));
+ if (sum == 0)
+ sum = CSUM_MANGLED_0;
+
+ if (!skb_make_writable(skb, csum_offset + sizeof(sum)) ||
+ skb_store_bits(skb, csum_offset, &sum, sizeof(sum)) < 0)
+ goto err;
+ }
+
+ if (!skb_make_writable(skb, max(offset + priv->len, 0)) ||
+ skb_store_bits(skb, offset, src, priv->len) < 0)
+ goto err;
+
+ return;
+err:
+ regs->verdict.code = NFT_BREAK;
+}
+
+static int nft_payload_set_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_payload_set *priv = nft_expr_priv(expr);
+
+ priv->base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE]));
+ priv->offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET]));
+ priv->len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN]));
+ priv->sreg = nft_parse_register(tb[NFTA_PAYLOAD_SREG]);
+
+ if (tb[NFTA_PAYLOAD_CSUM_TYPE])
+ priv->csum_type =
+ ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_TYPE]));
+ if (tb[NFTA_PAYLOAD_CSUM_OFFSET])
+ priv->csum_offset =
+ ntohl(nla_get_be32(tb[NFTA_PAYLOAD_CSUM_OFFSET]));
+
+ switch (priv->csum_type) {
+ case NFT_PAYLOAD_CSUM_NONE:
+ case NFT_PAYLOAD_CSUM_INET:
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return nft_validate_register_load(priv->sreg, priv->len);
+}
+
+static int nft_payload_set_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_payload_set *priv = nft_expr_priv(expr);
+
+ if (nft_dump_register(skb, NFTA_PAYLOAD_SREG, priv->sreg) ||
+ nla_put_be32(skb, NFTA_PAYLOAD_BASE, htonl(priv->base)) ||
+ nla_put_be32(skb, NFTA_PAYLOAD_OFFSET, htonl(priv->offset)) ||
+ nla_put_be32(skb, NFTA_PAYLOAD_LEN, htonl(priv->len)) ||
+ nla_put_be32(skb, NFTA_PAYLOAD_CSUM_TYPE, htonl(priv->csum_type)) ||
+ nla_put_be32(skb, NFTA_PAYLOAD_CSUM_OFFSET,
+ htonl(priv->csum_offset)))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static const struct nft_expr_ops nft_payload_set_ops = {
+ .type = &nft_payload_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_payload_set)),
+ .eval = nft_payload_set_eval,
+ .init = nft_payload_set_init,
+ .dump = nft_payload_set_dump,
+};
+
static const struct nft_expr_ops *
nft_payload_select_ops(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
@@ -167,8 +282,7 @@
enum nft_payload_bases base;
unsigned int offset, len;
- if (tb[NFTA_PAYLOAD_DREG] == NULL ||
- tb[NFTA_PAYLOAD_BASE] == NULL ||
+ if (tb[NFTA_PAYLOAD_BASE] == NULL ||
tb[NFTA_PAYLOAD_OFFSET] == NULL ||
tb[NFTA_PAYLOAD_LEN] == NULL)
return ERR_PTR(-EINVAL);
@@ -183,6 +297,15 @@
return ERR_PTR(-EOPNOTSUPP);
}
+ if (tb[NFTA_PAYLOAD_SREG] != NULL) {
+ if (tb[NFTA_PAYLOAD_DREG] != NULL)
+ return ERR_PTR(-EINVAL);
+ return &nft_payload_set_ops;
+ }
+
+ if (tb[NFTA_PAYLOAD_DREG] == NULL)
+ return ERR_PTR(-EINVAL);
+
offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET]));
len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN]));
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index d4aaad7..c8a0b7d 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -26,6 +26,7 @@
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/audit.h>
+#include <linux/user_namespace.h>
#include <net/net_namespace.h>
#include <linux/netfilter/x_tables.h>
@@ -1226,6 +1227,8 @@
#ifdef CONFIG_PROC_FS
char buf[XT_FUNCTION_MAXNAMELEN];
struct proc_dir_entry *proc;
+ kuid_t root_uid;
+ kgid_t root_gid;
#endif
if (af >= ARRAY_SIZE(xt_prefix))
@@ -1233,12 +1236,17 @@
#ifdef CONFIG_PROC_FS
+ root_uid = make_kuid(net->user_ns, 0);
+ root_gid = make_kgid(net->user_ns, 0);
+
strlcpy(buf, xt_prefix[af], sizeof(buf));
strlcat(buf, FORMAT_TABLES, sizeof(buf));
proc = proc_create_data(buf, 0440, net->proc_net, &xt_table_ops,
(void *)(unsigned long)af);
if (!proc)
goto out;
+ if (uid_valid(root_uid) && gid_valid(root_gid))
+ proc_set_user(proc, root_uid, root_gid);
strlcpy(buf, xt_prefix[af], sizeof(buf));
strlcat(buf, FORMAT_MATCHES, sizeof(buf));
@@ -1246,6 +1254,8 @@
(void *)(unsigned long)af);
if (!proc)
goto out_remove_tables;
+ if (uid_valid(root_uid) && gid_valid(root_gid))
+ proc_set_user(proc, root_uid, root_gid);
strlcpy(buf, xt_prefix[af], sizeof(buf));
strlcat(buf, FORMAT_TARGETS, sizeof(buf));
@@ -1253,6 +1263,8 @@
(void *)(unsigned long)af);
if (!proc)
goto out_remove_matches;
+ if (uid_valid(root_uid) && gid_valid(root_gid))
+ proc_set_user(proc, root_uid, root_gid);
#endif
return 0;
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index e7ac07e..6669e68 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -143,7 +143,7 @@
goto out;
}
- timeout = timeout_find_get(timeout_name);
+ timeout = timeout_find_get(par->net, timeout_name);
if (timeout == NULL) {
ret = -ENOENT;
pr_info("No such timeout policy \"%s\"\n", timeout_name);
diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c
index 54eaeb4..a086a91 100644
--- a/net/netfilter/xt_cgroup.c
+++ b/net/netfilter/xt_cgroup.c
@@ -24,9 +24,9 @@
MODULE_ALIAS("ipt_cgroup");
MODULE_ALIAS("ip6t_cgroup");
-static int cgroup_mt_check(const struct xt_mtchk_param *par)
+static int cgroup_mt_check_v0(const struct xt_mtchk_param *par)
{
- struct xt_cgroup_info *info = par->matchinfo;
+ struct xt_cgroup_info_v0 *info = par->matchinfo;
if (info->invert & ~1)
return -EINVAL;
@@ -34,10 +34,41 @@
return 0;
}
-static bool
-cgroup_mt(const struct sk_buff *skb, struct xt_action_param *par)
+static int cgroup_mt_check_v1(const struct xt_mtchk_param *par)
{
- const struct xt_cgroup_info *info = par->matchinfo;
+ struct xt_cgroup_info_v1 *info = par->matchinfo;
+ struct cgroup *cgrp;
+
+ if ((info->invert_path & ~1) || (info->invert_classid & ~1))
+ return -EINVAL;
+
+ if (!info->has_path && !info->has_classid) {
+ pr_info("xt_cgroup: no path or classid specified\n");
+ return -EINVAL;
+ }
+
+ if (info->has_path && info->has_classid) {
+ pr_info("xt_cgroup: both path and classid specified\n");
+ return -EINVAL;
+ }
+
+ if (info->has_path) {
+ cgrp = cgroup_get_from_path(info->path);
+ if (IS_ERR(cgrp)) {
+ pr_info("xt_cgroup: invalid path, errno=%ld\n",
+ PTR_ERR(cgrp));
+ return -EINVAL;
+ }
+ info->priv = cgrp;
+ }
+
+ return 0;
+}
+
+static bool
+cgroup_mt_v0(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_cgroup_info_v0 *info = par->matchinfo;
if (skb->sk == NULL || !sk_fullsock(skb->sk))
return false;
@@ -46,27 +77,67 @@
info->invert;
}
-static struct xt_match cgroup_mt_reg __read_mostly = {
- .name = "cgroup",
- .revision = 0,
- .family = NFPROTO_UNSPEC,
- .checkentry = cgroup_mt_check,
- .match = cgroup_mt,
- .matchsize = sizeof(struct xt_cgroup_info),
- .me = THIS_MODULE,
- .hooks = (1 << NF_INET_LOCAL_OUT) |
- (1 << NF_INET_POST_ROUTING) |
- (1 << NF_INET_LOCAL_IN),
+static bool cgroup_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_cgroup_info_v1 *info = par->matchinfo;
+ struct sock_cgroup_data *skcd = &skb->sk->sk_cgrp_data;
+ struct cgroup *ancestor = info->priv;
+
+ if (!skb->sk || !sk_fullsock(skb->sk))
+ return false;
+
+ if (ancestor)
+ return cgroup_is_descendant(sock_cgroup_ptr(skcd), ancestor) ^
+ info->invert_path;
+ else
+ return (info->classid == sock_cgroup_classid(skcd)) ^
+ info->invert_classid;
+}
+
+static void cgroup_mt_destroy_v1(const struct xt_mtdtor_param *par)
+{
+ struct xt_cgroup_info_v1 *info = par->matchinfo;
+
+ if (info->priv)
+ cgroup_put(info->priv);
+}
+
+static struct xt_match cgroup_mt_reg[] __read_mostly = {
+ {
+ .name = "cgroup",
+ .revision = 0,
+ .family = NFPROTO_UNSPEC,
+ .checkentry = cgroup_mt_check_v0,
+ .match = cgroup_mt_v0,
+ .matchsize = sizeof(struct xt_cgroup_info_v0),
+ .me = THIS_MODULE,
+ .hooks = (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_POST_ROUTING) |
+ (1 << NF_INET_LOCAL_IN),
+ },
+ {
+ .name = "cgroup",
+ .revision = 1,
+ .family = NFPROTO_UNSPEC,
+ .checkentry = cgroup_mt_check_v1,
+ .match = cgroup_mt_v1,
+ .matchsize = sizeof(struct xt_cgroup_info_v1),
+ .destroy = cgroup_mt_destroy_v1,
+ .me = THIS_MODULE,
+ .hooks = (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_POST_ROUTING) |
+ (1 << NF_INET_LOCAL_IN),
+ },
};
static int __init cgroup_mt_init(void)
{
- return xt_register_match(&cgroup_mt_reg);
+ return xt_register_matches(cgroup_mt_reg, ARRAY_SIZE(cgroup_mt_reg));
}
static void __exit cgroup_mt_exit(void)
{
- xt_unregister_match(&cgroup_mt_reg);
+ xt_unregister_matches(cgroup_mt_reg, ARRAY_SIZE(cgroup_mt_reg));
}
module_init(cgroup_mt_init);
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 3e88922..8c106d1 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -305,10 +305,10 @@
u16 zone, struct sk_buff *skb)
{
struct ovs_skb_cb ovs_cb = *OVS_CB(skb);
+ int err;
if (key->eth.type == htons(ETH_P_IP)) {
enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone;
- int err;
memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
err = ip_defrag(net, skb, user);
@@ -319,28 +319,13 @@
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
} else if (key->eth.type == htons(ETH_P_IPV6)) {
enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone;
- struct sk_buff *reasm;
memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm));
- reasm = nf_ct_frag6_gather(net, skb, user);
- if (!reasm)
- return -EINPROGRESS;
+ err = nf_ct_frag6_gather(net, skb, user);
+ if (err)
+ return err;
- if (skb == reasm) {
- kfree_skb(skb);
- return -EINVAL;
- }
-
- /* Don't free 'skb' even though it is one of the original
- * fragments, as we're going to morph it into the head.
- */
- skb_get(skb);
- nf_ct_frag6_consume_orig(reasm);
-
- key->ip.proto = ipv6_hdr(reasm)->nexthdr;
- skb_morph(skb, reasm);
- skb->next = reasm->next;
- consume_skb(reasm);
+ key->ip.proto = ipv6_hdr(skb)->nexthdr;
ovs_cb.mru = IP6CB(skb)->frag_max_size;
#endif
} else {