Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next
Pablo Neira Ayuso says:
====================
Netfilter updates for net-next
The following patchset contains Netfilter updates for your net-next
tree, they are:
1) Consolidate GRE protocol tracker using new GRE protocol definitions,
patches from Gao Feng.
2) Properly parse continuation lines in SIP helper, update allowed
characters in Call-ID header and allow tabs in SIP headers as
specified by RFC3261, from Marco Angaroni.
3) Remove useless code in FTP conntrack helper, also from Gao Feng.
4) Add number generation expression for nf_tables, with random and
incremental generators. This also includes specific offset to add
to the result, patches from Laura Garcia Liebana. Liping Zhang
follows with a fix to avoid a race in this new expression.
5) Fix new quota expression inversion logic, added in the previous
pull request.
6) Missing validation of queue configuration in nft_queue, patch
from Liping Zhang.
7) Remove unused ctl_table_path, as part of the deprecation of the
ip_conntrack sysctl interface coming in the previous batch.
Again from Liping Zhang.
8) Add offset attribute to nft_hash expression, so we can generate
any output from a specific base offset. Moreover, check for
possible overflow, patches from Laura Garcia.
9) Allow to invert dynamic set insertion from packet path, to check
for overflows in case the set is full.
10) Revisit nft_set_pktinfo*() logic from nf_tables to ensure
proper initialization of layer 4 protocol. Consolidate pktinfo
structure initialization for bridge and netdev families.
11) Do not inconditionally drop IPv6 packets that we cannot parse
transport protocol for ip6 and inet families, let the user decide
on this via ruleset policy.
12) Get rid of gotos in __nf_ct_try_assign_helper().
13) Check for return value in register_netdevice_notifier() and
nft_register_chain_type(), patches from Gao Feng.
14) Get rid of CONFIG_IP6_NF_IPTABLES dependency in nf_queue
infrastructure that is common to nf_tables, from Liping Zhang.
15) Disable 'found' and 'searched' stats that are updates from the
packet hotpath, not very useful these days.
16) Validate maximum value of u32 netlink attributes in nf_tables,
this introduces nft_parse_u32_check(). From Laura Garcia.
17) Add missing code to integrate nft_queue with maps, patch from
Liping Zhang. This also includes missing support ranges in
nft_queue bridge family.
18) Fix check in nft_payload_fast_eval() that ensure that we don't
go over the skbuff data boundary, from Liping Zhang.
19) Check if transport protocol is set from nf_tables tracing and
payload expression. Again from Liping Zhang.
20) Use net_get_random_once() whenever possible, from Gao Feng.
21) Replace hardcoded value by sizeof() in xt_helper, from Gao Feng.
22) Remove superfluous check for found element in nft_lookup.
23) Simplify TCPMSS logic to check for minimum MTU, from Gao Feng.
24) Replace double linked list by single linked list in Netfilter
core hook infrastructure, patchset from Aaron Conole. This
includes several patches to prepare this update.
25) Fix wrong sequence adjustment of TCP RST with no ACK, from
Gao Feng.
26) Relax check for direction attribute in nft_ct for layer 3 and 4
protocol fields, from Liping Zhang.
27) Add new revision for hashlimit to support higher pps of upto 1
million, from Vishwanath Pai.
28) Evict stale entries in nf_conntrack when reading entries from
/proc/net/nf_conntrack, from Florian Westphal.
29) Fix transparent match for IPv6 request sockets, from Krisztian
Kovacs.
30) Add new range expression for nf_tables.
31) Add missing code to support for flags in nft_log. Expose NF_LOG_*
flags via uapi and use it from the generic logging infrastructure,
instead of using xt specific definitions, from Liping Zhang.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 1e8a5c7..136ae6bb 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1799,7 +1799,7 @@
#endif
struct netdev_queue __rcu *ingress_queue;
#ifdef CONFIG_NETFILTER_INGRESS
- struct list_head nf_hooks_ingress;
+ struct nf_hook_entry __rcu *nf_hooks_ingress;
#endif
unsigned char broadcast[MAX_ADDR_LEN];
diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index e82b767..abc7fdc 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -55,12 +55,34 @@
struct net_device *out;
struct sock *sk;
struct net *net;
- struct list_head *hook_list;
+ struct nf_hook_entry __rcu *hook_entries;
int (*okfn)(struct net *, struct sock *, struct sk_buff *);
};
+typedef unsigned int nf_hookfn(void *priv,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state);
+struct nf_hook_ops {
+ struct list_head list;
+
+ /* User fills in from here down. */
+ nf_hookfn *hook;
+ struct net_device *dev;
+ void *priv;
+ u_int8_t pf;
+ unsigned int hooknum;
+ /* Hooks are ordered in ascending priority. */
+ int priority;
+};
+
+struct nf_hook_entry {
+ struct nf_hook_entry __rcu *next;
+ struct nf_hook_ops ops;
+ const struct nf_hook_ops *orig_ops;
+};
+
static inline void nf_hook_state_init(struct nf_hook_state *p,
- struct list_head *hook_list,
+ struct nf_hook_entry *hook_entry,
unsigned int hook,
int thresh, u_int8_t pf,
struct net_device *indev,
@@ -76,26 +98,11 @@
p->out = outdev;
p->sk = sk;
p->net = net;
- p->hook_list = hook_list;
+ RCU_INIT_POINTER(p->hook_entries, hook_entry);
p->okfn = okfn;
}
-typedef unsigned int nf_hookfn(void *priv,
- struct sk_buff *skb,
- const struct nf_hook_state *state);
-struct nf_hook_ops {
- struct list_head list;
-
- /* User fills in from here down. */
- nf_hookfn *hook;
- struct net_device *dev;
- void *priv;
- u_int8_t pf;
- unsigned int hooknum;
- /* Hooks are ordered in ascending priority. */
- int priority;
-};
struct nf_sockopt_ops {
struct list_head list;
@@ -163,7 +170,8 @@
int (*okfn)(struct net *, struct sock *, struct sk_buff *),
int thresh)
{
- struct list_head *hook_list;
+ struct nf_hook_entry *hook_head;
+ int ret = 1;
#ifdef HAVE_JUMP_LABEL
if (__builtin_constant_p(pf) &&
@@ -172,16 +180,19 @@
return 1;
#endif
- hook_list = &net->nf.hooks[pf][hook];
-
- if (!list_empty(hook_list)) {
+ rcu_read_lock();
+ hook_head = rcu_dereference(net->nf.hooks[pf][hook]);
+ if (hook_head) {
struct nf_hook_state state;
- nf_hook_state_init(&state, hook_list, hook, thresh,
+ nf_hook_state_init(&state, hook_head, hook, thresh,
pf, indev, outdev, sk, net, okfn);
- return nf_hook_slow(skb, &state);
+
+ ret = nf_hook_slow(skb, &state);
}
- return 1;
+ rcu_read_unlock();
+
+ return ret;
}
static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net,
diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h
index 2755057..1d1ef4e 100644
--- a/include/linux/netfilter/nf_conntrack_common.h
+++ b/include/linux/netfilter/nf_conntrack_common.h
@@ -4,13 +4,9 @@
#include <uapi/linux/netfilter/nf_conntrack_common.h>
struct ip_conntrack_stat {
- unsigned int searched;
unsigned int found;
- unsigned int new;
unsigned int invalid;
unsigned int ignore;
- unsigned int delete;
- unsigned int delete_list;
unsigned int insert;
unsigned int insert_failed;
unsigned int drop;
diff --git a/include/linux/netfilter/nf_conntrack_proto_gre.h b/include/linux/netfilter/nf_conntrack_proto_gre.h
index df78dc2..dee0acd 100644
--- a/include/linux/netfilter/nf_conntrack_proto_gre.h
+++ b/include/linux/netfilter/nf_conntrack_proto_gre.h
@@ -1,68 +1,8 @@
#ifndef _CONNTRACK_PROTO_GRE_H
#define _CONNTRACK_PROTO_GRE_H
#include <asm/byteorder.h>
-
-/* GRE PROTOCOL HEADER */
-
-/* GRE Version field */
-#define GRE_VERSION_1701 0x0
-#define GRE_VERSION_PPTP 0x1
-
-/* GRE Protocol field */
-#define GRE_PROTOCOL_PPTP 0x880B
-
-/* GRE Flags */
-#define GRE_FLAG_C 0x80
-#define GRE_FLAG_R 0x40
-#define GRE_FLAG_K 0x20
-#define GRE_FLAG_S 0x10
-#define GRE_FLAG_A 0x80
-
-#define GRE_IS_C(f) ((f)&GRE_FLAG_C)
-#define GRE_IS_R(f) ((f)&GRE_FLAG_R)
-#define GRE_IS_K(f) ((f)&GRE_FLAG_K)
-#define GRE_IS_S(f) ((f)&GRE_FLAG_S)
-#define GRE_IS_A(f) ((f)&GRE_FLAG_A)
-
-/* GRE is a mess: Four different standards */
-struct gre_hdr {
-#if defined(__LITTLE_ENDIAN_BITFIELD)
- __u16 rec:3,
- srr:1,
- seq:1,
- key:1,
- routing:1,
- csum:1,
- version:3,
- reserved:4,
- ack:1;
-#elif defined(__BIG_ENDIAN_BITFIELD)
- __u16 csum:1,
- routing:1,
- key:1,
- seq:1,
- srr:1,
- rec:3,
- ack:1,
- reserved:4,
- version:3;
-#else
-#error "Adjust your <asm/byteorder.h> defines"
-#endif
- __be16 protocol;
-};
-
-/* modified GRE header for PPTP */
-struct gre_hdr_pptp {
- __u8 flags; /* bitfield */
- __u8 version; /* should be GRE_VERSION_PPTP */
- __be16 protocol; /* should be GRE_PROTOCOL_PPTP */
- __be16 payload_len; /* size of ppp payload, not inc. gre header */
- __be16 call_id; /* peer's call_id for this session */
- __be32 seq; /* sequence number. Present if S==1 */
- __be32 ack; /* seq number of highest packet received by */
- /* sender in this session */
-};
+#include <net/gre.h>
+#include <net/pptp.h>
struct nf_ct_gre {
unsigned int stream_timeout;
diff --git a/include/linux/netfilter_ingress.h b/include/linux/netfilter_ingress.h
index 5fcd375..33e37fb 100644
--- a/include/linux/netfilter_ingress.h
+++ b/include/linux/netfilter_ingress.h
@@ -11,22 +11,30 @@
if (!static_key_false(&nf_hooks_needed[NFPROTO_NETDEV][NF_NETDEV_INGRESS]))
return false;
#endif
- return !list_empty(&skb->dev->nf_hooks_ingress);
+ return rcu_access_pointer(skb->dev->nf_hooks_ingress);
}
+/* caller must hold rcu_read_lock */
static inline int nf_hook_ingress(struct sk_buff *skb)
{
+ struct nf_hook_entry *e = rcu_dereference(skb->dev->nf_hooks_ingress);
struct nf_hook_state state;
- nf_hook_state_init(&state, &skb->dev->nf_hooks_ingress,
- NF_NETDEV_INGRESS, INT_MIN, NFPROTO_NETDEV,
- skb->dev, NULL, NULL, dev_net(skb->dev), NULL);
+ /* Must recheck the ingress hook head, in the event it became NULL
+ * after the check in nf_hook_ingress_active evaluated to true.
+ */
+ if (unlikely(!e))
+ return 0;
+
+ nf_hook_state_init(&state, e, NF_NETDEV_INGRESS, INT_MIN,
+ NFPROTO_NETDEV, skb->dev, NULL, NULL,
+ dev_net(skb->dev), NULL);
return nf_hook_slow(skb, &state);
}
static inline void nf_hook_ingress_init(struct net_device *dev)
{
- INIT_LIST_HEAD(&dev->nf_hooks_ingress);
+ RCU_INIT_POINTER(dev->nf_hooks_ingress, NULL);
}
#else /* CONFIG_NETFILTER_INGRESS */
static inline int nf_hook_ingress_active(struct sk_buff *skb)
diff --git a/include/net/netfilter/br_netfilter.h b/include/net/netfilter/br_netfilter.h
index e8d1448..0b0c35c 100644
--- a/include/net/netfilter/br_netfilter.h
+++ b/include/net/netfilter/br_netfilter.h
@@ -15,6 +15,12 @@
void nf_bridge_update_protocol(struct sk_buff *skb);
+int br_nf_hook_thresh(unsigned int hook, struct net *net, struct sock *sk,
+ struct sk_buff *skb, struct net_device *indev,
+ struct net_device *outdev,
+ int (*okfn)(struct net *, struct sock *,
+ struct sk_buff *));
+
static inline struct nf_bridge_info *
nf_bridge_info_get(const struct sk_buff *skb)
{
diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h
index cdc920b..8992e42 100644
--- a/include/net/netfilter/nf_conntrack_l3proto.h
+++ b/include/net/netfilter/nf_conntrack_l3proto.h
@@ -63,10 +63,6 @@
size_t nla_size;
-#ifdef CONFIG_SYSCTL
- const char *ctl_table_path;
-#endif /* CONFIG_SYSCTL */
-
/* Init l3proto pernet data */
int (*init_net)(struct net *net);
diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h
index ee07dc8..309cd26 100644
--- a/include/net/netfilter/nf_log.h
+++ b/include/net/netfilter/nf_log.h
@@ -2,15 +2,10 @@
#define _NF_LOG_H
#include <linux/netfilter.h>
+#include <linux/netfilter/nf_log.h>
-/* those NF_LOG_* defines and struct nf_loginfo are legacy definitios that will
- * disappear once iptables is replaced with pkttables. Please DO NOT use them
- * for any new code! */
-#define NF_LOG_TCPSEQ 0x01 /* Log TCP sequence numbers */
-#define NF_LOG_TCPOPT 0x02 /* Log TCP options */
-#define NF_LOG_IPOPT 0x04 /* Log IP options */
-#define NF_LOG_UID 0x08 /* Log UID owning local socket */
-#define NF_LOG_MASK 0x0f
+/* Log tcp sequence, tcp options, ip options and uid owning local socket */
+#define NF_LOG_DEFAULT_MASK 0x0f
/* This flag indicates that copy_len field in nf_loginfo is set */
#define NF_LOG_F_COPY_LEN 0x1
diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h
index 0dbce55..2280cfe 100644
--- a/include/net/netfilter/nf_queue.h
+++ b/include/net/netfilter/nf_queue.h
@@ -11,7 +11,6 @@
struct sk_buff *skb;
unsigned int id;
- struct nf_hook_ops *elem;
struct nf_hook_state state;
u16 size; /* sizeof(entry) + saved route keys */
@@ -22,10 +21,10 @@
/* Packet queuing */
struct nf_queue_handler {
- int (*outfn)(struct nf_queue_entry *entry,
- unsigned int queuenum);
- void (*nf_hook_drop)(struct net *net,
- struct nf_hook_ops *ops);
+ int (*outfn)(struct nf_queue_entry *entry,
+ unsigned int queuenum);
+ void (*nf_hook_drop)(struct net *net,
+ const struct nf_hook_entry *hooks);
};
void nf_register_queue_handler(struct net *net, const struct nf_queue_handler *qh);
@@ -41,23 +40,19 @@
*jhash_initval = prandom_u32();
}
-static inline u32 hash_v4(const struct sk_buff *skb, u32 jhash_initval)
+static inline u32 hash_v4(const struct iphdr *iph, u32 initval)
{
- const struct iphdr *iph = ip_hdr(skb);
-
/* packets in either direction go into same queue */
if ((__force u32)iph->saddr < (__force u32)iph->daddr)
return jhash_3words((__force u32)iph->saddr,
- (__force u32)iph->daddr, iph->protocol, jhash_initval);
+ (__force u32)iph->daddr, iph->protocol, initval);
return jhash_3words((__force u32)iph->daddr,
- (__force u32)iph->saddr, iph->protocol, jhash_initval);
+ (__force u32)iph->saddr, iph->protocol, initval);
}
-#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
-static inline u32 hash_v6(const struct sk_buff *skb, u32 jhash_initval)
+static inline u32 hash_v6(const struct ipv6hdr *ip6h, u32 initval)
{
- const struct ipv6hdr *ip6h = ipv6_hdr(skb);
u32 a, b, c;
if ((__force u32)ip6h->saddr.s6_addr32[3] <
@@ -75,20 +70,50 @@
else
c = (__force u32) ip6h->daddr.s6_addr32[1];
- return jhash_3words(a, b, c, jhash_initval);
+ return jhash_3words(a, b, c, initval);
}
-#endif
+
+static inline u32 hash_bridge(const struct sk_buff *skb, u32 initval)
+{
+ struct ipv6hdr *ip6h, _ip6h;
+ struct iphdr *iph, _iph;
+
+ switch (eth_hdr(skb)->h_proto) {
+ case htons(ETH_P_IP):
+ iph = skb_header_pointer(skb, skb_network_offset(skb),
+ sizeof(*iph), &_iph);
+ if (iph)
+ return hash_v4(iph, initval);
+ break;
+ case htons(ETH_P_IPV6):
+ ip6h = skb_header_pointer(skb, skb_network_offset(skb),
+ sizeof(*ip6h), &_ip6h);
+ if (ip6h)
+ return hash_v6(ip6h, initval);
+ break;
+ }
+
+ return 0;
+}
static inline u32
nfqueue_hash(const struct sk_buff *skb, u16 queue, u16 queues_total, u8 family,
- u32 jhash_initval)
+ u32 initval)
{
- if (family == NFPROTO_IPV4)
- queue += ((u64) hash_v4(skb, jhash_initval) * queues_total) >> 32;
-#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
- else if (family == NFPROTO_IPV6)
- queue += ((u64) hash_v6(skb, jhash_initval) * queues_total) >> 32;
-#endif
+ switch (family) {
+ case NFPROTO_IPV4:
+ queue += reciprocal_scale(hash_v4(ip_hdr(skb), initval),
+ queues_total);
+ break;
+ case NFPROTO_IPV6:
+ queue += reciprocal_scale(hash_v6(ipv6_hdr(skb), initval),
+ queues_total);
+ break;
+ case NFPROTO_BRIDGE:
+ queue += reciprocal_scale(hash_bridge(skb, initval),
+ queues_total);
+ break;
+ }
return queue;
}
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 8972468..5031e07 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -19,6 +19,7 @@
const struct net_device *out;
u8 pf;
u8 hook;
+ bool tprot_set;
u8 tprot;
/* for x_tables compatibility */
struct xt_action_param xt;
@@ -36,6 +37,23 @@
pkt->pf = pkt->xt.family = state->pf;
}
+static inline void nft_set_pktinfo_proto_unspec(struct nft_pktinfo *pkt,
+ struct sk_buff *skb)
+{
+ pkt->tprot_set = false;
+ pkt->tprot = 0;
+ pkt->xt.thoff = 0;
+ pkt->xt.fragoff = 0;
+}
+
+static inline void nft_set_pktinfo_unspec(struct nft_pktinfo *pkt,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ nft_set_pktinfo(pkt, skb, state);
+ nft_set_pktinfo_proto_unspec(pkt, skb);
+}
+
/**
* struct nft_verdict - nf_tables verdict
*
@@ -127,6 +145,7 @@
return type == NFT_DATA_VERDICT ? NFT_REG_VERDICT : NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE;
}
+unsigned int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest);
unsigned int nft_parse_register(const struct nlattr *attr);
int nft_dump_register(struct sk_buff *skb, unsigned int attr, unsigned int reg);
diff --git a/include/net/netfilter/nf_tables_bridge.h b/include/net/netfilter/nf_tables_bridge.h
deleted file mode 100644
index 511fb79..0000000
--- a/include/net/netfilter/nf_tables_bridge.h
+++ /dev/null
@@ -1,7 +0,0 @@
-#ifndef _NET_NF_TABLES_BRIDGE_H
-#define _NET_NF_TABLES_BRIDGE_H
-
-int nft_bridge_iphdr_validate(struct sk_buff *skb);
-int nft_bridge_ip6hdr_validate(struct sk_buff *skb);
-
-#endif /* _NET_NF_TABLES_BRIDGE_H */
diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h
index a9060dd..00f4f6b 100644
--- a/include/net/netfilter/nf_tables_core.h
+++ b/include/net/netfilter/nf_tables_core.h
@@ -28,6 +28,9 @@
int nft_cmp_module_init(void);
void nft_cmp_module_exit(void);
+int nft_range_module_init(void);
+void nft_range_module_exit(void);
+
int nft_lookup_module_init(void);
void nft_lookup_module_exit(void);
diff --git a/include/net/netfilter/nf_tables_ipv4.h b/include/net/netfilter/nf_tables_ipv4.h
index ca6ef6b..968f00b 100644
--- a/include/net/netfilter/nf_tables_ipv4.h
+++ b/include/net/netfilter/nf_tables_ipv4.h
@@ -14,11 +14,54 @@
nft_set_pktinfo(pkt, skb, state);
ip = ip_hdr(pkt->skb);
+ pkt->tprot_set = true;
pkt->tprot = ip->protocol;
pkt->xt.thoff = ip_hdrlen(pkt->skb);
pkt->xt.fragoff = ntohs(ip->frag_off) & IP_OFFSET;
}
+static inline int
+__nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct iphdr *iph, _iph;
+ u32 len, thoff;
+
+ iph = skb_header_pointer(skb, skb_network_offset(skb), sizeof(*iph),
+ &_iph);
+ if (!iph)
+ return -1;
+
+ iph = ip_hdr(skb);
+ if (iph->ihl < 5 || iph->version != 4)
+ return -1;
+
+ len = ntohs(iph->tot_len);
+ thoff = iph->ihl * 4;
+ if (skb->len < len)
+ return -1;
+ else if (len < thoff)
+ return -1;
+
+ pkt->tprot_set = true;
+ pkt->tprot = iph->protocol;
+ pkt->xt.thoff = thoff;
+ pkt->xt.fragoff = ntohs(iph->frag_off) & IP_OFFSET;
+
+ return 0;
+}
+
+static inline void
+nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ nft_set_pktinfo(pkt, skb, state);
+ if (__nft_set_pktinfo_ipv4_validate(pkt, skb, state) < 0)
+ nft_set_pktinfo_proto_unspec(pkt, skb);
+}
+
extern struct nft_af_info nft_af_ipv4;
#endif
diff --git a/include/net/netfilter/nf_tables_ipv6.h b/include/net/netfilter/nf_tables_ipv6.h
index 8ad39a6..d150b50 100644
--- a/include/net/netfilter/nf_tables_ipv6.h
+++ b/include/net/netfilter/nf_tables_ipv6.h
@@ -4,7 +4,7 @@
#include <linux/netfilter_ipv6/ip6_tables.h>
#include <net/ipv6.h>
-static inline int
+static inline void
nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt,
struct sk_buff *skb,
const struct nf_hook_state *state)
@@ -15,15 +15,64 @@
nft_set_pktinfo(pkt, skb, state);
protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, NULL);
- /* If malformed, drop it */
+ if (protohdr < 0) {
+ nft_set_pktinfo_proto_unspec(pkt, skb);
+ return;
+ }
+
+ pkt->tprot_set = true;
+ pkt->tprot = protohdr;
+ pkt->xt.thoff = thoff;
+ pkt->xt.fragoff = frag_off;
+}
+
+static inline int
+__nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ struct ipv6hdr *ip6h, _ip6h;
+ unsigned int thoff = 0;
+ unsigned short frag_off;
+ int protohdr;
+ u32 pkt_len;
+
+ ip6h = skb_header_pointer(skb, skb_network_offset(skb), sizeof(*ip6h),
+ &_ip6h);
+ if (!ip6h)
+ return -1;
+
+ if (ip6h->version != 6)
+ return -1;
+
+ pkt_len = ntohs(ip6h->payload_len);
+ if (pkt_len + sizeof(*ip6h) > skb->len)
+ return -1;
+
+ protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, NULL);
if (protohdr < 0)
return -1;
+ pkt->tprot_set = true;
pkt->tprot = protohdr;
pkt->xt.thoff = thoff;
pkt->xt.fragoff = frag_off;
return 0;
+#else
+ return -1;
+#endif
+}
+
+static inline void
+nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt,
+ struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ nft_set_pktinfo(pkt, skb, state);
+ if (__nft_set_pktinfo_ipv6_validate(pkt, skb, state) < 0)
+ nft_set_pktinfo_proto_unspec(pkt, skb);
}
extern struct nft_af_info nft_af_ipv6;
diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h
index 36d7235..58487b1 100644
--- a/include/net/netns/netfilter.h
+++ b/include/net/netns/netfilter.h
@@ -16,6 +16,6 @@
#ifdef CONFIG_SYSCTL
struct ctl_table_header *nf_log_dir_header;
#endif
- struct list_head hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
+ struct nf_hook_entry __rcu *hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
};
#endif
diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h
index 18d5dc1..92f3c86 100644
--- a/include/uapi/linux/if_tunnel.h
+++ b/include/uapi/linux/if_tunnel.h
@@ -39,6 +39,7 @@
#define GRE_IS_REC(f) ((f) & GRE_REC)
#define GRE_IS_ACK(f) ((f) & GRE_ACK)
+#define GRE_VERSION_0 __cpu_to_be16(0x0000)
#define GRE_VERSION_1 __cpu_to_be16(0x0001)
#define GRE_PROTO_PPP __cpu_to_be16(0x880b)
#define GRE_PPTP_KEY_MASK __cpu_to_be32(0xffff)
diff --git a/include/uapi/linux/netfilter/nf_log.h b/include/uapi/linux/netfilter/nf_log.h
new file mode 100644
index 0000000..8be21e0
--- /dev/null
+++ b/include/uapi/linux/netfilter/nf_log.h
@@ -0,0 +1,12 @@
+#ifndef _NETFILTER_NF_LOG_H
+#define _NETFILTER_NF_LOG_H
+
+#define NF_LOG_TCPSEQ 0x01 /* Log TCP sequence numbers */
+#define NF_LOG_TCPOPT 0x02 /* Log TCP options */
+#define NF_LOG_IPOPT 0x04 /* Log IP options */
+#define NF_LOG_UID 0x08 /* Log UID owning local socket */
+#define NF_LOG_NFLOG 0x10 /* Unsupported, don't reuse */
+#define NF_LOG_MACDECODE 0x20 /* Decode MAC header */
+#define NF_LOG_MASK 0x2f
+
+#endif /* _NETFILTER_NF_LOG_H */
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 28ce01d..c6c4477 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -546,6 +546,35 @@
};
#define NFTA_CMP_MAX (__NFTA_CMP_MAX - 1)
+/**
+ * enum nft_range_ops - nf_tables range operator
+ *
+ * @NFT_RANGE_EQ: equal
+ * @NFT_RANGE_NEQ: not equal
+ */
+enum nft_range_ops {
+ NFT_RANGE_EQ,
+ NFT_RANGE_NEQ,
+};
+
+/**
+ * enum nft_range_attributes - nf_tables range expression netlink attributes
+ *
+ * @NFTA_RANGE_SREG: source register of data to compare (NLA_U32: nft_registers)
+ * @NFTA_RANGE_OP: cmp operation (NLA_U32: nft_cmp_ops)
+ * @NFTA_RANGE_FROM_DATA: data range from (NLA_NESTED: nft_data_attributes)
+ * @NFTA_RANGE_TO_DATA: data range to (NLA_NESTED: nft_data_attributes)
+ */
+enum nft_range_attributes {
+ NFTA_RANGE_UNSPEC,
+ NFTA_RANGE_SREG,
+ NFTA_RANGE_OP,
+ NFTA_RANGE_FROM_DATA,
+ NFTA_RANGE_TO_DATA,
+ __NFTA_RANGE_MAX
+};
+#define NFTA_RANGE_MAX (__NFTA_RANGE_MAX - 1)
+
enum nft_lookup_flags {
NFT_LOOKUP_F_INV = (1 << 0),
};
@@ -575,6 +604,10 @@
NFT_DYNSET_OP_UPDATE,
};
+enum nft_dynset_flags {
+ NFT_DYNSET_F_INV = (1 << 0),
+};
+
/**
* enum nft_dynset_attributes - dynset expression attributes
*
@@ -585,6 +618,7 @@
* @NFTA_DYNSET_SREG_DATA: source register of the data (NLA_U32)
* @NFTA_DYNSET_TIMEOUT: timeout value for the new element (NLA_U64)
* @NFTA_DYNSET_EXPR: expression (NLA_NESTED: nft_expr_attributes)
+ * @NFTA_DYNSET_FLAGS: flags (NLA_U32)
*/
enum nft_dynset_attributes {
NFTA_DYNSET_UNSPEC,
@@ -596,6 +630,7 @@
NFTA_DYNSET_TIMEOUT,
NFTA_DYNSET_EXPR,
NFTA_DYNSET_PAD,
+ NFTA_DYNSET_FLAGS,
__NFTA_DYNSET_MAX,
};
#define NFTA_DYNSET_MAX (__NFTA_DYNSET_MAX - 1)
@@ -731,6 +766,7 @@
* @NFTA_HASH_LEN: source data length (NLA_U32)
* @NFTA_HASH_MODULUS: modulus value (NLA_U32)
* @NFTA_HASH_SEED: seed value (NLA_U32)
+ * @NFTA_HASH_OFFSET: add this offset value to hash result (NLA_U32)
*/
enum nft_hash_attributes {
NFTA_HASH_UNSPEC,
@@ -739,6 +775,7 @@
NFTA_HASH_LEN,
NFTA_HASH_MODULUS,
NFTA_HASH_SEED,
+ NFTA_HASH_OFFSET,
__NFTA_HASH_MAX,
};
#define NFTA_HASH_MAX (__NFTA_HASH_MAX - 1)
@@ -886,12 +923,14 @@
* @NFTA_QUEUE_NUM: netlink queue to send messages to (NLA_U16)
* @NFTA_QUEUE_TOTAL: number of queues to load balance packets on (NLA_U16)
* @NFTA_QUEUE_FLAGS: various flags (NLA_U16)
+ * @NFTA_QUEUE_SREG_QNUM: source register of queue number (NLA_U32: nft_registers)
*/
enum nft_queue_attributes {
NFTA_QUEUE_UNSPEC,
NFTA_QUEUE_NUM,
NFTA_QUEUE_TOTAL,
NFTA_QUEUE_FLAGS,
+ NFTA_QUEUE_SREG_QNUM,
__NFTA_QUEUE_MAX
};
#define NFTA_QUEUE_MAX (__NFTA_QUEUE_MAX - 1)
@@ -1126,14 +1165,16 @@
* enum nft_ng_attributes - nf_tables number generator expression netlink attributes
*
* @NFTA_NG_DREG: destination register (NLA_U32)
- * @NFTA_NG_UNTIL: source value to increment the counter until reset (NLA_U32)
+ * @NFTA_NG_MODULUS: maximum counter value (NLA_U32)
* @NFTA_NG_TYPE: operation type (NLA_U32)
+ * @NFTA_NG_OFFSET: offset to be added to the counter (NLA_U32)
*/
enum nft_ng_attributes {
NFTA_NG_UNSPEC,
NFTA_NG_DREG,
- NFTA_NG_UNTIL,
+ NFTA_NG_MODULUS,
NFTA_NG_TYPE,
+ NFTA_NG_OFFSET,
__NFTA_NG_MAX
};
#define NFTA_NG_MAX (__NFTA_NG_MAX - 1)
diff --git a/include/uapi/linux/netfilter/nfnetlink_conntrack.h b/include/uapi/linux/netfilter/nfnetlink_conntrack.h
index 9df78970..6deb886 100644
--- a/include/uapi/linux/netfilter/nfnetlink_conntrack.h
+++ b/include/uapi/linux/netfilter/nfnetlink_conntrack.h
@@ -231,13 +231,13 @@
enum ctattr_stats_cpu {
CTA_STATS_UNSPEC,
- CTA_STATS_SEARCHED,
+ CTA_STATS_SEARCHED, /* no longer used */
CTA_STATS_FOUND,
- CTA_STATS_NEW,
+ CTA_STATS_NEW, /* no longer used */
CTA_STATS_INVALID,
CTA_STATS_IGNORE,
- CTA_STATS_DELETE,
- CTA_STATS_DELETE_LIST,
+ CTA_STATS_DELETE, /* no longer used */
+ CTA_STATS_DELETE_LIST, /* no longer used */
CTA_STATS_INSERT,
CTA_STATS_INSERT_FAILED,
CTA_STATS_DROP,
diff --git a/include/uapi/linux/netfilter/xt_hashlimit.h b/include/uapi/linux/netfilter/xt_hashlimit.h
index 6db9037..3efc0ca 100644
--- a/include/uapi/linux/netfilter/xt_hashlimit.h
+++ b/include/uapi/linux/netfilter/xt_hashlimit.h
@@ -6,6 +6,7 @@
/* timings are in milliseconds. */
#define XT_HASHLIMIT_SCALE 10000
+#define XT_HASHLIMIT_SCALE_v2 1000000llu
/* 1/10,000 sec period => max of 10,000/sec. Min rate is then 429490
* seconds, or one packet every 59 hours.
*/
@@ -63,6 +64,20 @@
__u8 srcmask, dstmask;
};
+struct hashlimit_cfg2 {
+ __u64 avg; /* Average secs between packets * scale */
+ __u64 burst; /* Period multiplier for upper limit. */
+ __u32 mode; /* bitmask of XT_HASHLIMIT_HASH_* */
+
+ /* user specified */
+ __u32 size; /* how many buckets */
+ __u32 max; /* max number of entries */
+ __u32 gc_interval; /* gc interval */
+ __u32 expire; /* when do entries expire? */
+
+ __u8 srcmask, dstmask;
+};
+
struct xt_hashlimit_mtinfo1 {
char name[IFNAMSIZ];
struct hashlimit_cfg1 cfg;
@@ -71,4 +86,12 @@
struct xt_hashlimit_htable *hinfo __attribute__((aligned(8)));
};
+struct xt_hashlimit_mtinfo2 {
+ char name[NAME_MAX];
+ struct hashlimit_cfg2 cfg;
+
+ /* Used internally by the kernel */
+ struct xt_hashlimit_htable *hinfo __attribute__((aligned(8)));
+};
+
#endif /* _UAPI_XT_HASHLIMIT_H */
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 77e7f69..2fe9345 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -30,6 +30,7 @@
#include <linux/netfilter_ipv6.h>
#include <linux/netfilter_arp.h>
#include <linux/in_route.h>
+#include <linux/rculist.h>
#include <linux/inetdevice.h>
#include <net/ip.h>
@@ -395,11 +396,10 @@
skb->dev = nf_bridge->physindev;
nf_bridge_update_protocol(skb);
nf_bridge_push_encap_header(skb);
- NF_HOOK_THRESH(NFPROTO_BRIDGE,
- NF_BR_PRE_ROUTING,
- net, sk, skb, skb->dev, NULL,
- br_nf_pre_routing_finish_bridge,
- 1);
+ br_nf_hook_thresh(NF_BR_PRE_ROUTING,
+ net, sk, skb, skb->dev,
+ NULL,
+ br_nf_pre_routing_finish);
return 0;
}
ether_addr_copy(eth_hdr(skb)->h_dest, dev->dev_addr);
@@ -417,10 +417,8 @@
skb->dev = nf_bridge->physindev;
nf_bridge_update_protocol(skb);
nf_bridge_push_encap_header(skb);
- NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, net, sk, skb,
- skb->dev, NULL,
- br_handle_frame_finish, 1);
-
+ br_nf_hook_thresh(NF_BR_PRE_ROUTING, net, sk, skb, skb->dev, NULL,
+ br_handle_frame_finish);
return 0;
}
@@ -992,6 +990,43 @@
.notifier_call = brnf_device_event,
};
+/* recursively invokes nf_hook_slow (again), skipping already-called
+ * hooks (< NF_BR_PRI_BRNF).
+ *
+ * Called with rcu read lock held.
+ */
+int br_nf_hook_thresh(unsigned int hook, struct net *net,
+ struct sock *sk, struct sk_buff *skb,
+ struct net_device *indev,
+ struct net_device *outdev,
+ int (*okfn)(struct net *, struct sock *,
+ struct sk_buff *))
+{
+ struct nf_hook_entry *elem;
+ struct nf_hook_state state;
+ int ret;
+
+ elem = rcu_dereference(net->nf.hooks[NFPROTO_BRIDGE][hook]);
+
+ while (elem && (elem->ops.priority <= NF_BR_PRI_BRNF))
+ elem = rcu_dereference(elem->next);
+
+ if (!elem)
+ return okfn(net, sk, skb);
+
+ /* We may already have this, but read-locks nest anyway */
+ rcu_read_lock();
+ nf_hook_state_init(&state, elem, hook, NF_BR_PRI_BRNF + 1,
+ NFPROTO_BRIDGE, indev, outdev, sk, net, okfn);
+
+ ret = nf_hook_slow(skb, &state);
+ rcu_read_unlock();
+ if (ret == 1)
+ ret = okfn(net, sk, skb);
+
+ return ret;
+}
+
#ifdef CONFIG_SYSCTL
static
int brnf_sysctl_call_tables(struct ctl_table *ctl, int write,
diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c
index 5e59a84..5989661 100644
--- a/net/bridge/br_netfilter_ipv6.c
+++ b/net/bridge/br_netfilter_ipv6.c
@@ -187,10 +187,9 @@
skb->dev = nf_bridge->physindev;
nf_bridge_update_protocol(skb);
nf_bridge_push_encap_header(skb);
- NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING,
- net, sk, skb, skb->dev, NULL,
- br_nf_pre_routing_finish_bridge,
- 1);
+ br_nf_hook_thresh(NF_BR_PRE_ROUTING,
+ net, sk, skb, skb->dev, NULL,
+ br_nf_pre_routing_finish_bridge);
return 0;
}
ether_addr_copy(eth_hdr(skb)->h_dest, dev->dev_addr);
@@ -207,9 +206,8 @@
skb->dev = nf_bridge->physindev;
nf_bridge_update_protocol(skb);
nf_bridge_push_encap_header(skb);
- NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, net, sk, skb,
- skb->dev, NULL,
- br_handle_frame_finish, 1);
+ br_nf_hook_thresh(NF_BR_PRE_ROUTING, net, sk, skb,
+ skb->dev, NULL, br_handle_frame_finish);
return 0;
}
diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c
index 152300d..9a11086 100644
--- a/net/bridge/netfilter/ebt_log.c
+++ b/net/bridge/netfilter/ebt_log.c
@@ -91,7 +91,7 @@
if (loginfo->type == NF_LOG_TYPE_LOG)
bitmask = loginfo->u.log.logflags;
else
- bitmask = NF_LOG_MASK;
+ bitmask = NF_LOG_DEFAULT_MASK;
if ((bitmask & EBT_LOG_IP) && eth_hdr(skb)->h_proto ==
htons(ETH_P_IP)) {
diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c
index 20396499..2e7c4f9 100644
--- a/net/bridge/netfilter/ebt_redirect.c
+++ b/net/bridge/netfilter/ebt_redirect.c
@@ -24,7 +24,7 @@
return EBT_DROP;
if (par->hooknum != NF_BR_BROUTING)
- /* rcu_read_lock()ed by nf_hook_slow */
+ /* rcu_read_lock()ed by nf_hook_thresh */
ether_addr_copy(eth_hdr(skb)->h_dest,
br_port_get_rcu(par->in)->br->dev->dev_addr);
else
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 0833c25..f5c11bb 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -146,7 +146,7 @@
return 1;
if (NF_INVF(e, EBT_IOUT, ebt_dev_check(e->out, out)))
return 1;
- /* rcu_read_lock()ed by nf_hook_slow */
+ /* rcu_read_lock()ed by nf_hook_thresh */
if (in && (p = br_port_get_rcu(in)) != NULL &&
NF_INVF(e, EBT_ILOGICALIN,
ebt_dev_check(e->logical_in, p->br->dev)))
diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c
index a78c4e2..97afdc0 100644
--- a/net/bridge/netfilter/nf_tables_bridge.c
+++ b/net/bridge/netfilter/nf_tables_bridge.c
@@ -13,79 +13,11 @@
#include <linux/module.h>
#include <linux/netfilter_bridge.h>
#include <net/netfilter/nf_tables.h>
-#include <net/netfilter/nf_tables_bridge.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <net/netfilter/nf_tables_ipv4.h>
#include <net/netfilter/nf_tables_ipv6.h>
-int nft_bridge_iphdr_validate(struct sk_buff *skb)
-{
- struct iphdr *iph;
- u32 len;
-
- if (!pskb_may_pull(skb, sizeof(struct iphdr)))
- return 0;
-
- iph = ip_hdr(skb);
- if (iph->ihl < 5 || iph->version != 4)
- return 0;
-
- len = ntohs(iph->tot_len);
- if (skb->len < len)
- return 0;
- else if (len < (iph->ihl*4))
- return 0;
-
- if (!pskb_may_pull(skb, iph->ihl*4))
- return 0;
-
- return 1;
-}
-EXPORT_SYMBOL_GPL(nft_bridge_iphdr_validate);
-
-int nft_bridge_ip6hdr_validate(struct sk_buff *skb)
-{
- struct ipv6hdr *hdr;
- u32 pkt_len;
-
- if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
- return 0;
-
- hdr = ipv6_hdr(skb);
- if (hdr->version != 6)
- return 0;
-
- pkt_len = ntohs(hdr->payload_len);
- if (pkt_len + sizeof(struct ipv6hdr) > skb->len)
- return 0;
-
- return 1;
-}
-EXPORT_SYMBOL_GPL(nft_bridge_ip6hdr_validate);
-
-static inline void nft_bridge_set_pktinfo_ipv4(struct nft_pktinfo *pkt,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- if (nft_bridge_iphdr_validate(skb))
- nft_set_pktinfo_ipv4(pkt, skb, state);
- else
- nft_set_pktinfo(pkt, skb, state);
-}
-
-static inline void nft_bridge_set_pktinfo_ipv6(struct nft_pktinfo *pkt,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
-#if IS_ENABLED(CONFIG_IPV6)
- if (nft_bridge_ip6hdr_validate(skb) &&
- nft_set_pktinfo_ipv6(pkt, skb, state) == 0)
- return;
-#endif
- nft_set_pktinfo(pkt, skb, state);
-}
-
static unsigned int
nft_do_chain_bridge(void *priv,
struct sk_buff *skb,
@@ -95,13 +27,13 @@
switch (eth_hdr(skb)->h_proto) {
case htons(ETH_P_IP):
- nft_bridge_set_pktinfo_ipv4(&pkt, skb, state);
+ nft_set_pktinfo_ipv4_validate(&pkt, skb, state);
break;
case htons(ETH_P_IPV6):
- nft_bridge_set_pktinfo_ipv6(&pkt, skb, state);
+ nft_set_pktinfo_ipv6_validate(&pkt, skb, state);
break;
default:
- nft_set_pktinfo(&pkt, skb, state);
+ nft_set_pktinfo_unspec(&pkt, skb, state);
break;
}
@@ -207,12 +139,20 @@
int ret;
nf_register_afinfo(&nf_br_afinfo);
- nft_register_chain_type(&filter_bridge);
+ ret = nft_register_chain_type(&filter_bridge);
+ if (ret < 0)
+ goto err1;
+
ret = register_pernet_subsys(&nf_tables_bridge_net_ops);
- if (ret < 0) {
- nft_unregister_chain_type(&filter_bridge);
- nf_unregister_afinfo(&nf_br_afinfo);
- }
+ if (ret < 0)
+ goto err2;
+
+ return ret;
+
+err2:
+ nft_unregister_chain_type(&filter_bridge);
+err1:
+ nf_unregister_afinfo(&nf_br_afinfo);
return ret;
}
diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c
index 0b77ffb..4b3df6b 100644
--- a/net/bridge/netfilter/nft_reject_bridge.c
+++ b/net/bridge/netfilter/nft_reject_bridge.c
@@ -14,7 +14,6 @@
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nft_reject.h>
-#include <net/netfilter/nf_tables_bridge.h>
#include <net/netfilter/ipv4/nf_reject.h>
#include <net/netfilter/ipv6/nf_reject.h>
#include <linux/ip.h>
@@ -37,6 +36,30 @@
skb_pull(nskb, ETH_HLEN);
}
+static int nft_bridge_iphdr_validate(struct sk_buff *skb)
+{
+ struct iphdr *iph;
+ u32 len;
+
+ if (!pskb_may_pull(skb, sizeof(struct iphdr)))
+ return 0;
+
+ iph = ip_hdr(skb);
+ if (iph->ihl < 5 || iph->version != 4)
+ return 0;
+
+ len = ntohs(iph->tot_len);
+ if (skb->len < len)
+ return 0;
+ else if (len < (iph->ihl*4))
+ return 0;
+
+ if (!pskb_may_pull(skb, iph->ihl*4))
+ return 0;
+
+ return 1;
+}
+
/* We cannot use oldskb->dev, it can be either bridge device (NF_BRIDGE INPUT)
* or the bridge port (NF_BRIDGE PREROUTING).
*/
@@ -143,6 +166,25 @@
br_forward(br_port_get_rcu(dev), nskb, false, true);
}
+static int nft_bridge_ip6hdr_validate(struct sk_buff *skb)
+{
+ struct ipv6hdr *hdr;
+ u32 pkt_len;
+
+ if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
+ return 0;
+
+ hdr = ipv6_hdr(skb);
+ if (hdr->version != 6)
+ return 0;
+
+ pkt_len = ntohs(hdr->payload_len);
+ if (pkt_len + sizeof(struct ipv6hdr) > skb->len)
+ return 0;
+
+ return 1;
+}
+
static void nft_reject_br_send_v6_tcp_reset(struct net *net,
struct sk_buff *oldskb,
const struct net_device *dev,
diff --git a/net/core/dev.c b/net/core/dev.c
index 9dbece2..c0c291f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4055,12 +4055,17 @@
{
#ifdef CONFIG_NETFILTER_INGRESS
if (nf_hook_ingress_active(skb)) {
+ int ingress_retval;
+
if (*pt_prev) {
*ret = deliver_skb(skb, *pt_prev, orig_dev);
*pt_prev = NULL;
}
- return nf_hook_ingress(skb);
+ rcu_read_lock();
+ ingress_retval = nf_hook_ingress(skb);
+ rcu_read_unlock();
+ return ingress_retval;
}
#endif /* CONFIG_NETFILTER_INGRESS */
return 0;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index f993545..7c00ce9 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -156,7 +156,7 @@
.u = {
.log = {
.level = 4,
- .logflags = NF_LOG_MASK,
+ .logflags = NF_LOG_DEFAULT_MASK,
},
},
};
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 870aebd..713c09a 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -110,7 +110,7 @@
if (!help)
return NF_ACCEPT;
- /* rcu_read_lock()ed by nf_hook_slow */
+ /* rcu_read_lock()ed by nf_hook_thresh */
helper = rcu_dereference(help->helper);
if (!helper)
return NF_ACCEPT;
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 4b5904b..d075b3c 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -149,7 +149,7 @@
return -NF_ACCEPT;
}
- /* rcu_read_lock()ed by nf_hook_slow */
+ /* rcu_read_lock()ed by nf_hook_thresh */
innerproto = __nf_ct_l4proto_find(PF_INET, origtuple.dst.protonum);
/* Ordinarily, we'd expect the inverted tupleproto, but it's
diff --git a/net/ipv4/netfilter/nf_log_arp.c b/net/ipv4/netfilter/nf_log_arp.c
index 8945c26..b24795e 100644
--- a/net/ipv4/netfilter/nf_log_arp.c
+++ b/net/ipv4/netfilter/nf_log_arp.c
@@ -30,7 +30,7 @@
.u = {
.log = {
.level = LOGLEVEL_NOTICE,
- .logflags = NF_LOG_MASK,
+ .logflags = NF_LOG_DEFAULT_MASK,
},
},
};
diff --git a/net/ipv4/netfilter/nf_log_ipv4.c b/net/ipv4/netfilter/nf_log_ipv4.c
index 20f2255..8566489 100644
--- a/net/ipv4/netfilter/nf_log_ipv4.c
+++ b/net/ipv4/netfilter/nf_log_ipv4.c
@@ -29,7 +29,7 @@
.u = {
.log = {
.level = LOGLEVEL_NOTICE,
- .logflags = NF_LOG_MASK,
+ .logflags = NF_LOG_DEFAULT_MASK,
},
},
};
@@ -46,7 +46,7 @@
if (info->type == NF_LOG_TYPE_LOG)
logflags = info->u.log.logflags;
else
- logflags = NF_LOG_MASK;
+ logflags = NF_LOG_DEFAULT_MASK;
ih = skb_header_pointer(skb, iphoff, sizeof(_iph), &_iph);
if (ih == NULL) {
@@ -76,7 +76,7 @@
if (ntohs(ih->frag_off) & IP_OFFSET)
nf_log_buf_add(m, "FRAG:%u ", ntohs(ih->frag_off) & IP_OFFSET);
- if ((logflags & XT_LOG_IPOPT) &&
+ if ((logflags & NF_LOG_IPOPT) &&
ih->ihl * 4 > sizeof(struct iphdr)) {
const unsigned char *op;
unsigned char _opt[4 * 15 - sizeof(struct iphdr)];
@@ -250,7 +250,7 @@
}
/* Max length: 15 "UID=4294967295 " */
- if ((logflags & XT_LOG_UID) && !iphoff)
+ if ((logflags & NF_LOG_UID) && !iphoff)
nf_log_dump_sk_uid_gid(m, skb->sk);
/* Max length: 16 "MARK=0xFFFFFFFF " */
@@ -282,7 +282,7 @@
if (info->type == NF_LOG_TYPE_LOG)
logflags = info->u.log.logflags;
- if (!(logflags & XT_LOG_MACDECODE))
+ if (!(logflags & NF_LOG_MACDECODE))
goto fallback;
switch (dev->type) {
diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c
index 9414923..edf0500 100644
--- a/net/ipv4/netfilter/nf_nat_proto_gre.c
+++ b/net/ipv4/netfilter/nf_nat_proto_gre.c
@@ -88,8 +88,8 @@
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
{
- const struct gre_hdr *greh;
- struct gre_hdr_pptp *pgreh;
+ const struct gre_base_hdr *greh;
+ struct pptp_gre_header *pgreh;
/* pgreh includes two optional 32bit fields which are not required
* to be there. That's where the magic '8' comes from */
@@ -97,18 +97,19 @@
return false;
greh = (void *)skb->data + hdroff;
- pgreh = (struct gre_hdr_pptp *)greh;
+ pgreh = (struct pptp_gre_header *)greh;
/* we only have destination manip of a packet, since 'source key'
* is not present in the packet itself */
if (maniptype != NF_NAT_MANIP_DST)
return true;
- switch (greh->version) {
- case GRE_VERSION_1701:
+
+ switch (greh->flags & GRE_VERSION) {
+ case GRE_VERSION_0:
/* We do not currently NAT any GREv0 packets.
* Try to behave like "nf_nat_proto_unknown" */
break;
- case GRE_VERSION_PPTP:
+ case GRE_VERSION_1:
pr_debug("call_id -> 0x%04x\n", ntohs(tuple->dst.u.gre.key));
pgreh->call_id = tuple->dst.u.gre.key;
break;
diff --git a/net/ipv4/netfilter/nf_tables_arp.c b/net/ipv4/netfilter/nf_tables_arp.c
index cd84d42..805c8dd 100644
--- a/net/ipv4/netfilter/nf_tables_arp.c
+++ b/net/ipv4/netfilter/nf_tables_arp.c
@@ -21,7 +21,7 @@
{
struct nft_pktinfo pkt;
- nft_set_pktinfo(&pkt, skb, state);
+ nft_set_pktinfo_unspec(&pkt, skb, state);
return nft_do_chain(&pkt, priv);
}
@@ -80,7 +80,10 @@
{
int ret;
- nft_register_chain_type(&filter_arp);
+ ret = nft_register_chain_type(&filter_arp);
+ if (ret < 0)
+ return ret;
+
ret = register_pernet_subsys(&nf_tables_arp_net_ops);
if (ret < 0)
nft_unregister_chain_type(&filter_arp);
diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c
index e44ba3b..2840a29 100644
--- a/net/ipv4/netfilter/nf_tables_ipv4.c
+++ b/net/ipv4/netfilter/nf_tables_ipv4.c
@@ -103,7 +103,10 @@
{
int ret;
- nft_register_chain_type(&filter_ipv4);
+ ret = nft_register_chain_type(&filter_ipv4);
+ if (ret < 0)
+ return ret;
+
ret = register_pernet_subsys(&nf_tables_ipv4_net_ops);
if (ret < 0)
nft_unregister_chain_type(&filter_ipv4);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 4062ed2..8c6ad2d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -6326,6 +6326,7 @@
tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
tcp_openreq_init(req, &tmp_opt, skb, sk);
+ inet_rsk(req)->no_srccheck = inet_sk(sk)->transparent;
/* Note: tcp_v6_init_req() might override ir_iif for link locals */
inet_rsk(req)->ir_iif = inet_request_bound_dev_if(sk, skb);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 04b9893..7ac37c3 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1196,7 +1196,6 @@
sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
- ireq->no_srccheck = inet_sk(sk_listener)->transparent;
ireq->opt = tcp_v4_save_options(skb);
}
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 552fac2..55aacea 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -190,7 +190,7 @@
.u = {
.log = {
.level = LOGLEVEL_WARNING,
- .logflags = NF_LOG_MASK,
+ .logflags = NF_LOG_DEFAULT_MASK,
},
},
};
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 1aa5848..963ee38 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -115,7 +115,7 @@
help = nfct_help(ct);
if (!help)
return NF_ACCEPT;
- /* rcu_read_lock()ed by nf_hook_slow */
+ /* rcu_read_lock()ed by nf_hook_thresh */
helper = rcu_dereference(help->helper);
if (!helper)
return NF_ACCEPT;
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index 660bc10..f5a61bc 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -165,7 +165,7 @@
return -NF_ACCEPT;
}
- /* rcu_read_lock()ed by nf_hook_slow */
+ /* rcu_read_lock()ed by nf_hook_thresh */
inproto = __nf_ct_l4proto_find(PF_INET6, origtuple.dst.protonum);
/* Ordinarily, we'd expect the inverted tupleproto, but it's
diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c
index c1bcf69..57d8606 100644
--- a/net/ipv6/netfilter/nf_log_ipv6.c
+++ b/net/ipv6/netfilter/nf_log_ipv6.c
@@ -30,7 +30,7 @@
.u = {
.log = {
.level = LOGLEVEL_NOTICE,
- .logflags = NF_LOG_MASK,
+ .logflags = NF_LOG_DEFAULT_MASK,
},
},
};
@@ -52,7 +52,7 @@
if (info->type == NF_LOG_TYPE_LOG)
logflags = info->u.log.logflags;
else
- logflags = NF_LOG_MASK;
+ logflags = NF_LOG_DEFAULT_MASK;
ih = skb_header_pointer(skb, ip6hoff, sizeof(_ip6h), &_ip6h);
if (ih == NULL) {
@@ -84,7 +84,7 @@
}
/* Max length: 48 "OPT (...) " */
- if (logflags & XT_LOG_IPOPT)
+ if (logflags & NF_LOG_IPOPT)
nf_log_buf_add(m, "OPT ( ");
switch (currenthdr) {
@@ -121,7 +121,7 @@
case IPPROTO_ROUTING:
case IPPROTO_HOPOPTS:
if (fragment) {
- if (logflags & XT_LOG_IPOPT)
+ if (logflags & NF_LOG_IPOPT)
nf_log_buf_add(m, ")");
return;
}
@@ -129,7 +129,7 @@
break;
/* Max Length */
case IPPROTO_AH:
- if (logflags & XT_LOG_IPOPT) {
+ if (logflags & NF_LOG_IPOPT) {
struct ip_auth_hdr _ahdr;
const struct ip_auth_hdr *ah;
@@ -161,7 +161,7 @@
hdrlen = (hp->hdrlen+2)<<2;
break;
case IPPROTO_ESP:
- if (logflags & XT_LOG_IPOPT) {
+ if (logflags & NF_LOG_IPOPT) {
struct ip_esp_hdr _esph;
const struct ip_esp_hdr *eh;
@@ -194,7 +194,7 @@
nf_log_buf_add(m, "Unknown Ext Hdr %u", currenthdr);
return;
}
- if (logflags & XT_LOG_IPOPT)
+ if (logflags & NF_LOG_IPOPT)
nf_log_buf_add(m, ") ");
currenthdr = hp->nexthdr;
@@ -277,7 +277,7 @@
}
/* Max length: 15 "UID=4294967295 " */
- if ((logflags & XT_LOG_UID) && recurse)
+ if ((logflags & NF_LOG_UID) && recurse)
nf_log_dump_sk_uid_gid(m, skb->sk);
/* Max length: 16 "MARK=0xFFFFFFFF " */
@@ -295,7 +295,7 @@
if (info->type == NF_LOG_TYPE_LOG)
logflags = info->u.log.logflags;
- if (!(logflags & XT_LOG_MACDECODE))
+ if (!(logflags & NF_LOG_MACDECODE))
goto fallback;
switch (dev->type) {
diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c
index 30b22f4..d6e4ba5 100644
--- a/net/ipv6/netfilter/nf_tables_ipv6.c
+++ b/net/ipv6/netfilter/nf_tables_ipv6.c
@@ -22,9 +22,7 @@
{
struct nft_pktinfo pkt;
- /* malformed packet, drop it */
- if (nft_set_pktinfo_ipv6(&pkt, skb, state) < 0)
- return NF_DROP;
+ nft_set_pktinfo_ipv6(&pkt, skb, state);
return nft_do_chain(&pkt, priv);
}
@@ -102,7 +100,10 @@
{
int ret;
- nft_register_chain_type(&filter_ipv6);
+ ret = nft_register_chain_type(&filter_ipv6);
+ if (ret < 0)
+ return ret;
+
ret = register_pernet_subsys(&nf_tables_ipv6_net_ops);
if (ret < 0)
nft_unregister_chain_type(&filter_ipv6);
diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c
index 2535223..f272747 100644
--- a/net/ipv6/netfilter/nft_chain_route_ipv6.c
+++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c
@@ -33,9 +33,7 @@
u32 mark, flowlabel;
int err;
- /* malformed packet, drop it */
- if (nft_set_pktinfo_ipv6(&pkt, skb, state) < 0)
- return NF_DROP;
+ nft_set_pktinfo_ipv6(&pkt, skb, state);
/* save source/dest address, mark, hoplimit, flowlabel, priority */
memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr));
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 0c858110..c23c3c8 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -71,8 +71,9 @@
# nf_tables
nf_tables-objs += nf_tables_core.o nf_tables_api.o nf_tables_trace.o
-nf_tables-objs += nft_immediate.o nft_cmp.o nft_lookup.o nft_dynset.o
+nf_tables-objs += nft_immediate.o nft_cmp.o nft_range.o
nf_tables-objs += nft_bitwise.o nft_byteorder.o nft_payload.o
+nf_tables-objs += nft_lookup.o nft_dynset.o
obj-$(CONFIG_NF_TABLES) += nf_tables.o
obj-$(CONFIG_NF_TABLES_INET) += nf_tables_inet.o
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 2c5327e..fa6715d 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -22,6 +22,7 @@
#include <linux/proc_fs.h>
#include <linux/mutex.h>
#include <linux/slab.h>
+#include <linux/rcupdate.h>
#include <net/net_namespace.h>
#include <net/sock.h>
@@ -61,33 +62,55 @@
#endif
static DEFINE_MUTEX(nf_hook_mutex);
+#define nf_entry_dereference(e) \
+ rcu_dereference_protected(e, lockdep_is_held(&nf_hook_mutex))
-static struct list_head *nf_find_hook_list(struct net *net,
- const struct nf_hook_ops *reg)
+static struct nf_hook_entry *nf_hook_entry_head(struct net *net,
+ const struct nf_hook_ops *reg)
{
- struct list_head *hook_list = NULL;
+ struct nf_hook_entry *hook_head = NULL;
if (reg->pf != NFPROTO_NETDEV)
- hook_list = &net->nf.hooks[reg->pf][reg->hooknum];
+ hook_head = nf_entry_dereference(net->nf.hooks[reg->pf]
+ [reg->hooknum]);
else if (reg->hooknum == NF_NETDEV_INGRESS) {
#ifdef CONFIG_NETFILTER_INGRESS
if (reg->dev && dev_net(reg->dev) == net)
- hook_list = ®->dev->nf_hooks_ingress;
+ hook_head =
+ nf_entry_dereference(
+ reg->dev->nf_hooks_ingress);
#endif
}
- return hook_list;
+ return hook_head;
}
-struct nf_hook_entry {
- const struct nf_hook_ops *orig_ops;
- struct nf_hook_ops ops;
-};
+/* must hold nf_hook_mutex */
+static void nf_set_hooks_head(struct net *net, const struct nf_hook_ops *reg,
+ struct nf_hook_entry *entry)
+{
+ switch (reg->pf) {
+ case NFPROTO_NETDEV:
+ /* We already checked in nf_register_net_hook() that this is
+ * used from ingress.
+ */
+ rcu_assign_pointer(reg->dev->nf_hooks_ingress, entry);
+ break;
+ default:
+ rcu_assign_pointer(net->nf.hooks[reg->pf][reg->hooknum],
+ entry);
+ break;
+ }
+}
int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
{
- struct list_head *hook_list;
+ struct nf_hook_entry *hooks_entry;
struct nf_hook_entry *entry;
- struct nf_hook_ops *elem;
+
+ if (reg->pf == NFPROTO_NETDEV &&
+ (reg->hooknum != NF_NETDEV_INGRESS ||
+ !reg->dev || dev_net(reg->dev) != net))
+ return -EINVAL;
entry = kmalloc(sizeof(*entry), GFP_KERNEL);
if (!entry)
@@ -95,19 +118,30 @@
entry->orig_ops = reg;
entry->ops = *reg;
-
- hook_list = nf_find_hook_list(net, reg);
- if (!hook_list) {
- kfree(entry);
- return -ENOENT;
- }
+ entry->next = NULL;
mutex_lock(&nf_hook_mutex);
- list_for_each_entry(elem, hook_list, list) {
- if (reg->priority < elem->priority)
- break;
+ hooks_entry = nf_hook_entry_head(net, reg);
+
+ if (hooks_entry && hooks_entry->orig_ops->priority > reg->priority) {
+ /* This is the case where we need to insert at the head */
+ entry->next = hooks_entry;
+ hooks_entry = NULL;
}
- list_add_rcu(&entry->ops.list, elem->list.prev);
+
+ while (hooks_entry &&
+ reg->priority >= hooks_entry->orig_ops->priority &&
+ nf_entry_dereference(hooks_entry->next)) {
+ hooks_entry = nf_entry_dereference(hooks_entry->next);
+ }
+
+ if (hooks_entry) {
+ entry->next = nf_entry_dereference(hooks_entry->next);
+ rcu_assign_pointer(hooks_entry->next, entry);
+ } else {
+ nf_set_hooks_head(net, reg, entry);
+ }
+
mutex_unlock(&nf_hook_mutex);
#ifdef CONFIG_NETFILTER_INGRESS
if (reg->pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
@@ -122,24 +156,33 @@
void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
{
- struct list_head *hook_list;
- struct nf_hook_entry *entry;
- struct nf_hook_ops *elem;
-
- hook_list = nf_find_hook_list(net, reg);
- if (!hook_list)
- return;
+ struct nf_hook_entry *hooks_entry;
mutex_lock(&nf_hook_mutex);
- list_for_each_entry(elem, hook_list, list) {
- entry = container_of(elem, struct nf_hook_entry, ops);
- if (entry->orig_ops == reg) {
- list_del_rcu(&entry->ops.list);
- break;
- }
+ hooks_entry = nf_hook_entry_head(net, reg);
+ if (hooks_entry->orig_ops == reg) {
+ nf_set_hooks_head(net, reg,
+ nf_entry_dereference(hooks_entry->next));
+ goto unlock;
}
+ while (hooks_entry && nf_entry_dereference(hooks_entry->next)) {
+ struct nf_hook_entry *next =
+ nf_entry_dereference(hooks_entry->next);
+ struct nf_hook_entry *nnext;
+
+ if (next->orig_ops != reg) {
+ hooks_entry = next;
+ continue;
+ }
+ nnext = nf_entry_dereference(next->next);
+ rcu_assign_pointer(hooks_entry->next, nnext);
+ hooks_entry = next;
+ break;
+ }
+
+unlock:
mutex_unlock(&nf_hook_mutex);
- if (&elem->list == hook_list) {
+ if (!hooks_entry) {
WARN(1, "nf_unregister_net_hook: hook not found!\n");
return;
}
@@ -151,10 +194,10 @@
static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]);
#endif
synchronize_net();
- nf_queue_nf_hook_drop(net, &entry->ops);
+ nf_queue_nf_hook_drop(net, hooks_entry);
/* other cpu might still process nfqueue verdict that used reg */
synchronize_net();
- kfree(entry);
+ kfree(hooks_entry);
}
EXPORT_SYMBOL(nf_unregister_net_hook);
@@ -294,10 +337,9 @@
}
EXPORT_SYMBOL(_nf_unregister_hooks);
-unsigned int nf_iterate(struct list_head *head,
- struct sk_buff *skb,
+unsigned int nf_iterate(struct sk_buff *skb,
struct nf_hook_state *state,
- struct nf_hook_ops **elemp)
+ struct nf_hook_entry **entryp)
{
unsigned int verdict;
@@ -305,20 +347,23 @@
* The caller must not block between calls to this
* function because of risk of continuing from deleted element.
*/
- list_for_each_entry_continue_rcu((*elemp), head, list) {
- if (state->thresh > (*elemp)->priority)
+ while (*entryp) {
+ if (state->thresh > (*entryp)->ops.priority) {
+ *entryp = rcu_dereference((*entryp)->next);
continue;
+ }
/* Optimization: we don't need to hold module
reference here, since function can't sleep. --RR */
repeat:
- verdict = (*elemp)->hook((*elemp)->priv, skb, state);
+ verdict = (*entryp)->ops.hook((*entryp)->ops.priv, skb, state);
if (verdict != NF_ACCEPT) {
#ifdef CONFIG_NETFILTER_DEBUG
if (unlikely((verdict & NF_VERDICT_MASK)
> NF_MAX_VERDICT)) {
NFDEBUG("Evil return from %p(%u).\n",
- (*elemp)->hook, state->hook);
+ (*entryp)->ops.hook, state->hook);
+ *entryp = rcu_dereference((*entryp)->next);
continue;
}
#endif
@@ -326,25 +371,23 @@
return verdict;
goto repeat;
}
+ *entryp = rcu_dereference((*entryp)->next);
}
return NF_ACCEPT;
}
/* Returns 1 if okfn() needs to be executed by the caller,
- * -EPERM for NF_DROP, 0 otherwise. */
+ * -EPERM for NF_DROP, 0 otherwise. Caller must hold rcu_read_lock. */
int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state)
{
- struct nf_hook_ops *elem;
+ struct nf_hook_entry *entry;
unsigned int verdict;
int ret = 0;
- /* We may already have this, but read-locks nest anyway */
- rcu_read_lock();
-
- elem = list_entry_rcu(state->hook_list, struct nf_hook_ops, list);
+ entry = rcu_dereference(state->hook_entries);
next_hook:
- verdict = nf_iterate(state->hook_list, skb, state, &elem);
+ verdict = nf_iterate(skb, state, &entry);
if (verdict == NF_ACCEPT || verdict == NF_STOP) {
ret = 1;
} else if ((verdict & NF_VERDICT_MASK) == NF_DROP) {
@@ -353,8 +396,10 @@
if (ret == 0)
ret = -EPERM;
} else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {
- int err = nf_queue(skb, elem, state,
- verdict >> NF_VERDICT_QBITS);
+ int err;
+
+ RCU_INIT_POINTER(state->hook_entries, entry);
+ err = nf_queue(skb, state, verdict >> NF_VERDICT_QBITS);
if (err < 0) {
if (err == -ESRCH &&
(verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))
@@ -362,7 +407,6 @@
kfree_skb(skb);
}
}
- rcu_read_unlock();
return ret;
}
EXPORT_SYMBOL(nf_hook_slow);
@@ -482,7 +526,7 @@
for (i = 0; i < ARRAY_SIZE(net->nf.hooks); i++) {
for (h = 0; h < NF_MAX_HOOKS; h++)
- INIT_LIST_HEAD(&net->nf.hooks[i][h]);
+ RCU_INIT_POINTER(net->nf.hooks[i][h], NULL);
}
#ifdef CONFIG_PROC_FS
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 6570982..ba6a1d4 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -379,7 +379,6 @@
destroy_conntrack(struct nf_conntrack *nfct)
{
struct nf_conn *ct = (struct nf_conn *)nfct;
- struct net *net = nf_ct_net(ct);
struct nf_conntrack_l4proto *l4proto;
pr_debug("destroy_conntrack(%p)\n", ct);
@@ -406,7 +405,6 @@
nf_ct_del_from_dying_or_unconfirmed_list(ct);
- NF_CT_STAT_INC(net, delete);
local_bh_enable();
if (ct->master)
@@ -438,7 +436,6 @@
nf_ct_add_to_dying_list(ct);
- NF_CT_STAT_INC(net, delete_list);
local_bh_enable();
}
@@ -529,11 +526,8 @@
if (nf_ct_is_dying(ct))
continue;
- if (nf_ct_key_equal(h, tuple, zone, net)) {
- NF_CT_STAT_INC_ATOMIC(net, found);
+ if (nf_ct_key_equal(h, tuple, zone, net))
return h;
- }
- NF_CT_STAT_INC_ATOMIC(net, searched);
}
/*
* if the nulls value we got at the end of this lookup is
@@ -798,7 +792,6 @@
*/
__nf_conntrack_hash_insert(ct, hash, reply_hash);
nf_conntrack_double_unlock(hash, reply_hash);
- NF_CT_STAT_INC(net, insert);
local_bh_enable();
help = nfct_help(ct);
@@ -857,7 +850,6 @@
rcu_read_unlock();
return 1;
}
- NF_CT_STAT_INC_ATOMIC(net, searched);
}
if (get_nulls_value(n) != hash) {
@@ -1177,10 +1169,8 @@
}
spin_unlock(&nf_conntrack_expect_lock);
}
- if (!exp) {
+ if (!exp)
__nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC);
- NF_CT_STAT_INC(net, new);
- }
/* Now it is inserted into the unconfirmed list, bump refcount */
nf_conntrack_get(&ct->ct_general);
@@ -1285,7 +1275,7 @@
skb->nfct = NULL;
}
- /* rcu_read_lock()ed by nf_hook_slow */
+ /* rcu_read_lock()ed by nf_hook_thresh */
l3proto = __nf_ct_l3proto_find(pf);
ret = l3proto->get_l4proto(skb, skb_network_offset(skb),
&dataoff, &protonum);
diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c
index b6934b5..e3ed200 100644
--- a/net/netfilter/nf_conntrack_ftp.c
+++ b/net/netfilter/nf_conntrack_ftp.c
@@ -301,8 +301,6 @@
size_t i = plen;
pr_debug("find_pattern `%s': dlen = %Zu\n", pattern, dlen);
- if (dlen == 0)
- return 0;
if (dlen <= plen) {
/* Short packet: try for partial? */
@@ -311,19 +309,8 @@
else return 0;
}
- if (strncasecmp(data, pattern, plen) != 0) {
-#if 0
- size_t i;
-
- pr_debug("ftp: string mismatch\n");
- for (i = 0; i < plen; i++) {
- pr_debug("ftp:char %u `%c'(%u) vs `%c'(%u)\n",
- i, data[i], data[i],
- pattern[i], pattern[i]);
- }
-#endif
+ if (strncasecmp(data, pattern, plen) != 0)
return 0;
- }
pr_debug("Pattern matches!\n");
/* Now we've found the constant string, try to skip
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index 5c0db5c..f65d9363 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -736,7 +736,7 @@
const struct nf_afinfo *afinfo;
int ret = 0;
- /* rcu_read_lock()ed by nf_hook_slow() */
+ /* rcu_read_lock()ed by nf_hook_thresh */
afinfo = nf_get_afinfo(family);
if (!afinfo)
return 0;
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index b989b81..336e215 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -189,7 +189,6 @@
struct nf_conntrack_helper *helper = NULL;
struct nf_conn_help *help;
struct net *net = nf_ct_net(ct);
- int ret = 0;
/* We already got a helper explicitly attached. The function
* nf_conntrack_alter_reply - in case NAT is in use - asks for looking
@@ -223,15 +222,13 @@
if (helper == NULL) {
if (help)
RCU_INIT_POINTER(help->helper, NULL);
- goto out;
+ return 0;
}
if (help == NULL) {
help = nf_ct_helper_ext_add(ct, helper, flags);
- if (help == NULL) {
- ret = -ENOMEM;
- goto out;
- }
+ if (help == NULL)
+ return -ENOMEM;
} else {
/* We only allow helper re-assignment of the same sort since
* we cannot reallocate the helper extension area.
@@ -240,13 +237,13 @@
if (tmp && tmp->help != helper->help) {
RCU_INIT_POINTER(help->helper, NULL);
- goto out;
+ return 0;
}
}
rcu_assign_pointer(help->helper, helper);
-out:
- return ret;
+
+ return 0;
}
EXPORT_SYMBOL_GPL(__nf_ct_try_assign_helper);
@@ -349,7 +346,7 @@
/* Called from the helper function, this call never fails */
help = nfct_help(ct);
- /* rcu_read_lock()ed by nf_hook_slow */
+ /* rcu_read_lock()ed by nf_hook_thresh */
helper = rcu_dereference(help->helper);
nf_log_packet(nf_ct_net(ct), nf_ct_l3num(ct), 0, skb, NULL, NULL, NULL,
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index c052b71..2754045 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1984,13 +1984,9 @@
nfmsg->version = NFNETLINK_V0;
nfmsg->res_id = htons(cpu);
- if (nla_put_be32(skb, CTA_STATS_SEARCHED, htonl(st->searched)) ||
- nla_put_be32(skb, CTA_STATS_FOUND, htonl(st->found)) ||
- nla_put_be32(skb, CTA_STATS_NEW, htonl(st->new)) ||
+ if (nla_put_be32(skb, CTA_STATS_FOUND, htonl(st->found)) ||
nla_put_be32(skb, CTA_STATS_INVALID, htonl(st->invalid)) ||
nla_put_be32(skb, CTA_STATS_IGNORE, htonl(st->ignore)) ||
- nla_put_be32(skb, CTA_STATS_DELETE, htonl(st->delete)) ||
- nla_put_be32(skb, CTA_STATS_DELETE_LIST, htonl(st->delete_list)) ||
nla_put_be32(skb, CTA_STATS_INSERT, htonl(st->insert)) ||
nla_put_be32(skb, CTA_STATS_INSERT_FAILED,
htonl(st->insert_failed)) ||
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index a96451a..9a715f8 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -192,15 +192,15 @@
static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
struct net *net, struct nf_conntrack_tuple *tuple)
{
- const struct gre_hdr_pptp *pgrehdr;
- struct gre_hdr_pptp _pgrehdr;
+ const struct pptp_gre_header *pgrehdr;
+ struct pptp_gre_header _pgrehdr;
__be16 srckey;
- const struct gre_hdr *grehdr;
- struct gre_hdr _grehdr;
+ const struct gre_base_hdr *grehdr;
+ struct gre_base_hdr _grehdr;
/* first only delinearize old RFC1701 GRE header */
grehdr = skb_header_pointer(skb, dataoff, sizeof(_grehdr), &_grehdr);
- if (!grehdr || grehdr->version != GRE_VERSION_PPTP) {
+ if (!grehdr || (grehdr->flags & GRE_VERSION) != GRE_VERSION_1) {
/* try to behave like "nf_conntrack_proto_generic" */
tuple->src.u.all = 0;
tuple->dst.u.all = 0;
@@ -212,8 +212,8 @@
if (!pgrehdr)
return true;
- if (ntohs(grehdr->protocol) != GRE_PROTOCOL_PPTP) {
- pr_debug("GRE_VERSION_PPTP but unknown proto\n");
+ if (grehdr->protocol != GRE_PROTO_PPP) {
+ pr_debug("Unsupported GRE proto(0x%x)\n", ntohs(grehdr->protocol));
return false;
}
diff --git a/net/netfilter/nf_conntrack_seqadj.c b/net/netfilter/nf_conntrack_seqadj.c
index dff0f0c..ef7063e 100644
--- a/net/netfilter/nf_conntrack_seqadj.c
+++ b/net/netfilter/nf_conntrack_seqadj.c
@@ -169,7 +169,7 @@
s32 seqoff, ackoff;
struct nf_conn_seqadj *seqadj = nfct_seqadj(ct);
struct nf_ct_seqadj *this_way, *other_way;
- int res;
+ int res = 1;
this_way = &seqadj->seq[dir];
other_way = &seqadj->seq[!dir];
@@ -184,27 +184,31 @@
else
seqoff = this_way->offset_before;
+ newseq = htonl(ntohl(tcph->seq) + seqoff);
+ inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, false);
+ pr_debug("Adjusting sequence number from %u->%u\n",
+ ntohl(tcph->seq), ntohl(newseq));
+ tcph->seq = newseq;
+
+ if (!tcph->ack)
+ goto out;
+
if (after(ntohl(tcph->ack_seq) - other_way->offset_before,
other_way->correction_pos))
ackoff = other_way->offset_after;
else
ackoff = other_way->offset_before;
- newseq = htonl(ntohl(tcph->seq) + seqoff);
newack = htonl(ntohl(tcph->ack_seq) - ackoff);
-
- inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, false);
inet_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack,
false);
-
- pr_debug("Adjusting sequence number from %u->%u, ack from %u->%u\n",
+ pr_debug("Adjusting ack number from %u->%u, ack from %u->%u\n",
ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq),
ntohl(newack));
-
- tcph->seq = newseq;
tcph->ack_seq = newack;
res = nf_ct_sack_adjust(skb, protoff, tcph, ct, ctinfo);
+out:
spin_unlock_bh(&ct->lock);
return res;
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 7d77217..621b81c 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -83,9 +83,10 @@
static int iswordc(const char c)
{
if (isalnum(c) || c == '!' || c == '"' || c == '%' ||
- (c >= '(' && c <= '/') || c == ':' || c == '<' || c == '>' ||
+ (c >= '(' && c <= '+') || c == ':' || c == '<' || c == '>' ||
c == '?' || (c >= '[' && c <= ']') || c == '_' || c == '`' ||
- c == '{' || c == '}' || c == '~')
+ c == '{' || c == '}' || c == '~' || (c >= '-' && c <= '/') ||
+ c == '\'')
return 1;
return 0;
}
@@ -329,13 +330,12 @@
static const char *sip_skip_whitespace(const char *dptr, const char *limit)
{
for (; dptr < limit; dptr++) {
- if (*dptr == ' ')
+ if (*dptr == ' ' || *dptr == '\t')
continue;
if (*dptr != '\r' && *dptr != '\n')
break;
dptr = sip_follow_continuation(dptr, limit);
- if (dptr == NULL)
- return NULL;
+ break;
}
return dptr;
}
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 3d9a316..5f446cd 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -212,6 +212,11 @@
if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use)))
return 0;
+ if (nf_ct_should_gc(ct)) {
+ nf_ct_kill(ct);
+ goto release;
+ }
+
/* we only want to print DIR_ORIGINAL */
if (NF_CT_DIRECTION(hash))
goto release;
@@ -352,13 +357,13 @@
seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x "
"%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
nr_conntracks,
- st->searched,
+ 0,
st->found,
- st->new,
+ 0,
st->invalid,
st->ignore,
- st->delete,
- st->delete_list,
+ 0,
+ 0,
st->insert,
st->insert_failed,
st->drop,
diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h
index 0655225..e0adb59 100644
--- a/net/netfilter/nf_internals.h
+++ b/net/netfilter/nf_internals.h
@@ -13,13 +13,13 @@
/* core.c */
-unsigned int nf_iterate(struct list_head *head, struct sk_buff *skb,
- struct nf_hook_state *state, struct nf_hook_ops **elemp);
+unsigned int nf_iterate(struct sk_buff *skb, struct nf_hook_state *state,
+ struct nf_hook_entry **entryp);
/* nf_queue.c */
-int nf_queue(struct sk_buff *skb, struct nf_hook_ops *elem,
- struct nf_hook_state *state, unsigned int queuenum);
-void nf_queue_nf_hook_drop(struct net *net, struct nf_hook_ops *ops);
+int nf_queue(struct sk_buff *skb, struct nf_hook_state *state,
+ unsigned int queuenum);
+void nf_queue_nf_hook_drop(struct net *net, const struct nf_hook_entry *entry);
int __init netfilter_queue_init(void);
/* nf_log.c */
diff --git a/net/netfilter/nf_log_common.c b/net/netfilter/nf_log_common.c
index a5aa596..119fe1c 100644
--- a/net/netfilter/nf_log_common.c
+++ b/net/netfilter/nf_log_common.c
@@ -77,7 +77,7 @@
nf_log_buf_add(m, "SPT=%u DPT=%u ",
ntohs(th->source), ntohs(th->dest));
/* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */
- if (logflags & XT_LOG_TCPSEQ) {
+ if (logflags & NF_LOG_TCPSEQ) {
nf_log_buf_add(m, "SEQ=%u ACK=%u ",
ntohl(th->seq), ntohl(th->ack_seq));
}
@@ -107,7 +107,7 @@
/* Max length: 11 "URGP=65535 " */
nf_log_buf_add(m, "URGP=%u ", ntohs(th->urg_ptr));
- if ((logflags & XT_LOG_TCPOPT) && th->doff*4 > sizeof(struct tcphdr)) {
+ if ((logflags & NF_LOG_TCPOPT) && th->doff*4 > sizeof(struct tcphdr)) {
u_int8_t _opt[60 - sizeof(struct tcphdr)];
const u_int8_t *op;
unsigned int i;
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index b19ad20..96964a0 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -96,14 +96,14 @@
}
EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs);
-void nf_queue_nf_hook_drop(struct net *net, struct nf_hook_ops *ops)
+void nf_queue_nf_hook_drop(struct net *net, const struct nf_hook_entry *entry)
{
const struct nf_queue_handler *qh;
rcu_read_lock();
qh = rcu_dereference(net->nf.queue_handler);
if (qh)
- qh->nf_hook_drop(net, ops);
+ qh->nf_hook_drop(net, entry);
rcu_read_unlock();
}
@@ -112,7 +112,6 @@
* through nf_reinject().
*/
int nf_queue(struct sk_buff *skb,
- struct nf_hook_ops *elem,
struct nf_hook_state *state,
unsigned int queuenum)
{
@@ -141,7 +140,6 @@
*entry = (struct nf_queue_entry) {
.skb = skb,
- .elem = elem,
.state = *state,
.size = sizeof(*entry) + afinfo->route_key_size,
};
@@ -165,11 +163,15 @@
void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
{
+ struct nf_hook_entry *hook_entry;
struct sk_buff *skb = entry->skb;
- struct nf_hook_ops *elem = entry->elem;
const struct nf_afinfo *afinfo;
+ struct nf_hook_ops *elem;
int err;
+ hook_entry = rcu_dereference(entry->state.hook_entries);
+ elem = &hook_entry->ops;
+
nf_queue_entry_release_refs(entry);
/* Continue traversal iff userspace said ok... */
@@ -186,8 +188,7 @@
if (verdict == NF_ACCEPT) {
next_hook:
- verdict = nf_iterate(entry->state.hook_list,
- skb, &entry->state, &elem);
+ verdict = nf_iterate(skb, &entry->state, &hook_entry);
}
switch (verdict & NF_VERDICT_MASK) {
@@ -198,7 +199,8 @@
local_bh_enable();
break;
case NF_QUEUE:
- err = nf_queue(skb, elem, &entry->state,
+ RCU_INIT_POINTER(entry->state.hook_entries, hook_entry);
+ err = nf_queue(skb, &entry->state,
verdict >> NF_VERDICT_QBITS);
if (err < 0) {
if (err == -ESRCH &&
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index bd9715e..b70d3ea 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -4410,6 +4410,31 @@
}
/**
+ * nft_parse_u32_check - fetch u32 attribute and check for maximum value
+ *
+ * @attr: netlink attribute to fetch value from
+ * @max: maximum value to be stored in dest
+ * @dest: pointer to the variable
+ *
+ * Parse, check and store a given u32 netlink attribute into variable.
+ * This function returns -ERANGE if the value goes over maximum value.
+ * Otherwise a 0 is returned and the attribute value is stored in the
+ * destination variable.
+ */
+unsigned int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest)
+{
+ int val;
+
+ val = ntohl(nla_get_be32(attr));
+ if (val > max)
+ return -ERANGE;
+
+ *dest = val;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nft_parse_u32_check);
+
+/**
* nft_parse_register - parse a register value from a netlink attribute
*
* @attr: netlink attribute
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index fb8b589..0dd5c69 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -34,7 +34,7 @@
.u = {
.log = {
.level = LOGLEVEL_WARNING,
- .logflags = NF_LOG_MASK,
+ .logflags = NF_LOG_DEFAULT_MASK,
},
},
};
@@ -93,12 +93,15 @@
if (priv->base == NFT_PAYLOAD_NETWORK_HEADER)
ptr = skb_network_header(skb);
- else
+ else {
+ if (!pkt->tprot_set)
+ return false;
ptr = skb_network_header(skb) + pkt->xt.thoff;
+ }
ptr += priv->offset;
- if (unlikely(ptr + priv->len >= skb_tail_pointer(skb)))
+ if (unlikely(ptr + priv->len > skb_tail_pointer(skb)))
return false;
*dest = 0;
@@ -260,8 +263,13 @@
if (err < 0)
goto err7;
- return 0;
+ err = nft_range_module_init();
+ if (err < 0)
+ goto err8;
+ return 0;
+err8:
+ nft_dynset_module_exit();
err7:
nft_payload_module_exit();
err6:
diff --git a/net/netfilter/nf_tables_inet.c b/net/netfilter/nf_tables_inet.c
index 6b5f762..f713cc2 100644
--- a/net/netfilter/nf_tables_inet.c
+++ b/net/netfilter/nf_tables_inet.c
@@ -82,7 +82,10 @@
{
int ret;
- nft_register_chain_type(&filter_inet);
+ ret = nft_register_chain_type(&filter_inet);
+ if (ret < 0)
+ return ret;
+
ret = register_pernet_subsys(&nf_tables_inet_net_ops);
if (ret < 0)
nft_unregister_chain_type(&filter_inet);
diff --git a/net/netfilter/nf_tables_netdev.c b/net/netfilter/nf_tables_netdev.c
index 75d696f..9e2ae42 100644
--- a/net/netfilter/nf_tables_netdev.c
+++ b/net/netfilter/nf_tables_netdev.c
@@ -15,78 +15,6 @@
#include <net/netfilter/nf_tables_ipv4.h>
#include <net/netfilter/nf_tables_ipv6.h>
-static inline void
-nft_netdev_set_pktinfo_ipv4(struct nft_pktinfo *pkt,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- struct iphdr *iph, _iph;
- u32 len, thoff;
-
- nft_set_pktinfo(pkt, skb, state);
-
- iph = skb_header_pointer(skb, skb_network_offset(skb), sizeof(*iph),
- &_iph);
- if (!iph)
- return;
-
- if (iph->ihl < 5 || iph->version != 4)
- return;
-
- len = ntohs(iph->tot_len);
- thoff = iph->ihl * 4;
- if (skb->len < len)
- return;
- else if (len < thoff)
- return;
-
- pkt->tprot = iph->protocol;
- pkt->xt.thoff = thoff;
- pkt->xt.fragoff = ntohs(iph->frag_off) & IP_OFFSET;
-}
-
-static inline void
-__nft_netdev_set_pktinfo_ipv6(struct nft_pktinfo *pkt,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
-#if IS_ENABLED(CONFIG_IPV6)
- struct ipv6hdr *ip6h, _ip6h;
- unsigned int thoff = 0;
- unsigned short frag_off;
- int protohdr;
- u32 pkt_len;
-
- ip6h = skb_header_pointer(skb, skb_network_offset(skb), sizeof(*ip6h),
- &_ip6h);
- if (!ip6h)
- return;
-
- if (ip6h->version != 6)
- return;
-
- pkt_len = ntohs(ip6h->payload_len);
- if (pkt_len + sizeof(*ip6h) > skb->len)
- return;
-
- protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, NULL);
- if (protohdr < 0)
- return;
-
- pkt->tprot = protohdr;
- pkt->xt.thoff = thoff;
- pkt->xt.fragoff = frag_off;
-#endif
-}
-
-static inline void nft_netdev_set_pktinfo_ipv6(struct nft_pktinfo *pkt,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- nft_set_pktinfo(pkt, skb, state);
- __nft_netdev_set_pktinfo_ipv6(pkt, skb, state);
-}
-
static unsigned int
nft_do_chain_netdev(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
@@ -95,13 +23,13 @@
switch (skb->protocol) {
case htons(ETH_P_IP):
- nft_netdev_set_pktinfo_ipv4(&pkt, skb, state);
+ nft_set_pktinfo_ipv4_validate(&pkt, skb, state);
break;
case htons(ETH_P_IPV6):
- nft_netdev_set_pktinfo_ipv6(&pkt, skb, state);
+ nft_set_pktinfo_ipv6_validate(&pkt, skb, state);
break;
default:
- nft_set_pktinfo(&pkt, skb, state);
+ nft_set_pktinfo_unspec(&pkt, skb, state);
break;
}
@@ -221,14 +149,25 @@
{
int ret;
- nft_register_chain_type(&nft_filter_chain_netdev);
- ret = register_pernet_subsys(&nf_tables_netdev_net_ops);
- if (ret < 0) {
- nft_unregister_chain_type(&nft_filter_chain_netdev);
+ ret = nft_register_chain_type(&nft_filter_chain_netdev);
+ if (ret)
return ret;
- }
- register_netdevice_notifier(&nf_tables_netdev_notifier);
+
+ ret = register_pernet_subsys(&nf_tables_netdev_net_ops);
+ if (ret)
+ goto err1;
+
+ ret = register_netdevice_notifier(&nf_tables_netdev_notifier);
+ if (ret)
+ goto err2;
+
return 0;
+
+err2:
+ unregister_pernet_subsys(&nf_tables_netdev_net_ops);
+err1:
+ nft_unregister_chain_type(&nft_filter_chain_netdev);
+ return ret;
}
static void __exit nf_tables_netdev_exit(void)
diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c
index fa24a5b..ab695f8 100644
--- a/net/netfilter/nf_tables_trace.c
+++ b/net/netfilter/nf_tables_trace.c
@@ -113,20 +113,22 @@
const struct nft_pktinfo *pkt)
{
const struct sk_buff *skb = pkt->skb;
- unsigned int len = min_t(unsigned int,
- pkt->xt.thoff - skb_network_offset(skb),
- NFT_TRACETYPE_NETWORK_HSIZE);
int off = skb_network_offset(skb);
+ unsigned int len, nh_end;
+ nh_end = pkt->tprot_set ? pkt->xt.thoff : skb->len;
+ len = min_t(unsigned int, nh_end - skb_network_offset(skb),
+ NFT_TRACETYPE_NETWORK_HSIZE);
if (trace_fill_header(nlskb, NFTA_TRACE_NETWORK_HEADER, skb, off, len))
return -1;
- len = min_t(unsigned int, skb->len - pkt->xt.thoff,
- NFT_TRACETYPE_TRANSPORT_HSIZE);
-
- if (trace_fill_header(nlskb, NFTA_TRACE_TRANSPORT_HEADER, skb,
- pkt->xt.thoff, len))
- return -1;
+ if (pkt->tprot_set) {
+ len = min_t(unsigned int, skb->len - pkt->xt.thoff,
+ NFT_TRACETYPE_TRANSPORT_HSIZE);
+ if (trace_fill_header(nlskb, NFTA_TRACE_TRANSPORT_HEADER, skb,
+ pkt->xt.thoff, len))
+ return -1;
+ }
if (!skb_mac_header_was_set(skb))
return 0;
diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c
index e924e95..3b79f34 100644
--- a/net/netfilter/nfnetlink_cthelper.c
+++ b/net/netfilter/nfnetlink_cthelper.c
@@ -43,7 +43,7 @@
if (help == NULL)
return NF_DROP;
- /* rcu_read_lock()ed by nf_hook_slow */
+ /* rcu_read_lock()ed by nf_hook_thresh */
helper = rcu_dereference(help->helper);
if (helper == NULL)
return NF_DROP;
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 6577db5..eb086a1 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -442,7 +442,9 @@
if (nla_put_be32(inst->skb, NFULA_IFINDEX_PHYSINDEV,
htonl(indev->ifindex)) ||
/* this is the bridge group "brX" */
- /* rcu_read_lock()ed by nf_hook_slow or nf_log_packet */
+ /* rcu_read_lock()ed by nf_hook_thresh or
+ * nf_log_packet.
+ */
nla_put_be32(inst->skb, NFULA_IFINDEX_INDEV,
htonl(br_port_get_rcu(indev)->br->dev->ifindex)))
goto nla_put_failure;
@@ -477,7 +479,9 @@
if (nla_put_be32(inst->skb, NFULA_IFINDEX_PHYSOUTDEV,
htonl(outdev->ifindex)) ||
/* this is the bridge group "brX" */
- /* rcu_read_lock()ed by nf_hook_slow or nf_log_packet */
+ /* rcu_read_lock()ed by nf_hook_thresh or
+ * nf_log_packet.
+ */
nla_put_be32(inst->skb, NFULA_IFINDEX_OUTDEV,
htonl(br_port_get_rcu(outdev)->br->dev->ifindex)))
goto nla_put_failure;
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index f49f450..af832c5 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -740,7 +740,7 @@
struct net *net = entry->state.net;
struct nfnl_queue_net *q = nfnl_queue_pernet(net);
- /* rcu_read_lock()ed by nf_hook_slow() */
+ /* rcu_read_lock()ed by nf_hook_thresh */
queue = instance_lookup(q, queuenum);
if (!queue)
return -ESRCH;
@@ -917,12 +917,14 @@
.notifier_call = nfqnl_rcv_dev_event,
};
-static int nf_hook_cmp(struct nf_queue_entry *entry, unsigned long ops_ptr)
+static int nf_hook_cmp(struct nf_queue_entry *entry, unsigned long entry_ptr)
{
- return entry->elem == (struct nf_hook_ops *)ops_ptr;
+ return rcu_access_pointer(entry->state.hook_entries) ==
+ (struct nf_hook_entry *)entry_ptr;
}
-static void nfqnl_nf_hook_drop(struct net *net, struct nf_hook_ops *hook)
+static void nfqnl_nf_hook_drop(struct net *net,
+ const struct nf_hook_entry *hook)
{
struct nfnl_queue_net *q = nfnl_queue_pernet(net);
int i;
@@ -1522,9 +1524,16 @@
goto cleanup_netlink_notifier;
}
- register_netdevice_notifier(&nfqnl_dev_notifier);
+ status = register_netdevice_notifier(&nfqnl_dev_notifier);
+ if (status < 0) {
+ pr_err("nf_queue: failed to register netdevice notifier\n");
+ goto cleanup_netlink_subsys;
+ }
+
return status;
+cleanup_netlink_subsys:
+ nfnetlink_subsys_unregister(&nfqnl_subsys);
cleanup_netlink_notifier:
netlink_unregister_notifier(&nfqnl_rtnl_notifier);
unregister_pernet_subsys(&nfnl_queue_net_ops);
diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c
index d71cc18..31c15ed 100644
--- a/net/netfilter/nft_bitwise.c
+++ b/net/netfilter/nft_bitwise.c
@@ -52,6 +52,7 @@
{
struct nft_bitwise *priv = nft_expr_priv(expr);
struct nft_data_desc d1, d2;
+ u32 len;
int err;
if (tb[NFTA_BITWISE_SREG] == NULL ||
@@ -61,7 +62,12 @@
tb[NFTA_BITWISE_XOR] == NULL)
return -EINVAL;
- priv->len = ntohl(nla_get_be32(tb[NFTA_BITWISE_LEN]));
+ err = nft_parse_u32_check(tb[NFTA_BITWISE_LEN], U8_MAX, &len);
+ if (err < 0)
+ return err;
+
+ priv->len = len;
+
priv->sreg = nft_parse_register(tb[NFTA_BITWISE_SREG]);
err = nft_validate_register_load(priv->sreg, priv->len);
if (err < 0)
diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c
index b78c28b..ee63d98 100644
--- a/net/netfilter/nft_byteorder.c
+++ b/net/netfilter/nft_byteorder.c
@@ -99,6 +99,7 @@
const struct nlattr * const tb[])
{
struct nft_byteorder *priv = nft_expr_priv(expr);
+ u32 size, len;
int err;
if (tb[NFTA_BYTEORDER_SREG] == NULL ||
@@ -117,7 +118,12 @@
return -EINVAL;
}
- priv->size = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_SIZE]));
+ err = nft_parse_u32_check(tb[NFTA_BYTEORDER_SIZE], U8_MAX, &size);
+ if (err < 0)
+ return err;
+
+ priv->size = size;
+
switch (priv->size) {
case 2:
case 4:
@@ -128,7 +134,12 @@
}
priv->sreg = nft_parse_register(tb[NFTA_BYTEORDER_SREG]);
- priv->len = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_LEN]));
+ err = nft_parse_u32_check(tb[NFTA_BYTEORDER_LEN], U8_MAX, &len);
+ if (err < 0)
+ return err;
+
+ priv->len = len;
+
err = nft_validate_register_load(priv->sreg, priv->len);
if (err < 0)
return err;
diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c
index e25b35d..2e53739 100644
--- a/net/netfilter/nft_cmp.c
+++ b/net/netfilter/nft_cmp.c
@@ -84,6 +84,9 @@
if (err < 0)
return err;
+ if (desc.len > U8_MAX)
+ return -ERANGE;
+
priv->op = ntohl(nla_get_be32(tb[NFTA_CMP_OP]));
priv->len = desc.len;
return 0;
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 51e180f..d7b0d171 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -128,15 +128,18 @@
memcpy(dest, &count, sizeof(count));
return;
}
+ case NFT_CT_L3PROTOCOL:
+ *dest = nf_ct_l3num(ct);
+ return;
+ case NFT_CT_PROTOCOL:
+ *dest = nf_ct_protonum(ct);
+ return;
default:
break;
}
tuple = &ct->tuplehash[priv->dir].tuple;
switch (priv->key) {
- case NFT_CT_L3PROTOCOL:
- *dest = nf_ct_l3num(ct);
- return;
case NFT_CT_SRC:
memcpy(dest, tuple->src.u3.all,
nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16);
@@ -145,9 +148,6 @@
memcpy(dest, tuple->dst.u3.all,
nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16);
return;
- case NFT_CT_PROTOCOL:
- *dest = nf_ct_protonum(ct);
- return;
case NFT_CT_PROTO_SRC:
*dest = (__force __u16)tuple->src.u.all;
return;
@@ -283,8 +283,9 @@
case NFT_CT_L3PROTOCOL:
case NFT_CT_PROTOCOL:
- if (tb[NFTA_CT_DIRECTION] == NULL)
- return -EINVAL;
+ /* For compatibility, do not report error if NFTA_CT_DIRECTION
+ * attribute is specified.
+ */
len = sizeof(u8);
break;
case NFT_CT_SRC:
@@ -363,6 +364,8 @@
switch (priv->key) {
#ifdef CONFIG_NF_CONNTRACK_MARK
case NFT_CT_MARK:
+ if (tb[NFTA_CT_DIRECTION])
+ return -EINVAL;
len = FIELD_SIZEOF(struct nf_conn, mark);
break;
#endif
@@ -432,8 +435,6 @@
goto nla_put_failure;
switch (priv->key) {
- case NFT_CT_L3PROTOCOL:
- case NFT_CT_PROTOCOL:
case NFT_CT_SRC:
case NFT_CT_DST:
case NFT_CT_PROTO_SRC:
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index 0af2669..e3b83c3 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -22,6 +22,7 @@
enum nft_dynset_ops op:8;
enum nft_registers sreg_key:8;
enum nft_registers sreg_data:8;
+ bool invert;
u64 timeout;
struct nft_expr *expr;
struct nft_set_binding binding;
@@ -82,10 +83,14 @@
if (sexpr != NULL)
sexpr->ops->eval(sexpr, regs, pkt);
+
+ if (priv->invert)
+ regs->verdict.code = NFT_BREAK;
return;
}
out:
- regs->verdict.code = NFT_BREAK;
+ if (!priv->invert)
+ regs->verdict.code = NFT_BREAK;
}
static const struct nla_policy nft_dynset_policy[NFTA_DYNSET_MAX + 1] = {
@@ -96,6 +101,7 @@
[NFTA_DYNSET_SREG_DATA] = { .type = NLA_U32 },
[NFTA_DYNSET_TIMEOUT] = { .type = NLA_U64 },
[NFTA_DYNSET_EXPR] = { .type = NLA_NESTED },
+ [NFTA_DYNSET_FLAGS] = { .type = NLA_U32 },
};
static int nft_dynset_init(const struct nft_ctx *ctx,
@@ -113,6 +119,15 @@
tb[NFTA_DYNSET_SREG_KEY] == NULL)
return -EINVAL;
+ if (tb[NFTA_DYNSET_FLAGS]) {
+ u32 flags = ntohl(nla_get_be32(tb[NFTA_DYNSET_FLAGS]));
+
+ if (flags & ~NFT_DYNSET_F_INV)
+ return -EINVAL;
+ if (flags & NFT_DYNSET_F_INV)
+ priv->invert = true;
+ }
+
set = nf_tables_set_lookup(ctx->table, tb[NFTA_DYNSET_SET_NAME],
genmask);
if (IS_ERR(set)) {
@@ -220,6 +235,7 @@
static int nft_dynset_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
const struct nft_dynset *priv = nft_expr_priv(expr);
+ u32 flags = priv->invert ? NFT_DYNSET_F_INV : 0;
if (nft_dump_register(skb, NFTA_DYNSET_SREG_KEY, priv->sreg_key))
goto nla_put_failure;
@@ -235,6 +251,8 @@
goto nla_put_failure;
if (priv->expr && nft_expr_dump(skb, NFTA_DYNSET_EXPR, priv->expr))
goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_DYNSET_FLAGS, htonl(flags)))
+ goto nla_put_failure;
return 0;
nla_put_failure:
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index 82c264e..a84cf3d 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -59,7 +59,7 @@
const struct nlattr * const tb[])
{
struct nft_exthdr *priv = nft_expr_priv(expr);
- u32 offset, len;
+ u32 offset, len, err;
if (tb[NFTA_EXTHDR_DREG] == NULL ||
tb[NFTA_EXTHDR_TYPE] == NULL ||
@@ -67,11 +67,13 @@
tb[NFTA_EXTHDR_LEN] == NULL)
return -EINVAL;
- offset = ntohl(nla_get_be32(tb[NFTA_EXTHDR_OFFSET]));
- len = ntohl(nla_get_be32(tb[NFTA_EXTHDR_LEN]));
+ err = nft_parse_u32_check(tb[NFTA_EXTHDR_OFFSET], U8_MAX, &offset);
+ if (err < 0)
+ return err;
- if (offset > U8_MAX || len > U8_MAX)
- return -ERANGE;
+ err = nft_parse_u32_check(tb[NFTA_EXTHDR_LEN], U8_MAX, &len);
+ if (err < 0)
+ return err;
priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]);
priv->offset = offset;
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index 764251d..09473b4 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -23,6 +23,7 @@
u8 len;
u32 modulus;
u32 seed;
+ u32 offset;
};
static void nft_hash_eval(const struct nft_expr *expr,
@@ -31,10 +32,10 @@
{
struct nft_hash *priv = nft_expr_priv(expr);
const void *data = ®s->data[priv->sreg];
+ u32 h;
- regs->data[priv->dreg] =
- reciprocal_scale(jhash(data, priv->len, priv->seed),
- priv->modulus);
+ h = reciprocal_scale(jhash(data, priv->len, priv->seed), priv->modulus);
+ regs->data[priv->dreg] = h + priv->offset;
}
static const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = {
@@ -59,6 +60,9 @@
!tb[NFTA_HASH_MODULUS])
return -EINVAL;
+ if (tb[NFTA_HASH_OFFSET])
+ priv->offset = ntohl(nla_get_be32(tb[NFTA_HASH_OFFSET]));
+
priv->sreg = nft_parse_register(tb[NFTA_HASH_SREG]);
priv->dreg = nft_parse_register(tb[NFTA_HASH_DREG]);
@@ -72,6 +76,9 @@
if (priv->modulus <= 1)
return -ERANGE;
+ if (priv->offset + priv->modulus - 1 < priv->offset)
+ return -EOVERFLOW;
+
priv->seed = ntohl(nla_get_be32(tb[NFTA_HASH_SEED]));
return nft_validate_register_load(priv->sreg, len) &&
@@ -94,7 +101,9 @@
goto nla_put_failure;
if (nla_put_be32(skb, NFTA_HASH_SEED, htonl(priv->seed)))
goto nla_put_failure;
-
+ if (priv->offset != 0)
+ if (nla_put_be32(skb, NFTA_HASH_OFFSET, htonl(priv->offset)))
+ goto nla_put_failure;
return 0;
nla_put_failure:
diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c
index db3b746..d17018f 100644
--- a/net/netfilter/nft_immediate.c
+++ b/net/netfilter/nft_immediate.c
@@ -53,6 +53,10 @@
tb[NFTA_IMMEDIATE_DATA]);
if (err < 0)
return err;
+
+ if (desc.len > U8_MAX)
+ return -ERANGE;
+
priv->dlen = desc.len;
priv->dreg = nft_parse_register(tb[NFTA_IMMEDIATE_DREG]);
diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c
index 24a73bb..1b01404 100644
--- a/net/netfilter/nft_log.c
+++ b/net/netfilter/nft_log.c
@@ -58,8 +58,11 @@
if (tb[NFTA_LOG_LEVEL] != NULL &&
tb[NFTA_LOG_GROUP] != NULL)
return -EINVAL;
- if (tb[NFTA_LOG_GROUP] != NULL)
+ if (tb[NFTA_LOG_GROUP] != NULL) {
li->type = NF_LOG_TYPE_ULOG;
+ if (tb[NFTA_LOG_FLAGS] != NULL)
+ return -EINVAL;
+ }
nla = tb[NFTA_LOG_PREFIX];
if (nla != NULL) {
@@ -87,6 +90,10 @@
if (tb[NFTA_LOG_FLAGS] != NULL) {
li->u.log.logflags =
ntohl(nla_get_be32(tb[NFTA_LOG_FLAGS]));
+ if (li->u.log.logflags & ~NF_LOG_MASK) {
+ err = -EINVAL;
+ goto err1;
+ }
}
break;
case NF_LOG_TYPE_ULOG:
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index e164325..8166b69 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -43,7 +43,7 @@
return;
}
- if (found && set->flags & NFT_SET_MAP)
+ if (set->flags & NFT_SET_MAP)
nft_data_copy(®s->data[priv->dreg],
nft_set_ext_data(ext), set->dlen);
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 8a6bc76..6c1e024 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -52,6 +52,8 @@
*dest = pkt->pf;
break;
case NFT_META_L4PROTO:
+ if (!pkt->tprot_set)
+ goto err;
*dest = pkt->tprot;
break;
case NFT_META_PRIORITY:
diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c
index 294745e..55bc5ab 100644
--- a/net/netfilter/nft_numgen.c
+++ b/net/netfilter/nft_numgen.c
@@ -21,8 +21,9 @@
struct nft_ng_inc {
enum nft_registers dreg:8;
- u32 until;
+ u32 modulus;
atomic_t counter;
+ u32 offset;
};
static void nft_ng_inc_eval(const struct nft_expr *expr,
@@ -34,16 +35,17 @@
do {
oval = atomic_read(&priv->counter);
- nval = (oval + 1 < priv->until) ? oval + 1 : 0;
+ nval = (oval + 1 < priv->modulus) ? oval + 1 : 0;
} while (atomic_cmpxchg(&priv->counter, oval, nval) != oval);
- memcpy(®s->data[priv->dreg], &priv->counter, sizeof(u32));
+ regs->data[priv->dreg] = nval + priv->offset;
}
static const struct nla_policy nft_ng_policy[NFTA_NG_MAX + 1] = {
[NFTA_NG_DREG] = { .type = NLA_U32 },
- [NFTA_NG_UNTIL] = { .type = NLA_U32 },
+ [NFTA_NG_MODULUS] = { .type = NLA_U32 },
[NFTA_NG_TYPE] = { .type = NLA_U32 },
+ [NFTA_NG_OFFSET] = { .type = NLA_U32 },
};
static int nft_ng_inc_init(const struct nft_ctx *ctx,
@@ -52,10 +54,16 @@
{
struct nft_ng_inc *priv = nft_expr_priv(expr);
- priv->until = ntohl(nla_get_be32(tb[NFTA_NG_UNTIL]));
- if (priv->until == 0)
+ if (tb[NFTA_NG_OFFSET])
+ priv->offset = ntohl(nla_get_be32(tb[NFTA_NG_OFFSET]));
+
+ priv->modulus = ntohl(nla_get_be32(tb[NFTA_NG_MODULUS]));
+ if (priv->modulus == 0)
return -ERANGE;
+ if (priv->offset + priv->modulus - 1 < priv->offset)
+ return -EOVERFLOW;
+
priv->dreg = nft_parse_register(tb[NFTA_NG_DREG]);
atomic_set(&priv->counter, 0);
@@ -64,14 +72,16 @@
}
static int nft_ng_dump(struct sk_buff *skb, enum nft_registers dreg,
- u32 until, enum nft_ng_types type)
+ u32 modulus, enum nft_ng_types type, u32 offset)
{
if (nft_dump_register(skb, NFTA_NG_DREG, dreg))
goto nla_put_failure;
- if (nla_put_be32(skb, NFTA_NG_UNTIL, htonl(until)))
+ if (nla_put_be32(skb, NFTA_NG_MODULUS, htonl(modulus)))
goto nla_put_failure;
if (nla_put_be32(skb, NFTA_NG_TYPE, htonl(type)))
goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_NG_OFFSET, htonl(offset)))
+ goto nla_put_failure;
return 0;
@@ -83,12 +93,14 @@
{
const struct nft_ng_inc *priv = nft_expr_priv(expr);
- return nft_ng_dump(skb, priv->dreg, priv->until, NFT_NG_INCREMENTAL);
+ return nft_ng_dump(skb, priv->dreg, priv->modulus, NFT_NG_INCREMENTAL,
+ priv->offset);
}
struct nft_ng_random {
enum nft_registers dreg:8;
- u32 until;
+ u32 modulus;
+ u32 offset;
};
static void nft_ng_random_eval(const struct nft_expr *expr,
@@ -97,9 +109,10 @@
{
struct nft_ng_random *priv = nft_expr_priv(expr);
struct rnd_state *state = this_cpu_ptr(&nft_numgen_prandom_state);
+ u32 val;
- regs->data[priv->dreg] = reciprocal_scale(prandom_u32_state(state),
- priv->until);
+ val = reciprocal_scale(prandom_u32_state(state), priv->modulus);
+ regs->data[priv->dreg] = val + priv->offset;
}
static int nft_ng_random_init(const struct nft_ctx *ctx,
@@ -108,10 +121,16 @@
{
struct nft_ng_random *priv = nft_expr_priv(expr);
- priv->until = ntohl(nla_get_be32(tb[NFTA_NG_UNTIL]));
- if (priv->until == 0)
+ if (tb[NFTA_NG_OFFSET])
+ priv->offset = ntohl(nla_get_be32(tb[NFTA_NG_OFFSET]));
+
+ priv->modulus = ntohl(nla_get_be32(tb[NFTA_NG_MODULUS]));
+ if (priv->modulus == 0)
return -ERANGE;
+ if (priv->offset + priv->modulus - 1 < priv->offset)
+ return -EOVERFLOW;
+
prandom_init_once(&nft_numgen_prandom_state);
priv->dreg = nft_parse_register(tb[NFTA_NG_DREG]);
@@ -124,7 +143,8 @@
{
const struct nft_ng_random *priv = nft_expr_priv(expr);
- return nft_ng_dump(skb, priv->dreg, priv->until, NFT_NG_RANDOM);
+ return nft_ng_dump(skb, priv->dreg, priv->modulus, NFT_NG_RANDOM,
+ priv->offset);
}
static struct nft_expr_type nft_ng_type;
@@ -149,8 +169,8 @@
{
u32 type;
- if (!tb[NFTA_NG_DREG] ||
- !tb[NFTA_NG_UNTIL] ||
+ if (!tb[NFTA_NG_DREG] ||
+ !tb[NFTA_NG_MODULUS] ||
!tb[NFTA_NG_TYPE])
return ERR_PTR(-EINVAL);
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index 12cd4bf..b2f8861 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -92,6 +92,8 @@
offset = skb_network_offset(skb);
break;
case NFT_PAYLOAD_TRANSPORT_HEADER:
+ if (!pkt->tprot_set)
+ goto err;
offset = pkt->xt.thoff;
break;
default:
@@ -184,6 +186,8 @@
offset = skb_network_offset(skb);
break;
case NFT_PAYLOAD_TRANSPORT_HEADER:
+ if (!pkt->tprot_set)
+ goto err;
offset = pkt->xt.thoff;
break;
default:
diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c
index 61d216e..393d359 100644
--- a/net/netfilter/nft_queue.c
+++ b/net/netfilter/nft_queue.c
@@ -22,9 +22,10 @@
static u32 jhash_initval __read_mostly;
struct nft_queue {
- u16 queuenum;
- u16 queues_total;
- u16 flags;
+ enum nft_registers sreg_qnum:8;
+ u16 queuenum;
+ u16 queues_total;
+ u16 flags;
};
static void nft_queue_eval(const struct nft_expr *expr,
@@ -54,27 +55,51 @@
regs->verdict.code = ret;
}
+static void nft_queue_sreg_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_queue *priv = nft_expr_priv(expr);
+ u32 queue, ret;
+
+ queue = regs->data[priv->sreg_qnum];
+
+ ret = NF_QUEUE_NR(queue);
+ if (priv->flags & NFT_QUEUE_FLAG_BYPASS)
+ ret |= NF_VERDICT_FLAG_QUEUE_BYPASS;
+
+ regs->verdict.code = ret;
+}
+
static const struct nla_policy nft_queue_policy[NFTA_QUEUE_MAX + 1] = {
[NFTA_QUEUE_NUM] = { .type = NLA_U16 },
[NFTA_QUEUE_TOTAL] = { .type = NLA_U16 },
[NFTA_QUEUE_FLAGS] = { .type = NLA_U16 },
+ [NFTA_QUEUE_SREG_QNUM] = { .type = NLA_U32 },
};
static int nft_queue_init(const struct nft_ctx *ctx,
- const struct nft_expr *expr,
- const struct nlattr * const tb[])
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
{
struct nft_queue *priv = nft_expr_priv(expr);
+ u32 maxid;
- if (tb[NFTA_QUEUE_NUM] == NULL)
- return -EINVAL;
-
- init_hashrandom(&jhash_initval);
priv->queuenum = ntohs(nla_get_be16(tb[NFTA_QUEUE_NUM]));
- if (tb[NFTA_QUEUE_TOTAL] != NULL)
+ if (tb[NFTA_QUEUE_TOTAL])
priv->queues_total = ntohs(nla_get_be16(tb[NFTA_QUEUE_TOTAL]));
- if (tb[NFTA_QUEUE_FLAGS] != NULL) {
+ else
+ priv->queues_total = 1;
+
+ if (priv->queues_total == 0)
+ return -EINVAL;
+
+ maxid = priv->queues_total - 1 + priv->queuenum;
+ if (maxid > U16_MAX)
+ return -ERANGE;
+
+ if (tb[NFTA_QUEUE_FLAGS]) {
priv->flags = ntohs(nla_get_be16(tb[NFTA_QUEUE_FLAGS]));
if (priv->flags & ~NFT_QUEUE_FLAG_MASK)
return -EINVAL;
@@ -82,6 +107,29 @@
return 0;
}
+static int nft_queue_sreg_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_queue *priv = nft_expr_priv(expr);
+ int err;
+
+ priv->sreg_qnum = nft_parse_register(tb[NFTA_QUEUE_SREG_QNUM]);
+ err = nft_validate_register_load(priv->sreg_qnum, sizeof(u32));
+ if (err < 0)
+ return err;
+
+ if (tb[NFTA_QUEUE_FLAGS]) {
+ priv->flags = ntohs(nla_get_be16(tb[NFTA_QUEUE_FLAGS]));
+ if (priv->flags & ~NFT_QUEUE_FLAG_MASK)
+ return -EINVAL;
+ if (priv->flags & NFT_QUEUE_FLAG_CPU_FANOUT)
+ return -EOPNOTSUPP;
+ }
+
+ return 0;
+}
+
static int nft_queue_dump(struct sk_buff *skb, const struct nft_expr *expr)
{
const struct nft_queue *priv = nft_expr_priv(expr);
@@ -97,6 +145,21 @@
return -1;
}
+static int
+nft_queue_sreg_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_queue *priv = nft_expr_priv(expr);
+
+ if (nft_dump_register(skb, NFTA_QUEUE_SREG_QNUM, priv->sreg_qnum) ||
+ nla_put_be16(skb, NFTA_QUEUE_FLAGS, htons(priv->flags)))
+ goto nla_put_failure;
+
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
static struct nft_expr_type nft_queue_type;
static const struct nft_expr_ops nft_queue_ops = {
.type = &nft_queue_type,
@@ -106,9 +169,35 @@
.dump = nft_queue_dump,
};
+static const struct nft_expr_ops nft_queue_sreg_ops = {
+ .type = &nft_queue_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_queue)),
+ .eval = nft_queue_sreg_eval,
+ .init = nft_queue_sreg_init,
+ .dump = nft_queue_sreg_dump,
+};
+
+static const struct nft_expr_ops *
+nft_queue_select_ops(const struct nft_ctx *ctx,
+ const struct nlattr * const tb[])
+{
+ if (tb[NFTA_QUEUE_NUM] && tb[NFTA_QUEUE_SREG_QNUM])
+ return ERR_PTR(-EINVAL);
+
+ init_hashrandom(&jhash_initval);
+
+ if (tb[NFTA_QUEUE_NUM])
+ return &nft_queue_ops;
+
+ if (tb[NFTA_QUEUE_SREG_QNUM])
+ return &nft_queue_sreg_ops;
+
+ return ERR_PTR(-EINVAL);
+}
+
static struct nft_expr_type nft_queue_type __read_mostly = {
.name = "queue",
- .ops = &nft_queue_ops,
+ .select_ops = &nft_queue_select_ops,
.policy = nft_queue_policy,
.maxattr = NFTA_QUEUE_MAX,
.owner = THIS_MODULE,
diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c
index 6eafbf9..c00104c 100644
--- a/net/netfilter/nft_quota.c
+++ b/net/netfilter/nft_quota.c
@@ -21,10 +21,10 @@
atomic64_t remain;
};
-static inline long nft_quota(struct nft_quota *priv,
- const struct nft_pktinfo *pkt)
+static inline bool nft_overquota(struct nft_quota *priv,
+ const struct nft_pktinfo *pkt)
{
- return atomic64_sub_return(pkt->skb->len, &priv->remain);
+ return atomic64_sub_return(pkt->skb->len, &priv->remain) < 0;
}
static void nft_quota_eval(const struct nft_expr *expr,
@@ -33,7 +33,7 @@
{
struct nft_quota *priv = nft_expr_priv(expr);
- if (nft_quota(priv, pkt) < 0 && !priv->invert)
+ if (nft_overquota(priv, pkt) ^ priv->invert)
regs->verdict.code = NFT_BREAK;
}
diff --git a/net/netfilter/nft_range.c b/net/netfilter/nft_range.c
new file mode 100644
index 0000000..c6d5358
--- /dev/null
+++ b/net/netfilter/nft_range.c
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2016 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_tables.h>
+
+struct nft_range_expr {
+ struct nft_data data_from;
+ struct nft_data data_to;
+ enum nft_registers sreg:8;
+ u8 len;
+ enum nft_range_ops op:8;
+};
+
+static void nft_range_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_range_expr *priv = nft_expr_priv(expr);
+ bool mismatch;
+ int d1, d2;
+
+ d1 = memcmp(®s->data[priv->sreg], &priv->data_from, priv->len);
+ d2 = memcmp(®s->data[priv->sreg], &priv->data_to, priv->len);
+ switch (priv->op) {
+ case NFT_RANGE_EQ:
+ mismatch = (d1 < 0 || d2 > 0);
+ break;
+ case NFT_RANGE_NEQ:
+ mismatch = (d1 >= 0 && d2 <= 0);
+ break;
+ }
+
+ if (mismatch)
+ regs->verdict.code = NFT_BREAK;
+}
+
+static const struct nla_policy nft_range_policy[NFTA_RANGE_MAX + 1] = {
+ [NFTA_RANGE_SREG] = { .type = NLA_U32 },
+ [NFTA_RANGE_OP] = { .type = NLA_U32 },
+ [NFTA_RANGE_FROM_DATA] = { .type = NLA_NESTED },
+ [NFTA_RANGE_TO_DATA] = { .type = NLA_NESTED },
+};
+
+static int nft_range_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_range_expr *priv = nft_expr_priv(expr);
+ struct nft_data_desc desc_from, desc_to;
+ int err;
+
+ err = nft_data_init(NULL, &priv->data_from, sizeof(priv->data_from),
+ &desc_from, tb[NFTA_RANGE_FROM_DATA]);
+ if (err < 0)
+ return err;
+
+ err = nft_data_init(NULL, &priv->data_to, sizeof(priv->data_to),
+ &desc_to, tb[NFTA_RANGE_TO_DATA]);
+ if (err < 0)
+ goto err1;
+
+ if (desc_from.len != desc_to.len) {
+ err = -EINVAL;
+ goto err2;
+ }
+
+ priv->sreg = nft_parse_register(tb[NFTA_RANGE_SREG]);
+ err = nft_validate_register_load(priv->sreg, desc_from.len);
+ if (err < 0)
+ goto err2;
+
+ priv->op = ntohl(nla_get_be32(tb[NFTA_RANGE_OP]));
+ priv->len = desc_from.len;
+ return 0;
+err2:
+ nft_data_uninit(&priv->data_to, desc_to.type);
+err1:
+ nft_data_uninit(&priv->data_from, desc_from.type);
+ return err;
+}
+
+static int nft_range_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_range_expr *priv = nft_expr_priv(expr);
+
+ if (nft_dump_register(skb, NFTA_RANGE_SREG, priv->sreg))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_RANGE_OP, htonl(priv->op)))
+ goto nla_put_failure;
+
+ if (nft_data_dump(skb, NFTA_RANGE_FROM_DATA, &priv->data_from,
+ NFT_DATA_VALUE, priv->len) < 0 ||
+ nft_data_dump(skb, NFTA_RANGE_TO_DATA, &priv->data_to,
+ NFT_DATA_VALUE, priv->len) < 0)
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static struct nft_expr_type nft_range_type;
+static const struct nft_expr_ops nft_range_ops = {
+ .type = &nft_range_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_range_expr)),
+ .eval = nft_range_eval,
+ .init = nft_range_init,
+ .dump = nft_range_dump,
+};
+
+static struct nft_expr_type nft_range_type __read_mostly = {
+ .name = "range",
+ .ops = &nft_range_ops,
+ .policy = nft_range_policy,
+ .maxattr = NFTA_RANGE_MAX,
+ .owner = THIS_MODULE,
+};
+
+int __init nft_range_module_init(void)
+{
+ return nft_register_expr(&nft_range_type);
+}
+
+void nft_range_module_exit(void)
+{
+ nft_unregister_expr(&nft_range_type);
+}
diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
index 515131f..dbd6c4a 100644
--- a/net/netfilter/xt_RATEEST.c
+++ b/net/netfilter/xt_RATEEST.c
@@ -24,7 +24,6 @@
#define RATEEST_HSIZE 16
static struct hlist_head rateest_hash[RATEEST_HSIZE] __read_mostly;
static unsigned int jhash_rnd __read_mostly;
-static bool rnd_inited __read_mostly;
static unsigned int xt_rateest_hash(const char *name)
{
@@ -99,10 +98,7 @@
} cfg;
int ret;
- if (unlikely(!rnd_inited)) {
- get_random_bytes(&jhash_rnd, sizeof(jhash_rnd));
- rnd_inited = true;
- }
+ net_get_random_once(&jhash_rnd, sizeof(jhash_rnd));
est = xt_rateest_lookup(info->name);
if (est) {
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index e118397..872db2d 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -110,18 +110,14 @@
if (info->mss == XT_TCPMSS_CLAMP_PMTU) {
struct net *net = par->net;
unsigned int in_mtu = tcpmss_reverse_mtu(net, skb, family);
+ unsigned int min_mtu = min(dst_mtu(skb_dst(skb)), in_mtu);
- if (dst_mtu(skb_dst(skb)) <= minlen) {
+ if (min_mtu <= minlen) {
net_err_ratelimited("unknown or invalid path-MTU (%u)\n",
- dst_mtu(skb_dst(skb)));
+ min_mtu);
return -1;
}
- if (in_mtu <= minlen) {
- net_err_ratelimited("unknown or invalid path-MTU (%u)\n",
- in_mtu);
- return -1;
- }
- newmss = min(dst_mtu(skb_dst(skb)), in_mtu) - minlen;
+ newmss = min_mtu - minlen;
} else
newmss = info->mss;
diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
index 6e57a39..0471db4 100644
--- a/net/netfilter/xt_TEE.c
+++ b/net/netfilter/xt_TEE.c
@@ -89,6 +89,8 @@
return -EINVAL;
if (info->oif[0]) {
+ int ret;
+
if (info->oif[sizeof(info->oif)-1] != '\0')
return -EINVAL;
@@ -101,7 +103,11 @@
priv->notifier.notifier_call = tee_netdev_event;
info->priv = priv;
- register_netdevice_notifier(&priv->notifier);
+ ret = register_netdevice_notifier(&priv->notifier);
+ if (ret) {
+ kfree(priv);
+ return ret;
+ }
} else
info->priv = NULL;
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index 99bbc82..b6dc322 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -366,14 +366,8 @@
unsigned int i;
int ret;
- if (unlikely(!connlimit_rnd)) {
- u_int32_t rand;
+ net_get_random_once(&connlimit_rnd, sizeof(connlimit_rnd));
- do {
- get_random_bytes(&rand, sizeof(rand));
- } while (!rand);
- cmpxchg(&connlimit_rnd, 0, rand);
- }
ret = nf_ct_l3proto_try_module_get(par->family);
if (ret < 0) {
pr_info("cannot load conntrack support for "
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 1786968..44a095e 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -56,6 +56,7 @@
}
/* need to declare this at the top */
+static const struct file_operations dl_file_ops_v1;
static const struct file_operations dl_file_ops;
/* hash table crap */
@@ -86,8 +87,8 @@
unsigned long expires; /* precalculated expiry time */
struct {
unsigned long prev; /* last modification */
- u_int32_t credit;
- u_int32_t credit_cap, cost;
+ u_int64_t credit;
+ u_int64_t credit_cap, cost;
} rateinfo;
struct rcu_head rcu;
};
@@ -98,7 +99,7 @@
u_int8_t family;
bool rnd_initialized;
- struct hashlimit_cfg1 cfg; /* config */
+ struct hashlimit_cfg2 cfg; /* config */
/* used internally */
spinlock_t lock; /* lock for list_head */
@@ -114,6 +115,30 @@
struct hlist_head hash[0]; /* hashtable itself */
};
+static int
+cfg_copy(struct hashlimit_cfg2 *to, void *from, int revision)
+{
+ if (revision == 1) {
+ struct hashlimit_cfg1 *cfg = (struct hashlimit_cfg1 *)from;
+
+ to->mode = cfg->mode;
+ to->avg = cfg->avg;
+ to->burst = cfg->burst;
+ to->size = cfg->size;
+ to->max = cfg->max;
+ to->gc_interval = cfg->gc_interval;
+ to->expire = cfg->expire;
+ to->srcmask = cfg->srcmask;
+ to->dstmask = cfg->dstmask;
+ } else if (revision == 2) {
+ memcpy(to, from, sizeof(struct hashlimit_cfg2));
+ } else {
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static DEFINE_MUTEX(hashlimit_mutex); /* protects htables list */
static struct kmem_cache *hashlimit_cachep __read_mostly;
@@ -215,16 +240,18 @@
}
static void htable_gc(struct work_struct *work);
-static int htable_create(struct net *net, struct xt_hashlimit_mtinfo1 *minfo,
- u_int8_t family)
+static int htable_create(struct net *net, struct hashlimit_cfg2 *cfg,
+ const char *name, u_int8_t family,
+ struct xt_hashlimit_htable **out_hinfo,
+ int revision)
{
struct hashlimit_net *hashlimit_net = hashlimit_pernet(net);
struct xt_hashlimit_htable *hinfo;
- unsigned int size;
- unsigned int i;
+ unsigned int size, i;
+ int ret;
- if (minfo->cfg.size) {
- size = minfo->cfg.size;
+ if (cfg->size) {
+ size = cfg->size;
} else {
size = (totalram_pages << PAGE_SHIFT) / 16384 /
sizeof(struct list_head);
@@ -238,10 +265,14 @@
sizeof(struct list_head) * size);
if (hinfo == NULL)
return -ENOMEM;
- minfo->hinfo = hinfo;
+ *out_hinfo = hinfo;
/* copy match config into hashtable config */
- memcpy(&hinfo->cfg, &minfo->cfg, sizeof(hinfo->cfg));
+ ret = cfg_copy(&hinfo->cfg, (void *)cfg, 2);
+
+ if (ret)
+ return ret;
+
hinfo->cfg.size = size;
if (hinfo->cfg.max == 0)
hinfo->cfg.max = 8 * hinfo->cfg.size;
@@ -255,17 +286,18 @@
hinfo->count = 0;
hinfo->family = family;
hinfo->rnd_initialized = false;
- hinfo->name = kstrdup(minfo->name, GFP_KERNEL);
+ hinfo->name = kstrdup(name, GFP_KERNEL);
if (!hinfo->name) {
vfree(hinfo);
return -ENOMEM;
}
spin_lock_init(&hinfo->lock);
- hinfo->pde = proc_create_data(minfo->name, 0,
+ hinfo->pde = proc_create_data(name, 0,
(family == NFPROTO_IPV4) ?
hashlimit_net->ipt_hashlimit : hashlimit_net->ip6t_hashlimit,
- &dl_file_ops, hinfo);
+ (revision == 1) ? &dl_file_ops_v1 : &dl_file_ops,
+ hinfo);
if (hinfo->pde == NULL) {
kfree(hinfo->name);
vfree(hinfo);
@@ -398,7 +430,8 @@
(slowest userspace tool allows), which means
CREDITS_PER_JIFFY*HZ*60*60*24 < 2^32 ie.
*/
-#define MAX_CPJ (0xFFFFFFFF / (HZ*60*60*24))
+#define MAX_CPJ_v1 (0xFFFFFFFF / (HZ*60*60*24))
+#define MAX_CPJ (0xFFFFFFFFFFFFFFFF / (HZ*60*60*24))
/* Repeated shift and or gives us all 1s, final shift and add 1 gives
* us the power of 2 below the theoretical max, so GCC simply does a
@@ -408,9 +441,12 @@
#define _POW2_BELOW8(x) (_POW2_BELOW4(x)|_POW2_BELOW4((x)>>4))
#define _POW2_BELOW16(x) (_POW2_BELOW8(x)|_POW2_BELOW8((x)>>8))
#define _POW2_BELOW32(x) (_POW2_BELOW16(x)|_POW2_BELOW16((x)>>16))
+#define _POW2_BELOW64(x) (_POW2_BELOW32(x)|_POW2_BELOW32((x)>>32))
#define POW2_BELOW32(x) ((_POW2_BELOW32(x)>>1) + 1)
+#define POW2_BELOW64(x) ((_POW2_BELOW64(x)>>1) + 1)
-#define CREDITS_PER_JIFFY POW2_BELOW32(MAX_CPJ)
+#define CREDITS_PER_JIFFY POW2_BELOW64(MAX_CPJ)
+#define CREDITS_PER_JIFFY_v1 POW2_BELOW32(MAX_CPJ_v1)
/* in byte mode, the lowest possible rate is one packet/second.
* credit_cap is used as a counter that tells us how many times we can
@@ -425,14 +461,24 @@
}
/* Precision saver. */
-static u32 user2credits(u32 user)
+static u64 user2credits(u64 user, int revision)
{
- /* If multiplying would overflow... */
- if (user > 0xFFFFFFFF / (HZ*CREDITS_PER_JIFFY))
- /* Divide first. */
- return (user / XT_HASHLIMIT_SCALE) * HZ * CREDITS_PER_JIFFY;
+ if (revision == 1) {
+ /* If multiplying would overflow... */
+ if (user > 0xFFFFFFFF / (HZ*CREDITS_PER_JIFFY_v1))
+ /* Divide first. */
+ return (user / XT_HASHLIMIT_SCALE) *\
+ HZ * CREDITS_PER_JIFFY_v1;
- return (user * HZ * CREDITS_PER_JIFFY) / XT_HASHLIMIT_SCALE;
+ return (user * HZ * CREDITS_PER_JIFFY_v1) \
+ / XT_HASHLIMIT_SCALE;
+ } else {
+ if (user > 0xFFFFFFFFFFFFFFFF / (HZ*CREDITS_PER_JIFFY))
+ return (user / XT_HASHLIMIT_SCALE_v2) *\
+ HZ * CREDITS_PER_JIFFY;
+
+ return (user * HZ * CREDITS_PER_JIFFY) / XT_HASHLIMIT_SCALE_v2;
+ }
}
static u32 user2credits_byte(u32 user)
@@ -442,10 +488,11 @@
return (u32) (us >> 32);
}
-static void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now, u32 mode)
+static void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now,
+ u32 mode, int revision)
{
unsigned long delta = now - dh->rateinfo.prev;
- u32 cap;
+ u64 cap, cpj;
if (delta == 0)
return;
@@ -453,7 +500,7 @@
dh->rateinfo.prev = now;
if (mode & XT_HASHLIMIT_BYTES) {
- u32 tmp = dh->rateinfo.credit;
+ u64 tmp = dh->rateinfo.credit;
dh->rateinfo.credit += CREDITS_PER_JIFFY_BYTES * delta;
cap = CREDITS_PER_JIFFY_BYTES * HZ;
if (tmp >= dh->rateinfo.credit) {/* overflow */
@@ -461,7 +508,9 @@
return;
}
} else {
- dh->rateinfo.credit += delta * CREDITS_PER_JIFFY;
+ cpj = (revision == 1) ?
+ CREDITS_PER_JIFFY_v1 : CREDITS_PER_JIFFY;
+ dh->rateinfo.credit += delta * cpj;
cap = dh->rateinfo.credit_cap;
}
if (dh->rateinfo.credit > cap)
@@ -469,7 +518,7 @@
}
static void rateinfo_init(struct dsthash_ent *dh,
- struct xt_hashlimit_htable *hinfo)
+ struct xt_hashlimit_htable *hinfo, int revision)
{
dh->rateinfo.prev = jiffies;
if (hinfo->cfg.mode & XT_HASHLIMIT_BYTES) {
@@ -478,8 +527,8 @@
dh->rateinfo.credit_cap = hinfo->cfg.burst;
} else {
dh->rateinfo.credit = user2credits(hinfo->cfg.avg *
- hinfo->cfg.burst);
- dh->rateinfo.cost = user2credits(hinfo->cfg.avg);
+ hinfo->cfg.burst, revision);
+ dh->rateinfo.cost = user2credits(hinfo->cfg.avg, revision);
dh->rateinfo.credit_cap = dh->rateinfo.credit;
}
}
@@ -603,15 +652,15 @@
}
static bool
-hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
+hashlimit_mt_common(const struct sk_buff *skb, struct xt_action_param *par,
+ struct xt_hashlimit_htable *hinfo,
+ const struct hashlimit_cfg2 *cfg, int revision)
{
- const struct xt_hashlimit_mtinfo1 *info = par->matchinfo;
- struct xt_hashlimit_htable *hinfo = info->hinfo;
unsigned long now = jiffies;
struct dsthash_ent *dh;
struct dsthash_dst dst;
bool race = false;
- u32 cost;
+ u64 cost;
if (hashlimit_init_dst(hinfo, &dst, skb, par->thoff) < 0)
goto hotdrop;
@@ -626,18 +675,18 @@
} else if (race) {
/* Already got an entry, update expiration timeout */
dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire);
- rateinfo_recalc(dh, now, hinfo->cfg.mode);
+ rateinfo_recalc(dh, now, hinfo->cfg.mode, revision);
} else {
dh->expires = jiffies + msecs_to_jiffies(hinfo->cfg.expire);
- rateinfo_init(dh, hinfo);
+ rateinfo_init(dh, hinfo, revision);
}
} else {
/* update expiration timeout */
dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire);
- rateinfo_recalc(dh, now, hinfo->cfg.mode);
+ rateinfo_recalc(dh, now, hinfo->cfg.mode, revision);
}
- if (info->cfg.mode & XT_HASHLIMIT_BYTES)
+ if (cfg->mode & XT_HASHLIMIT_BYTES)
cost = hashlimit_byte_cost(skb->len, dh);
else
cost = dh->rateinfo.cost;
@@ -647,73 +696,136 @@
dh->rateinfo.credit -= cost;
spin_unlock(&dh->lock);
rcu_read_unlock_bh();
- return !(info->cfg.mode & XT_HASHLIMIT_INVERT);
+ return !(cfg->mode & XT_HASHLIMIT_INVERT);
}
spin_unlock(&dh->lock);
rcu_read_unlock_bh();
/* default match is underlimit - so over the limit, we need to invert */
- return info->cfg.mode & XT_HASHLIMIT_INVERT;
+ return cfg->mode & XT_HASHLIMIT_INVERT;
hotdrop:
par->hotdrop = true;
return false;
}
-static int hashlimit_mt_check(const struct xt_mtchk_param *par)
+static bool
+hashlimit_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
{
- struct net *net = par->net;
- struct xt_hashlimit_mtinfo1 *info = par->matchinfo;
+ const struct xt_hashlimit_mtinfo1 *info = par->matchinfo;
+ struct xt_hashlimit_htable *hinfo = info->hinfo;
+ struct hashlimit_cfg2 cfg = {};
int ret;
- if (info->cfg.gc_interval == 0 || info->cfg.expire == 0)
- return -EINVAL;
- if (info->name[sizeof(info->name)-1] != '\0')
+ ret = cfg_copy(&cfg, (void *)&info->cfg, 1);
+
+ if (ret)
+ return ret;
+
+ return hashlimit_mt_common(skb, par, hinfo, &cfg, 1);
+}
+
+static bool
+hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_hashlimit_mtinfo2 *info = par->matchinfo;
+ struct xt_hashlimit_htable *hinfo = info->hinfo;
+
+ return hashlimit_mt_common(skb, par, hinfo, &info->cfg, 2);
+}
+
+static int hashlimit_mt_check_common(const struct xt_mtchk_param *par,
+ struct xt_hashlimit_htable **hinfo,
+ struct hashlimit_cfg2 *cfg,
+ const char *name, int revision)
+{
+ struct net *net = par->net;
+ int ret;
+
+ if (cfg->gc_interval == 0 || cfg->expire == 0)
return -EINVAL;
if (par->family == NFPROTO_IPV4) {
- if (info->cfg.srcmask > 32 || info->cfg.dstmask > 32)
+ if (cfg->srcmask > 32 || cfg->dstmask > 32)
return -EINVAL;
} else {
- if (info->cfg.srcmask > 128 || info->cfg.dstmask > 128)
+ if (cfg->srcmask > 128 || cfg->dstmask > 128)
return -EINVAL;
}
- if (info->cfg.mode & ~XT_HASHLIMIT_ALL) {
+ if (cfg->mode & ~XT_HASHLIMIT_ALL) {
pr_info("Unknown mode mask %X, kernel too old?\n",
- info->cfg.mode);
+ cfg->mode);
return -EINVAL;
}
/* Check for overflow. */
- if (info->cfg.mode & XT_HASHLIMIT_BYTES) {
- if (user2credits_byte(info->cfg.avg) == 0) {
- pr_info("overflow, rate too high: %u\n", info->cfg.avg);
+ if (cfg->mode & XT_HASHLIMIT_BYTES) {
+ if (user2credits_byte(cfg->avg) == 0) {
+ pr_info("overflow, rate too high: %llu\n", cfg->avg);
return -EINVAL;
}
- } else if (info->cfg.burst == 0 ||
- user2credits(info->cfg.avg * info->cfg.burst) <
- user2credits(info->cfg.avg)) {
- pr_info("overflow, try lower: %u/%u\n",
- info->cfg.avg, info->cfg.burst);
+ } else if (cfg->burst == 0 ||
+ user2credits(cfg->avg * cfg->burst, revision) <
+ user2credits(cfg->avg, revision)) {
+ pr_info("overflow, try lower: %llu/%llu\n",
+ cfg->avg, cfg->burst);
return -ERANGE;
}
mutex_lock(&hashlimit_mutex);
- info->hinfo = htable_find_get(net, info->name, par->family);
- if (info->hinfo == NULL) {
- ret = htable_create(net, info, par->family);
+ *hinfo = htable_find_get(net, name, par->family);
+ if (*hinfo == NULL) {
+ ret = htable_create(net, cfg, name, par->family,
+ hinfo, revision);
if (ret < 0) {
mutex_unlock(&hashlimit_mutex);
return ret;
}
}
mutex_unlock(&hashlimit_mutex);
+
return 0;
}
+static int hashlimit_mt_check_v1(const struct xt_mtchk_param *par)
+{
+ struct xt_hashlimit_mtinfo1 *info = par->matchinfo;
+ struct hashlimit_cfg2 cfg = {};
+ int ret;
+
+ if (info->name[sizeof(info->name) - 1] != '\0')
+ return -EINVAL;
+
+ ret = cfg_copy(&cfg, (void *)&info->cfg, 1);
+
+ if (ret)
+ return ret;
+
+ return hashlimit_mt_check_common(par, &info->hinfo,
+ &cfg, info->name, 1);
+}
+
+static int hashlimit_mt_check(const struct xt_mtchk_param *par)
+{
+ struct xt_hashlimit_mtinfo2 *info = par->matchinfo;
+
+ if (info->name[sizeof(info->name) - 1] != '\0')
+ return -EINVAL;
+
+ return hashlimit_mt_check_common(par, &info->hinfo, &info->cfg,
+ info->name, 2);
+}
+
+static void hashlimit_mt_destroy_v1(const struct xt_mtdtor_param *par)
+{
+ const struct xt_hashlimit_mtinfo1 *info = par->matchinfo;
+
+ htable_put(info->hinfo);
+}
+
static void hashlimit_mt_destroy(const struct xt_mtdtor_param *par)
{
- const struct xt_hashlimit_mtinfo1 *info = par->matchinfo;
+ const struct xt_hashlimit_mtinfo2 *info = par->matchinfo;
htable_put(info->hinfo);
}
@@ -723,8 +835,18 @@
.name = "hashlimit",
.revision = 1,
.family = NFPROTO_IPV4,
- .match = hashlimit_mt,
+ .match = hashlimit_mt_v1,
.matchsize = sizeof(struct xt_hashlimit_mtinfo1),
+ .checkentry = hashlimit_mt_check_v1,
+ .destroy = hashlimit_mt_destroy_v1,
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "hashlimit",
+ .revision = 2,
+ .family = NFPROTO_IPV4,
+ .match = hashlimit_mt,
+ .matchsize = sizeof(struct xt_hashlimit_mtinfo2),
.checkentry = hashlimit_mt_check,
.destroy = hashlimit_mt_destroy,
.me = THIS_MODULE,
@@ -734,8 +856,18 @@
.name = "hashlimit",
.revision = 1,
.family = NFPROTO_IPV6,
- .match = hashlimit_mt,
+ .match = hashlimit_mt_v1,
.matchsize = sizeof(struct xt_hashlimit_mtinfo1),
+ .checkentry = hashlimit_mt_check_v1,
+ .destroy = hashlimit_mt_destroy_v1,
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "hashlimit",
+ .revision = 2,
+ .family = NFPROTO_IPV6,
+ .match = hashlimit_mt,
+ .matchsize = sizeof(struct xt_hashlimit_mtinfo2),
.checkentry = hashlimit_mt_check,
.destroy = hashlimit_mt_destroy,
.me = THIS_MODULE,
@@ -786,18 +918,12 @@
spin_unlock_bh(&htable->lock);
}
-static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family,
- struct seq_file *s)
+static void dl_seq_print(struct dsthash_ent *ent, u_int8_t family,
+ struct seq_file *s)
{
- const struct xt_hashlimit_htable *ht = s->private;
-
- spin_lock(&ent->lock);
- /* recalculate to show accurate numbers */
- rateinfo_recalc(ent, jiffies, ht->cfg.mode);
-
switch (family) {
case NFPROTO_IPV4:
- seq_printf(s, "%ld %pI4:%u->%pI4:%u %u %u %u\n",
+ seq_printf(s, "%ld %pI4:%u->%pI4:%u %llu %llu %llu\n",
(long)(ent->expires - jiffies)/HZ,
&ent->dst.ip.src,
ntohs(ent->dst.src_port),
@@ -808,7 +934,7 @@
break;
#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
case NFPROTO_IPV6:
- seq_printf(s, "%ld %pI6:%u->%pI6:%u %u %u %u\n",
+ seq_printf(s, "%ld %pI6:%u->%pI6:%u %llu %llu %llu\n",
(long)(ent->expires - jiffies)/HZ,
&ent->dst.ip6.src,
ntohs(ent->dst.src_port),
@@ -821,10 +947,52 @@
default:
BUG();
}
+}
+
+static int dl_seq_real_show_v1(struct dsthash_ent *ent, u_int8_t family,
+ struct seq_file *s)
+{
+ const struct xt_hashlimit_htable *ht = s->private;
+
+ spin_lock(&ent->lock);
+ /* recalculate to show accurate numbers */
+ rateinfo_recalc(ent, jiffies, ht->cfg.mode, 1);
+
+ dl_seq_print(ent, family, s);
+
spin_unlock(&ent->lock);
return seq_has_overflowed(s);
}
+static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family,
+ struct seq_file *s)
+{
+ const struct xt_hashlimit_htable *ht = s->private;
+
+ spin_lock(&ent->lock);
+ /* recalculate to show accurate numbers */
+ rateinfo_recalc(ent, jiffies, ht->cfg.mode, 2);
+
+ dl_seq_print(ent, family, s);
+
+ spin_unlock(&ent->lock);
+ return seq_has_overflowed(s);
+}
+
+static int dl_seq_show_v1(struct seq_file *s, void *v)
+{
+ struct xt_hashlimit_htable *htable = s->private;
+ unsigned int *bucket = (unsigned int *)v;
+ struct dsthash_ent *ent;
+
+ if (!hlist_empty(&htable->hash[*bucket])) {
+ hlist_for_each_entry(ent, &htable->hash[*bucket], node)
+ if (dl_seq_real_show_v1(ent, htable->family, s))
+ return -1;
+ }
+ return 0;
+}
+
static int dl_seq_show(struct seq_file *s, void *v)
{
struct xt_hashlimit_htable *htable = s->private;
@@ -839,6 +1007,13 @@
return 0;
}
+static const struct seq_operations dl_seq_ops_v1 = {
+ .start = dl_seq_start,
+ .next = dl_seq_next,
+ .stop = dl_seq_stop,
+ .show = dl_seq_show_v1
+};
+
static const struct seq_operations dl_seq_ops = {
.start = dl_seq_start,
.next = dl_seq_next,
@@ -846,9 +1021,9 @@
.show = dl_seq_show
};
-static int dl_proc_open(struct inode *inode, struct file *file)
+static int dl_proc_open_v1(struct inode *inode, struct file *file)
{
- int ret = seq_open(file, &dl_seq_ops);
+ int ret = seq_open(file, &dl_seq_ops_v1);
if (!ret) {
struct seq_file *sf = file->private_data;
@@ -857,6 +1032,26 @@
return ret;
}
+static int dl_proc_open(struct inode *inode, struct file *file)
+{
+ int ret = seq_open(file, &dl_seq_ops);
+
+ if (!ret) {
+ struct seq_file *sf = file->private_data;
+
+ sf->private = PDE_DATA(inode);
+ }
+ return ret;
+}
+
+static const struct file_operations dl_file_ops_v1 = {
+ .owner = THIS_MODULE,
+ .open = dl_proc_open_v1,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release
+};
+
static const struct file_operations dl_file_ops = {
.owner = THIS_MODULE,
.open = dl_proc_open,
diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c
index 9f4ab00..f679dd4 100644
--- a/net/netfilter/xt_helper.c
+++ b/net/netfilter/xt_helper.c
@@ -41,7 +41,7 @@
if (!master_help)
return ret;
- /* rcu_read_lock()ed by nf_hook_slow */
+ /* rcu_read_lock()ed by nf_hook_thresh */
helper = rcu_dereference(master_help->helper);
if (!helper)
return ret;
@@ -65,7 +65,7 @@
par->family);
return ret;
}
- info->name[29] = '\0';
+ info->name[sizeof(info->name) - 1] = '\0';
return 0;
}
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index d725a27..e3b7a09 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -110,7 +110,6 @@
#endif
static u_int32_t hash_rnd __read_mostly;
-static bool hash_rnd_inited __read_mostly;
static inline unsigned int recent_entry_hash4(const union nf_inet_addr *addr)
{
@@ -340,10 +339,8 @@
int ret = -EINVAL;
size_t sz;
- if (unlikely(!hash_rnd_inited)) {
- get_random_bytes(&hash_rnd, sizeof(hash_rnd));
- hash_rnd_inited = true;
- }
+ net_get_random_once(&hash_rnd, sizeof(hash_rnd));
+
if (info->check_set & ~XT_RECENT_VALID_FLAGS) {
pr_info("Unsupported user space flags (%08x)\n",
info->check_set);