Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next
Pablo Neira Ayuso says:
====================
Netfilter updates for net-next
The following patchset contains Netfilter updates for your net-next tree.
They are:
* nf_tables set timeout infrastructure from Patrick Mchardy.
1) Add support for set timeout support.
2) Add support for set element timeouts using the new set extension
infrastructure.
4) Add garbage collection helper functions to get rid of stale elements.
Elements are accumulated in a batch that are asynchronously released
via RCU when the batch is full.
5) Add garbage collection synchronization helpers. This introduces a new
element busy bit to address concurrent access from the netlink API and the
garbage collector.
5) Add timeout support for the nft_hash set implementation. The garbage
collector peridically checks for stale elements from the workqueue.
* iptables/nftables cgroup fixes:
6) Ignore non full-socket objects from the input path, otherwise cgroup
match may crash, from Daniel Borkmann.
7) Fix cgroup in nf_tables.
8) Save some cycles from xt_socket by skipping packet header parsing when
skb->sk is already set because of early demux. Also from Daniel.
* br_netfilter updates from Florian Westphal.
9) Save frag_max_size and restore it from the forward path too.
10) Use a per-cpu area to restore the original source MAC address when traffic
is DNAT'ed.
11) Add helper functions to access physical devices.
12) Use these new physdev helper function from xt_physdev.
13) Add another nf_bridge_info_get() helper function to fetch the br_netfilter
state information.
14) Annotate original layer 2 protocol number in nf_bridge info, instead of
using kludgy flags.
15) Also annotate the pkttype mangling when the packet travels back and forth
from the IP to the bridge layer, instead of using a flag.
* More nf_tables set enhancement from Patrick:
16) Fix possible usage of set variant that doesn't support timeouts.
17) Avoid spurious "set is full" errors from Netlink API when there are pending
stale elements scheduled to be released.
18) Restrict loop checks to set maps.
19) Add support for dynamic set updates from the packet path.
20) Add support to store optional user data (eg. comments) per set element.
BTW, I have also pulled net-next into nf-next to anticipate the conflict
resolution between your okfn() signature changes and Florian's br_netfilter
updates.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h
index 5fc0a0f..ab8f76d 100644
--- a/include/linux/netfilter_bridge.h
+++ b/include/linux/netfilter_bridge.h
@@ -2,7 +2,7 @@
#define __LINUX_BRIDGE_NETFILTER_H
#include <uapi/linux/netfilter_bridge.h>
-
+#include <linux/skbuff.h>
enum nf_br_hook_priorities {
NF_BR_PRI_FIRST = INT_MIN,
@@ -17,15 +17,12 @@
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
-#define BRNF_PKT_TYPE 0x01
#define BRNF_BRIDGED_DNAT 0x02
#define BRNF_NF_BRIDGE_PREROUTING 0x08
-#define BRNF_8021Q 0x10
-#define BRNF_PPPoE 0x20
static inline unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb)
{
- if (unlikely(skb->nf_bridge->mask & BRNF_PPPoE))
+ if (skb->nf_bridge->orig_proto == BRNF_PROTO_PPPOE)
return PPPOE_SES_HLEN;
return 0;
}
@@ -40,6 +37,27 @@
skb_dst_drop(skb);
}
+static inline int nf_bridge_get_physinif(const struct sk_buff *skb)
+{
+ return skb->nf_bridge ? skb->nf_bridge->physindev->ifindex : 0;
+}
+
+static inline int nf_bridge_get_physoutif(const struct sk_buff *skb)
+{
+ return skb->nf_bridge ? skb->nf_bridge->physoutdev->ifindex : 0;
+}
+
+static inline struct net_device *
+nf_bridge_get_physindev(const struct sk_buff *skb)
+{
+ return skb->nf_bridge ? skb->nf_bridge->physindev : NULL;
+}
+
+static inline struct net_device *
+nf_bridge_get_physoutdev(const struct sk_buff *skb)
+{
+ return skb->nf_bridge ? skb->nf_bridge->physoutdev : NULL;
+}
#else
#define br_drop_fake_rtable(skb) do { } while (0)
#endif /* CONFIG_BRIDGE_NETFILTER */
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 36f3f43..0991259 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -166,10 +166,16 @@
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
struct nf_bridge_info {
atomic_t use;
+ enum {
+ BRNF_PROTO_UNCHANGED,
+ BRNF_PROTO_8021Q,
+ BRNF_PROTO_PPPOE
+ } orig_proto;
+ bool pkt_otherhost;
unsigned int mask;
struct net_device *physindev;
struct net_device *physoutdev;
- unsigned long data[32 / sizeof(unsigned long)];
+ char neigh_header[8];
};
#endif
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 8049819..d6a2f0e 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -195,6 +195,7 @@
};
struct nft_set_ext;
+struct nft_expr;
/**
* struct nft_set_ops - nf_tables set operations
@@ -217,6 +218,15 @@
bool (*lookup)(const struct nft_set *set,
const struct nft_data *key,
const struct nft_set_ext **ext);
+ bool (*update)(struct nft_set *set,
+ const struct nft_data *key,
+ void *(*new)(struct nft_set *,
+ const struct nft_expr *,
+ struct nft_data []),
+ const struct nft_expr *expr,
+ struct nft_data data[],
+ const struct nft_set_ext **ext);
+
int (*insert)(const struct nft_set *set,
const struct nft_set_elem *elem);
void (*activate)(const struct nft_set *set,
@@ -257,6 +267,9 @@
* @dtype: data type (verdict or numeric type defined by userspace)
* @size: maximum set size
* @nelems: number of elements
+ * @ndeact: number of deactivated elements queued for removal
+ * @timeout: default timeout value in msecs
+ * @gc_int: garbage collection interval in msecs
* @policy: set parameterization (see enum nft_set_policies)
* @ops: set ops
* @pnet: network namespace
@@ -272,7 +285,10 @@
u32 ktype;
u32 dtype;
u32 size;
- u32 nelems;
+ atomic_t nelems;
+ u32 ndeact;
+ u64 timeout;
+ u32 gc_int;
u16 policy;
/* runtime data below here */
const struct nft_set_ops *ops ____cacheline_aligned;
@@ -289,16 +305,27 @@
return (void *)set->data;
}
+static inline struct nft_set *nft_set_container_of(const void *priv)
+{
+ return (void *)priv - offsetof(struct nft_set, data);
+}
+
struct nft_set *nf_tables_set_lookup(const struct nft_table *table,
const struct nlattr *nla);
struct nft_set *nf_tables_set_lookup_byid(const struct net *net,
const struct nlattr *nla);
+static inline unsigned long nft_set_gc_interval(const struct nft_set *set)
+{
+ return set->gc_int ? msecs_to_jiffies(set->gc_int) : HZ;
+}
+
/**
* struct nft_set_binding - nf_tables set binding
*
* @list: set bindings list node
* @chain: chain containing the rule bound to the set
+ * @flags: set action flags
*
* A set binding contains all information necessary for validation
* of new elements added to a bound set.
@@ -306,6 +333,7 @@
struct nft_set_binding {
struct list_head list;
const struct nft_chain *chain;
+ u32 flags;
};
int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
@@ -319,12 +347,18 @@
* @NFT_SET_EXT_KEY: element key
* @NFT_SET_EXT_DATA: mapping data
* @NFT_SET_EXT_FLAGS: element flags
+ * @NFT_SET_EXT_TIMEOUT: element timeout
+ * @NFT_SET_EXT_EXPIRATION: element expiration time
+ * @NFT_SET_EXT_USERDATA: user data associated with the element
* @NFT_SET_EXT_NUM: number of extension types
*/
enum nft_set_extensions {
NFT_SET_EXT_KEY,
NFT_SET_EXT_DATA,
NFT_SET_EXT_FLAGS,
+ NFT_SET_EXT_TIMEOUT,
+ NFT_SET_EXT_EXPIRATION,
+ NFT_SET_EXT_USERDATA,
NFT_SET_EXT_NUM
};
@@ -421,15 +455,97 @@
return nft_set_ext(ext, NFT_SET_EXT_FLAGS);
}
+static inline u64 *nft_set_ext_timeout(const struct nft_set_ext *ext)
+{
+ return nft_set_ext(ext, NFT_SET_EXT_TIMEOUT);
+}
+
+static inline unsigned long *nft_set_ext_expiration(const struct nft_set_ext *ext)
+{
+ return nft_set_ext(ext, NFT_SET_EXT_EXPIRATION);
+}
+
+static inline struct nft_userdata *nft_set_ext_userdata(const struct nft_set_ext *ext)
+{
+ return nft_set_ext(ext, NFT_SET_EXT_USERDATA);
+}
+
+static inline bool nft_set_elem_expired(const struct nft_set_ext *ext)
+{
+ return nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION) &&
+ time_is_before_eq_jiffies(*nft_set_ext_expiration(ext));
+}
+
static inline struct nft_set_ext *nft_set_elem_ext(const struct nft_set *set,
void *elem)
{
return elem + set->ops->elemsize;
}
+void *nft_set_elem_init(const struct nft_set *set,
+ const struct nft_set_ext_tmpl *tmpl,
+ const struct nft_data *key,
+ const struct nft_data *data,
+ u64 timeout, gfp_t gfp);
void nft_set_elem_destroy(const struct nft_set *set, void *elem);
/**
+ * struct nft_set_gc_batch_head - nf_tables set garbage collection batch
+ *
+ * @rcu: rcu head
+ * @set: set the elements belong to
+ * @cnt: count of elements
+ */
+struct nft_set_gc_batch_head {
+ struct rcu_head rcu;
+ const struct nft_set *set;
+ unsigned int cnt;
+};
+
+#define NFT_SET_GC_BATCH_SIZE ((PAGE_SIZE - \
+ sizeof(struct nft_set_gc_batch_head)) / \
+ sizeof(void *))
+
+/**
+ * struct nft_set_gc_batch - nf_tables set garbage collection batch
+ *
+ * @head: GC batch head
+ * @elems: garbage collection elements
+ */
+struct nft_set_gc_batch {
+ struct nft_set_gc_batch_head head;
+ void *elems[NFT_SET_GC_BATCH_SIZE];
+};
+
+struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set,
+ gfp_t gfp);
+void nft_set_gc_batch_release(struct rcu_head *rcu);
+
+static inline void nft_set_gc_batch_complete(struct nft_set_gc_batch *gcb)
+{
+ if (gcb != NULL)
+ call_rcu(&gcb->head.rcu, nft_set_gc_batch_release);
+}
+
+static inline struct nft_set_gc_batch *
+nft_set_gc_batch_check(const struct nft_set *set, struct nft_set_gc_batch *gcb,
+ gfp_t gfp)
+{
+ if (gcb != NULL) {
+ if (gcb->head.cnt + 1 < ARRAY_SIZE(gcb->elems))
+ return gcb;
+ nft_set_gc_batch_complete(gcb);
+ }
+ return nft_set_gc_batch_alloc(set, gfp);
+}
+
+static inline void nft_set_gc_batch_add(struct nft_set_gc_batch *gcb,
+ void *elem)
+{
+ gcb->elems[gcb->head.cnt++] = elem;
+}
+
+/**
* struct nft_expr_type - nf_tables expression type
*
* @select_ops: function to select nft_expr_ops
@@ -750,6 +866,8 @@
return 1 << ACCESS_ONCE(net->nft.gencursor);
}
+#define NFT_GENMASK_ANY ((1 << 0) | (1 << 1))
+
/*
* Set element transaction helpers
*/
@@ -766,6 +884,41 @@
ext->genmask ^= nft_genmask_next(read_pnet(&set->pnet));
}
+/*
+ * We use a free bit in the genmask field to indicate the element
+ * is busy, meaning it is currently being processed either by
+ * the netlink API or GC.
+ *
+ * Even though the genmask is only a single byte wide, this works
+ * because the extension structure if fully constant once initialized,
+ * so there are no non-atomic write accesses unless it is already
+ * marked busy.
+ */
+#define NFT_SET_ELEM_BUSY_MASK (1 << 2)
+
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+#define NFT_SET_ELEM_BUSY_BIT 2
+#elif defined(__BIG_ENDIAN_BITFIELD)
+#define NFT_SET_ELEM_BUSY_BIT (BITS_PER_LONG - BITS_PER_BYTE + 2)
+#else
+#error
+#endif
+
+static inline int nft_set_elem_mark_busy(struct nft_set_ext *ext)
+{
+ unsigned long *word = (unsigned long *)ext;
+
+ BUILD_BUG_ON(offsetof(struct nft_set_ext, genmask) != 0);
+ return test_and_set_bit(NFT_SET_ELEM_BUSY_BIT, word);
+}
+
+static inline void nft_set_elem_clear_busy(struct nft_set_ext *ext)
+{
+ unsigned long *word = (unsigned long *)ext;
+
+ clear_bit(NFT_SET_ELEM_BUSY_BIT, word);
+}
+
/**
* struct nft_trans - nf_tables object update in transaction
*
diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h
index a75fc8e..c6f400c 100644
--- a/include/net/netfilter/nf_tables_core.h
+++ b/include/net/netfilter/nf_tables_core.h
@@ -31,6 +31,9 @@
int nft_lookup_module_init(void);
void nft_lookup_module_exit(void);
+int nft_dynset_module_init(void);
+void nft_dynset_module_exit(void);
+
int nft_bitwise_module_init(void);
void nft_bitwise_module_exit(void);
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index b978393..05ee1e0 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -208,12 +208,14 @@
* @NFT_SET_CONSTANT: set contents may not change while bound
* @NFT_SET_INTERVAL: set contains intervals
* @NFT_SET_MAP: set is used as a dictionary
+ * @NFT_SET_TIMEOUT: set uses timeouts
*/
enum nft_set_flags {
NFT_SET_ANONYMOUS = 0x1,
NFT_SET_CONSTANT = 0x2,
NFT_SET_INTERVAL = 0x4,
NFT_SET_MAP = 0x8,
+ NFT_SET_TIMEOUT = 0x10,
};
/**
@@ -252,6 +254,8 @@
* @NFTA_SET_POLICY: selection policy (NLA_U32)
* @NFTA_SET_DESC: set description (NLA_NESTED)
* @NFTA_SET_ID: uniquely identifies a set in a transaction (NLA_U32)
+ * @NFTA_SET_TIMEOUT: default timeout value (NLA_U64)
+ * @NFTA_SET_GC_INTERVAL: garbage collection interval (NLA_U32)
*/
enum nft_set_attributes {
NFTA_SET_UNSPEC,
@@ -265,6 +269,8 @@
NFTA_SET_POLICY,
NFTA_SET_DESC,
NFTA_SET_ID,
+ NFTA_SET_TIMEOUT,
+ NFTA_SET_GC_INTERVAL,
__NFTA_SET_MAX
};
#define NFTA_SET_MAX (__NFTA_SET_MAX - 1)
@@ -284,12 +290,18 @@
* @NFTA_SET_ELEM_KEY: key value (NLA_NESTED: nft_data)
* @NFTA_SET_ELEM_DATA: data value of mapping (NLA_NESTED: nft_data_attributes)
* @NFTA_SET_ELEM_FLAGS: bitmask of nft_set_elem_flags (NLA_U32)
+ * @NFTA_SET_ELEM_TIMEOUT: timeout value (NLA_U64)
+ * @NFTA_SET_ELEM_EXPIRATION: expiration time (NLA_U64)
+ * @NFTA_SET_ELEM_USERDATA: user data (NLA_BINARY)
*/
enum nft_set_elem_attributes {
NFTA_SET_ELEM_UNSPEC,
NFTA_SET_ELEM_KEY,
NFTA_SET_ELEM_DATA,
NFTA_SET_ELEM_FLAGS,
+ NFTA_SET_ELEM_TIMEOUT,
+ NFTA_SET_ELEM_EXPIRATION,
+ NFTA_SET_ELEM_USERDATA,
__NFTA_SET_ELEM_MAX
};
#define NFTA_SET_ELEM_MAX (__NFTA_SET_ELEM_MAX - 1)
@@ -505,6 +517,33 @@
};
#define NFTA_LOOKUP_MAX (__NFTA_LOOKUP_MAX - 1)
+enum nft_dynset_ops {
+ NFT_DYNSET_OP_ADD,
+ NFT_DYNSET_OP_UPDATE,
+};
+
+/**
+ * enum nft_dynset_attributes - dynset expression attributes
+ *
+ * @NFTA_DYNSET_SET_NAME: name of set the to add data to (NLA_STRING)
+ * @NFTA_DYNSET_SET_ID: uniquely identifier of the set in the transaction (NLA_U32)
+ * @NFTA_DYNSET_OP: operation (NLA_U32)
+ * @NFTA_DYNSET_SREG_KEY: source register of the key (NLA_U32)
+ * @NFTA_DYNSET_SREG_DATA: source register of the data (NLA_U32)
+ * @NFTA_DYNSET_TIMEOUT: timeout value for the new element (NLA_U64)
+ */
+enum nft_dynset_attributes {
+ NFTA_DYNSET_UNSPEC,
+ NFTA_DYNSET_SET_NAME,
+ NFTA_DYNSET_SET_ID,
+ NFTA_DYNSET_OP,
+ NFTA_DYNSET_SREG_KEY,
+ NFTA_DYNSET_SREG_DATA,
+ NFTA_DYNSET_TIMEOUT,
+ __NFTA_DYNSET_MAX,
+};
+#define NFTA_DYNSET_MAX (__NFTA_DYNSET_MAX - 1)
+
/**
* enum nft_payload_bases - nf_tables payload expression offset bases
*
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index acd31c9..ab55e24 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -111,6 +111,24 @@
pppoe_proto(skb) == htons(PPP_IPV6) && \
brnf_filter_pppoe_tagged)
+/* largest possible L2 header, see br_nf_dev_queue_xmit() */
+#define NF_BRIDGE_MAX_MAC_HEADER_LENGTH (PPPOE_SES_HLEN + ETH_HLEN)
+
+#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
+struct brnf_frag_data {
+ char mac[NF_BRIDGE_MAX_MAC_HEADER_LENGTH];
+ u8 encap_size;
+ u8 size;
+};
+
+static DEFINE_PER_CPU(struct brnf_frag_data, brnf_frag_data_storage);
+#endif
+
+static struct nf_bridge_info *nf_bridge_info_get(const struct sk_buff *skb)
+{
+ return skb->nf_bridge;
+}
+
static inline struct rtable *bridge_parent_rtable(const struct net_device *dev)
{
struct net_bridge_port *port;
@@ -189,14 +207,6 @@
skb->network_header += len;
}
-static inline void nf_bridge_save_header(struct sk_buff *skb)
-{
- int header_size = ETH_HLEN + nf_bridge_encap_header_len(skb);
-
- skb_copy_from_linear_data_offset(skb, -header_size,
- skb->nf_bridge->data, header_size);
-}
-
/* When handing a packet over to the IP layer
* check whether we have a skb that is in the
* expected format
@@ -252,10 +262,16 @@
static void nf_bridge_update_protocol(struct sk_buff *skb)
{
- if (skb->nf_bridge->mask & BRNF_8021Q)
+ switch (skb->nf_bridge->orig_proto) {
+ case BRNF_PROTO_8021Q:
skb->protocol = htons(ETH_P_8021Q);
- else if (skb->nf_bridge->mask & BRNF_PPPoE)
+ break;
+ case BRNF_PROTO_PPPOE:
skb->protocol = htons(ETH_P_PPP_SES);
+ break;
+ case BRNF_PROTO_UNCHANGED:
+ break;
+ }
}
/* PF_BRIDGE/PRE_ROUTING *********************************************/
@@ -263,12 +279,12 @@
* bridge PRE_ROUTING hook. */
static int br_nf_pre_routing_finish_ipv6(struct sock *sk, struct sk_buff *skb)
{
- struct nf_bridge_info *nf_bridge = skb->nf_bridge;
+ struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
struct rtable *rt;
- if (nf_bridge->mask & BRNF_PKT_TYPE) {
+ if (nf_bridge->pkt_otherhost) {
skb->pkt_type = PACKET_OTHERHOST;
- nf_bridge->mask ^= BRNF_PKT_TYPE;
+ nf_bridge->pkt_otherhost = false;
}
nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING;
@@ -296,7 +312,6 @@
*/
static int br_nf_pre_routing_finish_bridge(struct sock *sk, struct sk_buff *skb)
{
- struct nf_bridge_info *nf_bridge = skb->nf_bridge;
struct neighbour *neigh;
struct dst_entry *dst;
@@ -306,6 +321,7 @@
dst = skb_dst(skb);
neigh = dst_neigh_lookup_skb(dst, skb);
if (neigh) {
+ struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
int ret;
if (neigh->hh.hh_len) {
@@ -319,7 +335,7 @@
*/
skb_copy_from_linear_data_offset(skb,
-(ETH_HLEN-ETH_ALEN),
- skb->nf_bridge->data,
+ nf_bridge->neigh_header,
ETH_HLEN-ETH_ALEN);
/* tell br_dev_xmit to continue with forwarding */
nf_bridge->mask |= BRNF_BRIDGED_DNAT;
@@ -392,7 +408,7 @@
{
struct net_device *dev = skb->dev;
struct iphdr *iph = ip_hdr(skb);
- struct nf_bridge_info *nf_bridge = skb->nf_bridge;
+ struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
struct rtable *rt;
int err;
int frag_max_size;
@@ -400,9 +416,9 @@
frag_max_size = IPCB(skb)->frag_max_size;
BR_INPUT_SKB_CB(skb)->frag_max_size = frag_max_size;
- if (nf_bridge->mask & BRNF_PKT_TYPE) {
+ if (nf_bridge->pkt_otherhost) {
skb->pkt_type = PACKET_OTHERHOST;
- nf_bridge->mask ^= BRNF_PKT_TYPE;
+ nf_bridge->pkt_otherhost = false;
}
nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING;
if (dnat_took_place(skb)) {
@@ -485,20 +501,21 @@
/* Some common code for IPv4/IPv6 */
static struct net_device *setup_pre_routing(struct sk_buff *skb)
{
- struct nf_bridge_info *nf_bridge = skb->nf_bridge;
+ struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
if (skb->pkt_type == PACKET_OTHERHOST) {
skb->pkt_type = PACKET_HOST;
- nf_bridge->mask |= BRNF_PKT_TYPE;
+ nf_bridge->pkt_otherhost = true;
}
nf_bridge->mask |= BRNF_NF_BRIDGE_PREROUTING;
nf_bridge->physindev = skb->dev;
skb->dev = brnf_get_logical_dev(skb, skb->dev);
+
if (skb->protocol == htons(ETH_P_8021Q))
- nf_bridge->mask |= BRNF_8021Q;
+ nf_bridge->orig_proto = BRNF_PROTO_8021Q;
else if (skb->protocol == htons(ETH_P_PPP_SES))
- nf_bridge->mask |= BRNF_PPPoE;
+ nf_bridge->orig_proto = BRNF_PROTO_PPPOE;
/* Must drop socket now because of tproxy. */
skb_orphan(skb);
@@ -680,14 +697,21 @@
/* PF_BRIDGE/FORWARD *************************************************/
static int br_nf_forward_finish(struct sock *sk, struct sk_buff *skb)
{
- struct nf_bridge_info *nf_bridge = skb->nf_bridge;
+ struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
struct net_device *in;
if (!IS_ARP(skb) && !IS_VLAN_ARP(skb)) {
+ int frag_max_size;
+
+ if (skb->protocol == htons(ETH_P_IP)) {
+ frag_max_size = IPCB(skb)->frag_max_size;
+ BR_INPUT_SKB_CB(skb)->frag_max_size = frag_max_size;
+ }
+
in = nf_bridge->physindev;
- if (nf_bridge->mask & BRNF_PKT_TYPE) {
+ if (nf_bridge->pkt_otherhost) {
skb->pkt_type = PACKET_OTHERHOST;
- nf_bridge->mask ^= BRNF_PKT_TYPE;
+ nf_bridge->pkt_otherhost = false;
}
nf_bridge_update_protocol(skb);
} else {
@@ -722,6 +746,10 @@
if (!nf_bridge_unshare(skb))
return NF_DROP;
+ nf_bridge = nf_bridge_info_get(skb);
+ if (!nf_bridge)
+ return NF_DROP;
+
parent = bridge_parent(state->out);
if (!parent)
return NF_DROP;
@@ -735,14 +763,19 @@
nf_bridge_pull_encap_header(skb);
- nf_bridge = skb->nf_bridge;
if (skb->pkt_type == PACKET_OTHERHOST) {
skb->pkt_type = PACKET_HOST;
- nf_bridge->mask |= BRNF_PKT_TYPE;
+ nf_bridge->pkt_otherhost = true;
}
- if (pf == NFPROTO_IPV4 && br_parse_ip_options(skb))
- return NF_DROP;
+ if (pf == NFPROTO_IPV4) {
+ int frag_max = BR_INPUT_SKB_CB(skb)->frag_max_size;
+
+ if (br_parse_ip_options(skb))
+ return NF_DROP;
+
+ IPCB(skb)->frag_max_size = frag_max;
+ }
nf_bridge->physoutdev = skb->dev;
if (pf == NFPROTO_IPV4)
@@ -792,30 +825,22 @@
}
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
-static bool nf_bridge_copy_header(struct sk_buff *skb)
-{
- int err;
- unsigned int header_size;
-
- nf_bridge_update_protocol(skb);
- header_size = ETH_HLEN + nf_bridge_encap_header_len(skb);
- err = skb_cow_head(skb, header_size);
- if (err)
- return false;
-
- skb_copy_to_linear_data_offset(skb, -header_size,
- skb->nf_bridge->data, header_size);
- __skb_push(skb, nf_bridge_encap_header_len(skb));
- return true;
-}
-
static int br_nf_push_frag_xmit(struct sock *sk, struct sk_buff *skb)
{
- if (!nf_bridge_copy_header(skb)) {
+ struct brnf_frag_data *data;
+ int err;
+
+ data = this_cpu_ptr(&brnf_frag_data_storage);
+ err = skb_cow_head(skb, data->size);
+
+ if (err) {
kfree_skb(skb);
return 0;
}
+ skb_copy_to_linear_data_offset(skb, -data->size, data->mac, data->size);
+ __skb_push(skb, data->encap_size);
+
return br_dev_queue_push_xmit(sk, skb);
}
@@ -833,14 +858,27 @@
* boundaries by preserving frag_list rather than refragmenting.
*/
if (skb->len + mtu_reserved > skb->dev->mtu) {
+ struct brnf_frag_data *data;
+
frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
if (br_parse_ip_options(skb))
/* Drop invalid packet */
return NF_DROP;
IPCB(skb)->frag_max_size = frag_max_size;
+
+ nf_bridge_update_protocol(skb);
+
+ data = this_cpu_ptr(&brnf_frag_data_storage);
+ data->encap_size = nf_bridge_encap_header_len(skb);
+ data->size = ETH_HLEN + data->encap_size;
+
+ skb_copy_from_linear_data_offset(skb, -data->size, data->mac,
+ data->size);
+
ret = ip_fragment(sk, skb, br_nf_push_frag_xmit);
- } else
+ } else {
ret = br_dev_queue_push_xmit(sk, skb);
+ }
return ret;
}
@@ -856,7 +894,7 @@
struct sk_buff *skb,
const struct nf_hook_state *state)
{
- struct nf_bridge_info *nf_bridge = skb->nf_bridge;
+ struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
struct net_device *realoutdev = bridge_parent(skb->dev);
u_int8_t pf;
@@ -882,11 +920,10 @@
* about the value of skb->pkt_type. */
if (skb->pkt_type == PACKET_OTHERHOST) {
skb->pkt_type = PACKET_HOST;
- nf_bridge->mask |= BRNF_PKT_TYPE;
+ nf_bridge->pkt_otherhost = true;
}
nf_bridge_pull_encap_header(skb);
- nf_bridge_save_header(skb);
if (pf == NFPROTO_IPV4)
skb->protocol = htons(ETH_P_IP);
else
@@ -925,13 +962,16 @@
*/
static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb)
{
- struct nf_bridge_info *nf_bridge = skb->nf_bridge;
+ struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
skb_pull(skb, ETH_HLEN);
nf_bridge->mask &= ~BRNF_BRIDGED_DNAT;
- skb_copy_to_linear_data_offset(skb, -(ETH_HLEN-ETH_ALEN),
- skb->nf_bridge->data, ETH_HLEN-ETH_ALEN);
+ BUILD_BUG_ON(sizeof(nf_bridge->neigh_header) != (ETH_HLEN - ETH_ALEN));
+
+ skb_copy_to_linear_data_offset(skb, -(ETH_HLEN - ETH_ALEN),
+ nf_bridge->neigh_header,
+ ETH_HLEN - ETH_ALEN);
skb->dev = nf_bridge->physindev;
br_handle_frame_finish(NULL, skb);
}
diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
index c5b794d..3262e41 100644
--- a/net/ipv4/netfilter/nf_reject_ipv4.c
+++ b/net/ipv4/netfilter/nf_reject_ipv4.c
@@ -13,6 +13,7 @@
#include <net/dst.h>
#include <net/netfilter/ipv4/nf_reject.h>
#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_bridge.h>
#include <net/netfilter/ipv4/nf_reject.h>
const struct tcphdr *nf_reject_ip_tcphdr_get(struct sk_buff *oldskb,
@@ -146,7 +147,8 @@
*/
if (oldskb->nf_bridge) {
struct ethhdr *oeth = eth_hdr(oldskb);
- nskb->dev = oldskb->nf_bridge->physindev;
+
+ nskb->dev = nf_bridge_get_physindev(oldskb);
niph->tot_len = htons(nskb->len);
ip_send_check(niph);
if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol),
diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c
index 3afdce0..94b4c6d 100644
--- a/net/ipv6/netfilter/nf_reject_ipv6.c
+++ b/net/ipv6/netfilter/nf_reject_ipv6.c
@@ -13,6 +13,7 @@
#include <net/ip6_checksum.h>
#include <net/netfilter/ipv6/nf_reject.h>
#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter_bridge.h>
#include <net/netfilter/ipv6/nf_reject.h>
const struct tcphdr *nf_reject_ip6_tcphdr_get(struct sk_buff *oldskb,
@@ -195,7 +196,8 @@
*/
if (oldskb->nf_bridge) {
struct ethhdr *oeth = eth_hdr(oldskb);
- nskb->dev = oldskb->nf_bridge->physindev;
+
+ nskb->dev = nf_bridge_get_physindev(oldskb);
nskb->protocol = htons(ETH_P_IPV6);
ip6h->payload_len = htons(sizeof(struct tcphdr));
if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol),
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 89f73a9..a87d8b8 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -70,7 +70,7 @@
# nf_tables
nf_tables-objs += nf_tables_core.o nf_tables_api.o
-nf_tables-objs += nft_immediate.o nft_cmp.o nft_lookup.o
+nf_tables-objs += nft_immediate.o nft_cmp.o nft_lookup.o nft_dynset.o
nf_tables-objs += nft_bitwise.o nft_byteorder.o nft_payload.o
obj-$(CONFIG_NF_TABLES) += nf_tables.o
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index 758b002..380ef51 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -19,6 +19,7 @@
#include <net/netlink.h>
#include <linux/netfilter.h>
+#include <linux/netfilter_bridge.h>
#include <linux/netfilter/ipset/pfxlen.h>
#include <linux/netfilter/ipset/ip_set.h>
#include <linux/netfilter/ipset/ip_set_hash.h>
@@ -211,6 +212,22 @@
#define HKEY_DATALEN sizeof(struct hash_netiface4_elem_hashed)
#include "ip_set_hash_gen.h"
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+static const char *get_physindev_name(const struct sk_buff *skb)
+{
+ struct net_device *dev = nf_bridge_get_physindev(skb);
+
+ return dev ? dev->name : NULL;
+}
+
+static const char *get_phyoutdev_name(const struct sk_buff *skb)
+{
+ struct net_device *dev = nf_bridge_get_physoutdev(skb);
+
+ return dev ? dev->name : NULL;
+}
+#endif
+
static int
hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb,
const struct xt_action_param *par,
@@ -234,16 +251,15 @@
e.ip &= ip_set_netmask(e.cidr);
#define IFACE(dir) (par->dir ? par->dir->name : NULL)
-#define PHYSDEV(dir) (nf_bridge->dir ? nf_bridge->dir->name : NULL)
#define SRCDIR (opt->flags & IPSET_DIM_TWO_SRC)
if (opt->cmdflags & IPSET_FLAG_PHYSDEV) {
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
- const struct nf_bridge_info *nf_bridge = skb->nf_bridge;
+ e.iface = SRCDIR ? get_physindev_name(skb) :
+ get_phyoutdev_name(skb);
- if (!nf_bridge)
+ if (!e.iface)
return -EINVAL;
- e.iface = SRCDIR ? PHYSDEV(physindev) : PHYSDEV(physoutdev);
e.physdev = 1;
#else
e.iface = NULL;
@@ -476,11 +492,11 @@
if (opt->cmdflags & IPSET_FLAG_PHYSDEV) {
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
- const struct nf_bridge_info *nf_bridge = skb->nf_bridge;
-
- if (!nf_bridge)
+ e.iface = SRCDIR ? get_physindev_name(skb) :
+ get_phyoutdev_name(skb);
+ if (!e.iface)
return -EINVAL;
- e.iface = SRCDIR ? PHYSDEV(physindev) : PHYSDEV(physoutdev);
+
e.physdev = 1;
#else
e.iface = NULL;
diff --git a/net/netfilter/nf_log_common.c b/net/netfilter/nf_log_common.c
index 2631876..a5aa596 100644
--- a/net/netfilter/nf_log_common.c
+++ b/net/netfilter/nf_log_common.c
@@ -17,6 +17,7 @@
#include <net/route.h>
#include <linux/netfilter.h>
+#include <linux/netfilter_bridge.h>
#include <linux/netfilter/xt_LOG.h>
#include <net/netfilter/nf_log.h>
@@ -163,10 +164,10 @@
const struct net_device *physindev;
const struct net_device *physoutdev;
- physindev = skb->nf_bridge->physindev;
+ physindev = nf_bridge_get_physindev(skb);
if (physindev && in != physindev)
nf_log_buf_add(m, "PHYSIN=%s ", physindev->name);
- physoutdev = skb->nf_bridge->physoutdev;
+ physoutdev = nf_bridge_get_physoutdev(skb);
if (physoutdev && out != physoutdev)
nf_log_buf_add(m, "PHYSOUT=%s ", physoutdev->name);
}
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 3f3ac57..2e88032 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -10,6 +10,7 @@
#include <linux/proc_fs.h>
#include <linux/skbuff.h>
#include <linux/netfilter.h>
+#include <linux/netfilter_bridge.h>
#include <linux/seq_file.h>
#include <linux/rcupdate.h>
#include <net/protocol.h>
@@ -58,12 +59,14 @@
sock_put(state->sk);
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
if (entry->skb->nf_bridge) {
- struct nf_bridge_info *nf_bridge = entry->skb->nf_bridge;
+ struct net_device *physdev;
- if (nf_bridge->physindev)
- dev_put(nf_bridge->physindev);
- if (nf_bridge->physoutdev)
- dev_put(nf_bridge->physoutdev);
+ physdev = nf_bridge_get_physindev(entry->skb);
+ if (physdev)
+ dev_put(physdev);
+ physdev = nf_bridge_get_physoutdev(entry->skb);
+ if (physdev)
+ dev_put(physdev);
}
#endif
/* Drop reference to owner of hook which queued us. */
@@ -87,13 +90,12 @@
sock_hold(state->sk);
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
if (entry->skb->nf_bridge) {
- struct nf_bridge_info *nf_bridge = entry->skb->nf_bridge;
struct net_device *physdev;
- physdev = nf_bridge->physindev;
+ physdev = nf_bridge_get_physindev(entry->skb);
if (physdev)
dev_hold(physdev);
- physdev = nf_bridge->physoutdev;
+ physdev = nf_bridge_get_physoutdev(entry->skb);
if (physdev)
dev_hold(physdev);
}
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 5604c2d..0b96fa0 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2159,7 +2159,7 @@
features = 0;
if (nla[NFTA_SET_FLAGS] != NULL) {
features = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS]));
- features &= NFT_SET_INTERVAL | NFT_SET_MAP;
+ features &= NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_TIMEOUT;
}
bops = NULL;
@@ -2216,6 +2216,8 @@
[NFTA_SET_POLICY] = { .type = NLA_U32 },
[NFTA_SET_DESC] = { .type = NLA_NESTED },
[NFTA_SET_ID] = { .type = NLA_U32 },
+ [NFTA_SET_TIMEOUT] = { .type = NLA_U64 },
+ [NFTA_SET_GC_INTERVAL] = { .type = NLA_U32 },
};
static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = {
@@ -2366,6 +2368,13 @@
goto nla_put_failure;
}
+ if (set->timeout &&
+ nla_put_be64(skb, NFTA_SET_TIMEOUT, cpu_to_be64(set->timeout)))
+ goto nla_put_failure;
+ if (set->gc_int &&
+ nla_put_be32(skb, NFTA_SET_GC_INTERVAL, htonl(set->gc_int)))
+ goto nla_put_failure;
+
if (set->policy != NFT_SET_POL_PERFORMANCE) {
if (nla_put_be32(skb, NFTA_SET_POLICY, htonl(set->policy)))
goto nla_put_failure;
@@ -2578,7 +2587,8 @@
char name[IFNAMSIZ];
unsigned int size;
bool create;
- u32 ktype, dtype, flags, policy;
+ u64 timeout;
+ u32 ktype, dtype, flags, policy, gc_int;
struct nft_set_desc desc;
int err;
@@ -2605,7 +2615,8 @@
if (nla[NFTA_SET_FLAGS] != NULL) {
flags = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS]));
if (flags & ~(NFT_SET_ANONYMOUS | NFT_SET_CONSTANT |
- NFT_SET_INTERVAL | NFT_SET_MAP))
+ NFT_SET_INTERVAL | NFT_SET_MAP |
+ NFT_SET_TIMEOUT))
return -EINVAL;
}
@@ -2631,6 +2642,19 @@
} else if (flags & NFT_SET_MAP)
return -EINVAL;
+ timeout = 0;
+ if (nla[NFTA_SET_TIMEOUT] != NULL) {
+ if (!(flags & NFT_SET_TIMEOUT))
+ return -EINVAL;
+ timeout = be64_to_cpu(nla_get_be64(nla[NFTA_SET_TIMEOUT]));
+ }
+ gc_int = 0;
+ if (nla[NFTA_SET_GC_INTERVAL] != NULL) {
+ if (!(flags & NFT_SET_TIMEOUT))
+ return -EINVAL;
+ gc_int = ntohl(nla_get_be32(nla[NFTA_SET_GC_INTERVAL]));
+ }
+
policy = NFT_SET_POL_PERFORMANCE;
if (nla[NFTA_SET_POLICY] != NULL)
policy = ntohl(nla_get_be32(nla[NFTA_SET_POLICY]));
@@ -2699,6 +2723,8 @@
set->flags = flags;
set->size = desc.size;
set->policy = policy;
+ set->timeout = timeout;
+ set->gc_int = gc_int;
err = ops->init(set, &desc, nla);
if (err < 0)
@@ -2785,12 +2811,13 @@
if (!list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS)
return -EBUSY;
- if (set->flags & NFT_SET_MAP) {
+ if (binding->flags & NFT_SET_MAP) {
/* If the set is already bound to the same chain all
* jumps are already validated for that chain.
*/
list_for_each_entry(i, &set->bindings, list) {
- if (i->chain == binding->chain)
+ if (binding->flags & NFT_SET_MAP &&
+ i->chain == binding->chain)
goto bind;
}
@@ -2837,6 +2864,18 @@
.len = sizeof(u8),
.align = __alignof__(u8),
},
+ [NFT_SET_EXT_TIMEOUT] = {
+ .len = sizeof(u64),
+ .align = __alignof__(u64),
+ },
+ [NFT_SET_EXT_EXPIRATION] = {
+ .len = sizeof(unsigned long),
+ .align = __alignof__(unsigned long),
+ },
+ [NFT_SET_EXT_USERDATA] = {
+ .len = sizeof(struct nft_userdata),
+ .align = __alignof__(struct nft_userdata),
+ },
};
EXPORT_SYMBOL_GPL(nft_set_ext_types);
@@ -2848,6 +2887,9 @@
[NFTA_SET_ELEM_KEY] = { .type = NLA_NESTED },
[NFTA_SET_ELEM_DATA] = { .type = NLA_NESTED },
[NFTA_SET_ELEM_FLAGS] = { .type = NLA_U32 },
+ [NFTA_SET_ELEM_TIMEOUT] = { .type = NLA_U64 },
+ [NFTA_SET_ELEM_USERDATA] = { .type = NLA_BINARY,
+ .len = NFT_USERDATA_MAXLEN },
};
static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX + 1] = {
@@ -2909,6 +2951,34 @@
htonl(*nft_set_ext_flags(ext))))
goto nla_put_failure;
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT) &&
+ nla_put_be64(skb, NFTA_SET_ELEM_TIMEOUT,
+ cpu_to_be64(*nft_set_ext_timeout(ext))))
+ goto nla_put_failure;
+
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) {
+ unsigned long expires, now = jiffies;
+
+ expires = *nft_set_ext_expiration(ext);
+ if (time_before(now, expires))
+ expires -= now;
+ else
+ expires = 0;
+
+ if (nla_put_be64(skb, NFTA_SET_ELEM_EXPIRATION,
+ cpu_to_be64(jiffies_to_msecs(expires))))
+ goto nla_put_failure;
+ }
+
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_USERDATA)) {
+ struct nft_userdata *udata;
+
+ udata = nft_set_ext_userdata(ext);
+ if (nla_put(skb, NFTA_SET_ELEM_USERDATA,
+ udata->len + 1, udata->data))
+ goto nla_put_failure;
+ }
+
nla_nest_end(skb, nest);
return 0;
@@ -3128,11 +3198,11 @@
return trans;
}
-static void *nft_set_elem_init(const struct nft_set *set,
- const struct nft_set_ext_tmpl *tmpl,
- const struct nft_data *key,
- const struct nft_data *data,
- gfp_t gfp)
+void *nft_set_elem_init(const struct nft_set *set,
+ const struct nft_set_ext_tmpl *tmpl,
+ const struct nft_data *key,
+ const struct nft_data *data,
+ u64 timeout, gfp_t gfp)
{
struct nft_set_ext *ext;
void *elem;
@@ -3147,6 +3217,11 @@
memcpy(nft_set_ext_key(ext), key, set->klen);
if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
memcpy(nft_set_ext_data(ext), data, set->dlen);
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION))
+ *nft_set_ext_expiration(ext) =
+ jiffies + msecs_to_jiffies(timeout);
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT))
+ *nft_set_ext_timeout(ext) = timeout;
return elem;
}
@@ -3172,15 +3247,15 @@
struct nft_set_ext *ext;
struct nft_set_elem elem;
struct nft_set_binding *binding;
+ struct nft_userdata *udata;
struct nft_data data;
enum nft_registers dreg;
struct nft_trans *trans;
+ u64 timeout;
u32 flags;
+ u8 ulen;
int err;
- if (set->size && set->nelems == set->size)
- return -ENFILE;
-
err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr,
nft_set_elem_policy);
if (err < 0)
@@ -3215,6 +3290,15 @@
return -EINVAL;
}
+ timeout = 0;
+ if (nla[NFTA_SET_ELEM_TIMEOUT] != NULL) {
+ if (!(set->flags & NFT_SET_TIMEOUT))
+ return -EINVAL;
+ timeout = be64_to_cpu(nla_get_be64(nla[NFTA_SET_ELEM_TIMEOUT]));
+ } else if (set->flags & NFT_SET_TIMEOUT) {
+ timeout = set->timeout;
+ }
+
err = nft_data_init(ctx, &elem.key, &d1, nla[NFTA_SET_ELEM_KEY]);
if (err < 0)
goto err1;
@@ -3223,6 +3307,11 @@
goto err2;
nft_set_ext_add(&tmpl, NFT_SET_EXT_KEY);
+ if (timeout > 0) {
+ nft_set_ext_add(&tmpl, NFT_SET_EXT_EXPIRATION);
+ if (timeout != set->timeout)
+ nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT);
+ }
if (nla[NFTA_SET_ELEM_DATA] != NULL) {
err = nft_data_init(ctx, &data, &d2, nla[NFTA_SET_ELEM_DATA]);
@@ -3241,6 +3330,9 @@
.chain = (struct nft_chain *)binding->chain,
};
+ if (!(binding->flags & NFT_SET_MAP))
+ continue;
+
err = nft_validate_data_load(&bind_ctx, dreg,
&data, d2.type);
if (err < 0)
@@ -3250,20 +3342,38 @@
nft_set_ext_add(&tmpl, NFT_SET_EXT_DATA);
}
+ /* The full maximum length of userdata can exceed the maximum
+ * offset value (U8_MAX) for following extensions, therefor it
+ * must be the last extension added.
+ */
+ ulen = 0;
+ if (nla[NFTA_SET_ELEM_USERDATA] != NULL) {
+ ulen = nla_len(nla[NFTA_SET_ELEM_USERDATA]);
+ if (ulen > 0)
+ nft_set_ext_add_length(&tmpl, NFT_SET_EXT_USERDATA,
+ ulen);
+ }
+
err = -ENOMEM;
- elem.priv = nft_set_elem_init(set, &tmpl, &elem.key, &data, GFP_KERNEL);
+ elem.priv = nft_set_elem_init(set, &tmpl, &elem.key, &data,
+ timeout, GFP_KERNEL);
if (elem.priv == NULL)
goto err3;
ext = nft_set_elem_ext(set, elem.priv);
if (flags)
*nft_set_ext_flags(ext) = flags;
+ if (ulen > 0) {
+ udata = nft_set_ext_userdata(ext);
+ udata->len = ulen - 1;
+ nla_memcpy(&udata->data, nla[NFTA_SET_ELEM_USERDATA], ulen);
+ }
trans = nft_trans_elem_alloc(ctx, NFT_MSG_NEWSETELEM, set);
if (trans == NULL)
goto err4;
- ext->genmask = nft_genmask_cur(ctx->net);
+ ext->genmask = nft_genmask_cur(ctx->net) | NFT_SET_ELEM_BUSY_MASK;
err = set->ops->insert(set, &elem);
if (err < 0)
goto err5;
@@ -3316,11 +3426,15 @@
return -EBUSY;
nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
- err = nft_add_set_elem(&ctx, set, attr);
- if (err < 0)
- break;
+ if (set->size &&
+ !atomic_add_unless(&set->nelems, 1, set->size + set->ndeact))
+ return -ENFILE;
- set->nelems++;
+ err = nft_add_set_elem(&ctx, set, attr);
+ if (err < 0) {
+ atomic_dec(&set->nelems);
+ break;
+ }
}
return err;
}
@@ -3402,11 +3516,36 @@
if (err < 0)
break;
- set->nelems--;
+ set->ndeact++;
}
return err;
}
+void nft_set_gc_batch_release(struct rcu_head *rcu)
+{
+ struct nft_set_gc_batch *gcb;
+ unsigned int i;
+
+ gcb = container_of(rcu, struct nft_set_gc_batch, head.rcu);
+ for (i = 0; i < gcb->head.cnt; i++)
+ nft_set_elem_destroy(gcb->head.set, gcb->elems[i]);
+ kfree(gcb);
+}
+EXPORT_SYMBOL_GPL(nft_set_gc_batch_release);
+
+struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set,
+ gfp_t gfp)
+{
+ struct nft_set_gc_batch *gcb;
+
+ gcb = kzalloc(sizeof(*gcb), gfp);
+ if (gcb == NULL)
+ return gcb;
+ gcb->head.set = set;
+ return gcb;
+}
+EXPORT_SYMBOL_GPL(nft_set_gc_batch_alloc);
+
static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net,
u32 portid, u32 seq)
{
@@ -3710,6 +3849,8 @@
&te->elem,
NFT_MSG_DELSETELEM, 0);
te->set->ops->remove(te->set, &te->elem);
+ atomic_dec(&te->set->nelems);
+ te->set->ndeact--;
break;
}
}
@@ -3813,16 +3954,16 @@
nft_trans_destroy(trans);
break;
case NFT_MSG_NEWSETELEM:
- nft_trans_elem_set(trans)->nelems--;
te = (struct nft_trans_elem *)trans->data;
te->set->ops->remove(te->set, &te->elem);
+ atomic_dec(&te->set->nelems);
break;
case NFT_MSG_DELSETELEM:
te = (struct nft_trans_elem *)trans->data;
- nft_trans_elem_set(trans)->nelems++;
te->set->ops->activate(te->set, &te->elem);
+ te->set->ndeact--;
nft_trans_destroy(trans);
break;
@@ -3960,7 +4101,8 @@
continue;
list_for_each_entry(binding, &set->bindings, list) {
- if (binding->chain != chain)
+ if (!(binding->flags & NFT_SET_MAP) ||
+ binding->chain != chain)
continue;
iter.skip = 0;
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index ef4dfcb..7caf08a 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -239,8 +239,14 @@
if (err < 0)
goto err6;
+ err = nft_dynset_module_init();
+ if (err < 0)
+ goto err7;
+
return 0;
+err7:
+ nft_payload_module_exit();
err6:
nft_byteorder_module_exit();
err5:
@@ -257,6 +263,7 @@
void nf_tables_core_module_exit(void)
{
+ nft_dynset_module_exit();
nft_payload_module_exit();
nft_byteorder_module_exit();
nft_bitwise_module_exit();
diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c
index 957b83a..51afea4 100644
--- a/net/netfilter/nfnetlink_log.c
+++ b/net/netfilter/nfnetlink_log.c
@@ -23,6 +23,7 @@
#include <linux/ipv6.h>
#include <linux/netdevice.h>
#include <linux/netfilter.h>
+#include <linux/netfilter_bridge.h>
#include <net/netlink.h>
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_log.h>
@@ -448,14 +449,18 @@
htonl(br_port_get_rcu(indev)->br->dev->ifindex)))
goto nla_put_failure;
} else {
+ struct net_device *physindev;
+
/* Case 2: indev is bridge group, we need to look for
* physical device (when called from ipv4) */
if (nla_put_be32(inst->skb, NFULA_IFINDEX_INDEV,
htonl(indev->ifindex)))
goto nla_put_failure;
- if (skb->nf_bridge && skb->nf_bridge->physindev &&
+
+ physindev = nf_bridge_get_physindev(skb);
+ if (physindev &&
nla_put_be32(inst->skb, NFULA_IFINDEX_PHYSINDEV,
- htonl(skb->nf_bridge->physindev->ifindex)))
+ htonl(physindev->ifindex)))
goto nla_put_failure;
}
#endif
@@ -479,14 +484,18 @@
htonl(br_port_get_rcu(outdev)->br->dev->ifindex)))
goto nla_put_failure;
} else {
+ struct net_device *physoutdev;
+
/* Case 2: indev is a bridge group, we need to look
* for physical device (when called from ipv4) */
if (nla_put_be32(inst->skb, NFULA_IFINDEX_OUTDEV,
htonl(outdev->ifindex)))
goto nla_put_failure;
- if (skb->nf_bridge && skb->nf_bridge->physoutdev &&
+
+ physoutdev = nf_bridge_get_physoutdev(skb);
+ if (physoutdev &&
nla_put_be32(inst->skb, NFULA_IFINDEX_PHYSOUTDEV,
- htonl(skb->nf_bridge->physoutdev->ifindex)))
+ htonl(physoutdev->ifindex)))
goto nla_put_failure;
}
#endif
diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c
index 6e74655..628afc3 100644
--- a/net/netfilter/nfnetlink_queue_core.c
+++ b/net/netfilter/nfnetlink_queue_core.c
@@ -25,6 +25,7 @@
#include <linux/proc_fs.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv6.h>
+#include <linux/netfilter_bridge.h>
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_queue.h>
#include <linux/list.h>
@@ -396,14 +397,18 @@
htonl(br_port_get_rcu(indev)->br->dev->ifindex)))
goto nla_put_failure;
} else {
+ int physinif;
+
/* Case 2: indev is bridge group, we need to look for
* physical device (when called from ipv4) */
if (nla_put_be32(skb, NFQA_IFINDEX_INDEV,
htonl(indev->ifindex)))
goto nla_put_failure;
- if (entskb->nf_bridge && entskb->nf_bridge->physindev &&
+
+ physinif = nf_bridge_get_physinif(entskb);
+ if (physinif &&
nla_put_be32(skb, NFQA_IFINDEX_PHYSINDEV,
- htonl(entskb->nf_bridge->physindev->ifindex)))
+ htonl(physinif)))
goto nla_put_failure;
}
#endif
@@ -426,14 +431,18 @@
htonl(br_port_get_rcu(outdev)->br->dev->ifindex)))
goto nla_put_failure;
} else {
+ int physoutif;
+
/* Case 2: outdev is bridge group, we need to look for
* physical output device (when called from ipv4) */
if (nla_put_be32(skb, NFQA_IFINDEX_OUTDEV,
htonl(outdev->ifindex)))
goto nla_put_failure;
- if (entskb->nf_bridge && entskb->nf_bridge->physoutdev &&
+
+ physoutif = nf_bridge_get_physoutif(entskb);
+ if (physoutif &&
nla_put_be32(skb, NFQA_IFINDEX_PHYSOUTDEV,
- htonl(entskb->nf_bridge->physoutdev->ifindex)))
+ htonl(physoutif)))
goto nla_put_failure;
}
#endif
@@ -765,11 +774,12 @@
return 1;
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
if (entry->skb->nf_bridge) {
- if (entry->skb->nf_bridge->physindev &&
- entry->skb->nf_bridge->physindev->ifindex == ifindex)
- return 1;
- if (entry->skb->nf_bridge->physoutdev &&
- entry->skb->nf_bridge->physoutdev->ifindex == ifindex)
+ int physinif, physoutif;
+
+ physinif = nf_bridge_get_physinif(entry->skb);
+ physoutif = nf_bridge_get_physoutif(entry->skb);
+
+ if (physinif == ifindex || physoutif == ifindex)
return 1;
}
#endif
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
new file mode 100644
index 0000000..eeb72de
--- /dev/null
+++ b/net/netfilter/nft_dynset.c
@@ -0,0 +1,218 @@
+/*
+ * Copyright (c) 2015 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+
+struct nft_dynset {
+ struct nft_set *set;
+ struct nft_set_ext_tmpl tmpl;
+ enum nft_dynset_ops op:8;
+ enum nft_registers sreg_key:8;
+ enum nft_registers sreg_data:8;
+ u64 timeout;
+ struct nft_set_binding binding;
+};
+
+static void *nft_dynset_new(struct nft_set *set, const struct nft_expr *expr,
+ struct nft_data data[NFT_REG_MAX + 1])
+{
+ const struct nft_dynset *priv = nft_expr_priv(expr);
+ u64 timeout;
+ void *elem;
+
+ if (set->size && !atomic_add_unless(&set->nelems, 1, set->size))
+ return NULL;
+
+ timeout = priv->timeout ? : set->timeout;
+ elem = nft_set_elem_init(set, &priv->tmpl,
+ &data[priv->sreg_key], &data[priv->sreg_data],
+ timeout, GFP_ATOMIC);
+ if (elem == NULL) {
+ if (set->size)
+ atomic_dec(&set->nelems);
+ }
+ return elem;
+}
+
+static void nft_dynset_eval(const struct nft_expr *expr,
+ struct nft_data data[NFT_REG_MAX + 1],
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_dynset *priv = nft_expr_priv(expr);
+ struct nft_set *set = priv->set;
+ const struct nft_set_ext *ext;
+ u64 timeout;
+
+ if (set->ops->update(set, &data[priv->sreg_key], nft_dynset_new,
+ expr, data, &ext)) {
+ if (priv->op == NFT_DYNSET_OP_UPDATE &&
+ nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) {
+ timeout = priv->timeout ? : set->timeout;
+ *nft_set_ext_expiration(ext) = jiffies + timeout;
+ return;
+ }
+ }
+
+ data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+}
+
+static const struct nla_policy nft_dynset_policy[NFTA_DYNSET_MAX + 1] = {
+ [NFTA_DYNSET_SET_NAME] = { .type = NLA_STRING },
+ [NFTA_DYNSET_SET_ID] = { .type = NLA_U32 },
+ [NFTA_DYNSET_OP] = { .type = NLA_U32 },
+ [NFTA_DYNSET_SREG_KEY] = { .type = NLA_U32 },
+ [NFTA_DYNSET_SREG_DATA] = { .type = NLA_U32 },
+ [NFTA_DYNSET_TIMEOUT] = { .type = NLA_U64 },
+};
+
+static int nft_dynset_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_dynset *priv = nft_expr_priv(expr);
+ struct nft_set *set;
+ u64 timeout;
+ int err;
+
+ if (tb[NFTA_DYNSET_SET_NAME] == NULL ||
+ tb[NFTA_DYNSET_OP] == NULL ||
+ tb[NFTA_DYNSET_SREG_KEY] == NULL)
+ return -EINVAL;
+
+ set = nf_tables_set_lookup(ctx->table, tb[NFTA_DYNSET_SET_NAME]);
+ if (IS_ERR(set)) {
+ if (tb[NFTA_DYNSET_SET_ID])
+ set = nf_tables_set_lookup_byid(ctx->net,
+ tb[NFTA_DYNSET_SET_ID]);
+ if (IS_ERR(set))
+ return PTR_ERR(set);
+ }
+
+ if (set->flags & NFT_SET_CONSTANT)
+ return -EBUSY;
+
+ priv->op = ntohl(nla_get_be32(tb[NFTA_DYNSET_OP]));
+ switch (priv->op) {
+ case NFT_DYNSET_OP_ADD:
+ break;
+ case NFT_DYNSET_OP_UPDATE:
+ if (!(set->flags & NFT_SET_TIMEOUT))
+ return -EOPNOTSUPP;
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ timeout = 0;
+ if (tb[NFTA_DYNSET_TIMEOUT] != NULL) {
+ if (!(set->flags & NFT_SET_TIMEOUT))
+ return -EINVAL;
+ timeout = be64_to_cpu(nla_get_be64(tb[NFTA_DYNSET_TIMEOUT]));
+ }
+
+ priv->sreg_key = ntohl(nla_get_be32(tb[NFTA_DYNSET_SREG_KEY]));
+ err = nft_validate_input_register(priv->sreg_key);
+ if (err < 0)
+ return err;
+
+ if (tb[NFTA_DYNSET_SREG_DATA] != NULL) {
+ if (!(set->flags & NFT_SET_MAP))
+ return -EINVAL;
+ if (set->dtype == NFT_DATA_VERDICT)
+ return -EOPNOTSUPP;
+
+ priv->sreg_data = ntohl(nla_get_be32(tb[NFTA_DYNSET_SREG_DATA]));
+ err = nft_validate_input_register(priv->sreg_data);
+ if (err < 0)
+ return err;
+ } else if (set->flags & NFT_SET_MAP)
+ return -EINVAL;
+
+ nft_set_ext_prepare(&priv->tmpl);
+ nft_set_ext_add_length(&priv->tmpl, NFT_SET_EXT_KEY, set->klen);
+ if (set->flags & NFT_SET_MAP)
+ nft_set_ext_add_length(&priv->tmpl, NFT_SET_EXT_DATA, set->dlen);
+ if (set->flags & NFT_SET_TIMEOUT) {
+ if (timeout || set->timeout)
+ nft_set_ext_add(&priv->tmpl, NFT_SET_EXT_EXPIRATION);
+ }
+
+ priv->timeout = timeout;
+
+ err = nf_tables_bind_set(ctx, set, &priv->binding);
+ if (err < 0)
+ return err;
+
+ priv->set = set;
+ return 0;
+}
+
+static void nft_dynset_destroy(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ struct nft_dynset *priv = nft_expr_priv(expr);
+
+ nf_tables_unbind_set(ctx, priv->set, &priv->binding);
+}
+
+static int nft_dynset_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+ const struct nft_dynset *priv = nft_expr_priv(expr);
+
+ if (nla_put_be32(skb, NFTA_DYNSET_SREG_KEY, htonl(priv->sreg_key)))
+ goto nla_put_failure;
+ if (priv->set->flags & NFT_SET_MAP &&
+ nla_put_be32(skb, NFTA_DYNSET_SREG_DATA, htonl(priv->sreg_data)))
+ goto nla_put_failure;
+ if (nla_put_be32(skb, NFTA_DYNSET_OP, htonl(priv->op)))
+ goto nla_put_failure;
+ if (nla_put_string(skb, NFTA_DYNSET_SET_NAME, priv->set->name))
+ goto nla_put_failure;
+ if (nla_put_be64(skb, NFTA_DYNSET_TIMEOUT, cpu_to_be64(priv->timeout)))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -1;
+}
+
+static struct nft_expr_type nft_dynset_type;
+static const struct nft_expr_ops nft_dynset_ops = {
+ .type = &nft_dynset_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_dynset)),
+ .eval = nft_dynset_eval,
+ .init = nft_dynset_init,
+ .destroy = nft_dynset_destroy,
+ .dump = nft_dynset_dump,
+};
+
+static struct nft_expr_type nft_dynset_type __read_mostly = {
+ .name = "dynset",
+ .ops = &nft_dynset_ops,
+ .policy = nft_dynset_policy,
+ .maxattr = NFTA_DYNSET_MAX,
+ .owner = THIS_MODULE,
+};
+
+int __init nft_dynset_module_init(void)
+{
+ return nft_register_expr(&nft_dynset_type);
+}
+
+void nft_dynset_module_exit(void)
+{
+ nft_unregister_expr(&nft_dynset_type);
+}
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index c7e1a9d..bc23806 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -15,6 +15,7 @@
#include <linux/log2.h>
#include <linux/jhash.h>
#include <linux/netlink.h>
+#include <linux/workqueue.h>
#include <linux/rhashtable.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
@@ -25,6 +26,7 @@
struct nft_hash {
struct rhashtable ht;
+ struct delayed_work gc_work;
};
struct nft_hash_elem {
@@ -62,6 +64,8 @@
if (nft_data_cmp(nft_set_ext_key(&he->ext), x->key, x->set->klen))
return 1;
+ if (nft_set_elem_expired(&he->ext))
+ return 1;
if (!nft_set_elem_active(&he->ext, x->genmask))
return 1;
return 0;
@@ -86,6 +90,42 @@
return !!he;
}
+static bool nft_hash_update(struct nft_set *set, const struct nft_data *key,
+ void *(*new)(struct nft_set *,
+ const struct nft_expr *,
+ struct nft_data []),
+ const struct nft_expr *expr,
+ struct nft_data data[],
+ const struct nft_set_ext **ext)
+{
+ struct nft_hash *priv = nft_set_priv(set);
+ struct nft_hash_elem *he;
+ struct nft_hash_cmp_arg arg = {
+ .genmask = NFT_GENMASK_ANY,
+ .set = set,
+ .key = key,
+ };
+
+ he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params);
+ if (he != NULL)
+ goto out;
+
+ he = new(set, expr, data);
+ if (he == NULL)
+ goto err1;
+ if (rhashtable_lookup_insert_key(&priv->ht, &arg, &he->node,
+ nft_hash_params))
+ goto err2;
+out:
+ *ext = &he->ext;
+ return true;
+
+err2:
+ nft_set_elem_destroy(set, he);
+err1:
+ return false;
+}
+
static int nft_hash_insert(const struct nft_set *set,
const struct nft_set_elem *elem)
{
@@ -107,6 +147,7 @@
struct nft_hash_elem *he = elem->priv;
nft_set_elem_change_active(set, &he->ext);
+ nft_set_elem_clear_busy(&he->ext);
}
static void *nft_hash_deactivate(const struct nft_set *set,
@@ -120,9 +161,15 @@
.key = &elem->key,
};
+ rcu_read_lock();
he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params);
- if (he != NULL)
- nft_set_elem_change_active(set, &he->ext);
+ if (he != NULL) {
+ if (!nft_set_elem_mark_busy(&he->ext))
+ nft_set_elem_change_active(set, &he->ext);
+ else
+ he = NULL;
+ }
+ rcu_read_unlock();
return he;
}
@@ -170,6 +217,8 @@
if (iter->count < iter->skip)
goto cont;
+ if (nft_set_elem_expired(&he->ext))
+ goto cont;
if (!nft_set_elem_active(&he->ext, genmask))
goto cont;
@@ -188,6 +237,55 @@
rhashtable_walk_exit(&hti);
}
+static void nft_hash_gc(struct work_struct *work)
+{
+ struct nft_set *set;
+ struct nft_hash_elem *he;
+ struct nft_hash *priv;
+ struct nft_set_gc_batch *gcb = NULL;
+ struct rhashtable_iter hti;
+ int err;
+
+ priv = container_of(work, struct nft_hash, gc_work.work);
+ set = nft_set_container_of(priv);
+
+ err = rhashtable_walk_init(&priv->ht, &hti);
+ if (err)
+ goto schedule;
+
+ err = rhashtable_walk_start(&hti);
+ if (err && err != -EAGAIN)
+ goto out;
+
+ while ((he = rhashtable_walk_next(&hti))) {
+ if (IS_ERR(he)) {
+ if (PTR_ERR(he) != -EAGAIN)
+ goto out;
+ continue;
+ }
+
+ if (!nft_set_elem_expired(&he->ext))
+ continue;
+ if (nft_set_elem_mark_busy(&he->ext))
+ continue;
+
+ gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
+ if (gcb == NULL)
+ goto out;
+ rhashtable_remove_fast(&priv->ht, &he->node, nft_hash_params);
+ atomic_dec(&set->nelems);
+ nft_set_gc_batch_add(gcb, he);
+ }
+out:
+ rhashtable_walk_stop(&hti);
+ rhashtable_walk_exit(&hti);
+
+ nft_set_gc_batch_complete(gcb);
+schedule:
+ queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
+ nft_set_gc_interval(set));
+}
+
static unsigned int nft_hash_privsize(const struct nlattr * const nla[])
{
return sizeof(struct nft_hash);
@@ -207,11 +305,20 @@
{
struct nft_hash *priv = nft_set_priv(set);
struct rhashtable_params params = nft_hash_params;
+ int err;
params.nelem_hint = desc->size ?: NFT_HASH_ELEMENT_HINT;
params.key_len = set->klen;
- return rhashtable_init(&priv->ht, ¶ms);
+ err = rhashtable_init(&priv->ht, ¶ms);
+ if (err < 0)
+ return err;
+
+ INIT_DEFERRABLE_WORK(&priv->gc_work, nft_hash_gc);
+ if (set->flags & NFT_SET_TIMEOUT)
+ queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
+ nft_set_gc_interval(set));
+ return 0;
}
static void nft_hash_elem_destroy(void *ptr, void *arg)
@@ -223,6 +330,7 @@
{
struct nft_hash *priv = nft_set_priv(set);
+ cancel_delayed_work_sync(&priv->gc_work);
rhashtable_free_and_destroy(&priv->ht, nft_hash_elem_destroy,
(void *)set);
}
@@ -263,8 +371,9 @@
.deactivate = nft_hash_deactivate,
.remove = nft_hash_remove,
.lookup = nft_hash_lookup,
+ .update = nft_hash_update,
.walk = nft_hash_walk,
- .features = NFT_SET_MAP,
+ .features = NFT_SET_MAP | NFT_SET_TIMEOUT,
.owner = THIS_MODULE,
};
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index a5f30b8..d8cf86f 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -92,6 +92,8 @@
} else if (set->flags & NFT_SET_MAP)
return -EINVAL;
+ priv->binding.flags = set->flags & NFT_SET_MAP;
+
err = nf_tables_bind_set(ctx, set, &priv->binding);
if (err < 0)
return err;
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 5197874..d79ce88 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -166,9 +166,8 @@
dest->data[0] = out->group;
break;
case NFT_META_CGROUP:
- if (skb->sk == NULL)
- break;
-
+ if (skb->sk == NULL || !sk_fullsock(skb->sk))
+ goto err;
dest->data[0] = skb->sk->sk_classid;
break;
default:
diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c
index 7198d66..a1d126f 100644
--- a/net/netfilter/xt_cgroup.c
+++ b/net/netfilter/xt_cgroup.c
@@ -39,7 +39,7 @@
{
const struct xt_cgroup_info *info = par->matchinfo;
- if (skb->sk == NULL)
+ if (skb->sk == NULL || !sk_fullsock(skb->sk))
return false;
return (info->id == skb->sk->sk_classid) ^ info->invert;
diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c
index 50a5204..1caaccb 100644
--- a/net/netfilter/xt_physdev.c
+++ b/net/netfilter/xt_physdev.c
@@ -25,16 +25,15 @@
static bool
physdev_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
- static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
const struct xt_physdev_info *info = par->matchinfo;
+ const struct net_device *physdev;
unsigned long ret;
const char *indev, *outdev;
- const struct nf_bridge_info *nf_bridge;
/* Not a bridged IP packet or no info available yet:
* LOCAL_OUT/mangle and LOCAL_OUT/nat don't know if
* the destination device will be a bridge. */
- if (!(nf_bridge = skb->nf_bridge)) {
+ if (!skb->nf_bridge) {
/* Return MATCH if the invert flags of the used options are on */
if ((info->bitmask & XT_PHYSDEV_OP_BRIDGED) &&
!(info->invert & XT_PHYSDEV_OP_BRIDGED))
@@ -54,30 +53,41 @@
return true;
}
+ physdev = nf_bridge_get_physoutdev(skb);
+ outdev = physdev ? physdev->name : NULL;
+
/* This only makes sense in the FORWARD and POSTROUTING chains */
if ((info->bitmask & XT_PHYSDEV_OP_BRIDGED) &&
- (!!nf_bridge->physoutdev ^ !(info->invert & XT_PHYSDEV_OP_BRIDGED)))
+ (!!outdev ^ !(info->invert & XT_PHYSDEV_OP_BRIDGED)))
return false;
+ physdev = nf_bridge_get_physindev(skb);
+ indev = physdev ? physdev->name : NULL;
+
if ((info->bitmask & XT_PHYSDEV_OP_ISIN &&
- (!nf_bridge->physindev ^ !!(info->invert & XT_PHYSDEV_OP_ISIN))) ||
+ (!indev ^ !!(info->invert & XT_PHYSDEV_OP_ISIN))) ||
(info->bitmask & XT_PHYSDEV_OP_ISOUT &&
- (!nf_bridge->physoutdev ^ !!(info->invert & XT_PHYSDEV_OP_ISOUT))))
+ (!outdev ^ !!(info->invert & XT_PHYSDEV_OP_ISOUT))))
return false;
if (!(info->bitmask & XT_PHYSDEV_OP_IN))
goto match_outdev;
- indev = nf_bridge->physindev ? nf_bridge->physindev->name : nulldevname;
- ret = ifname_compare_aligned(indev, info->physindev, info->in_mask);
- if (!ret ^ !(info->invert & XT_PHYSDEV_OP_IN))
- return false;
+ if (indev) {
+ ret = ifname_compare_aligned(indev, info->physindev,
+ info->in_mask);
+
+ if (!ret ^ !(info->invert & XT_PHYSDEV_OP_IN))
+ return false;
+ }
match_outdev:
if (!(info->bitmask & XT_PHYSDEV_OP_OUT))
return true;
- outdev = nf_bridge->physoutdev ?
- nf_bridge->physoutdev->name : nulldevname;
+
+ if (!outdev)
+ return false;
+
ret = ifname_compare_aligned(outdev, info->physoutdev, info->out_mask);
return (!!ret ^ !(info->invert & XT_PHYSDEV_OP_OUT));
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 895534e..e092cb0 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -143,13 +143,10 @@
}
}
-static bool
-socket_match(const struct sk_buff *skb, struct xt_action_param *par,
- const struct xt_socket_mtinfo1 *info)
+static struct sock *xt_socket_lookup_slow_v4(const struct sk_buff *skb,
+ const struct net_device *indev)
{
const struct iphdr *iph = ip_hdr(skb);
- struct udphdr _hdr, *hp = NULL;
- struct sock *sk = skb->sk;
__be32 uninitialized_var(daddr), uninitialized_var(saddr);
__be16 uninitialized_var(dport), uninitialized_var(sport);
u8 uninitialized_var(protocol);
@@ -159,10 +156,12 @@
#endif
if (iph->protocol == IPPROTO_UDP || iph->protocol == IPPROTO_TCP) {
+ struct udphdr _hdr, *hp;
+
hp = skb_header_pointer(skb, ip_hdrlen(skb),
sizeof(_hdr), &_hdr);
if (hp == NULL)
- return false;
+ return NULL;
protocol = iph->protocol;
saddr = iph->saddr;
@@ -172,16 +171,17 @@
} else if (iph->protocol == IPPROTO_ICMP) {
if (extract_icmp4_fields(skb, &protocol, &saddr, &daddr,
- &sport, &dport))
- return false;
+ &sport, &dport))
+ return NULL;
} else {
- return false;
+ return NULL;
}
#ifdef XT_SOCKET_HAVE_CONNTRACK
- /* Do the lookup with the original socket address in case this is a
- * reply packet of an established SNAT-ted connection. */
-
+ /* Do the lookup with the original socket address in
+ * case this is a reply packet of an established
+ * SNAT-ted connection.
+ */
ct = nf_ct_get(skb, &ctinfo);
if (ct && !nf_ct_is_untracked(ct) &&
((iph->protocol != IPPROTO_ICMP &&
@@ -197,10 +197,18 @@
}
#endif
+ return xt_socket_get_sock_v4(dev_net(skb->dev), protocol, saddr, daddr,
+ sport, dport, indev);
+}
+
+static bool
+socket_match(const struct sk_buff *skb, struct xt_action_param *par,
+ const struct xt_socket_mtinfo1 *info)
+{
+ struct sock *sk = skb->sk;
+
if (!sk)
- sk = xt_socket_get_sock_v4(dev_net(skb->dev), protocol,
- saddr, daddr, sport, dport,
- par->in);
+ sk = xt_socket_lookup_slow_v4(skb, par->in);
if (sk) {
bool wildcard;
bool transparent = true;
@@ -225,12 +233,7 @@
sk = NULL;
}
- pr_debug("proto %hhu %pI4:%hu -> %pI4:%hu (orig %pI4:%hu) sock %p\n",
- protocol, &saddr, ntohs(sport),
- &daddr, ntohs(dport),
- &iph->daddr, hp ? ntohs(hp->dest) : 0, sk);
-
- return (sk != NULL);
+ return sk != NULL;
}
static bool
@@ -327,28 +330,26 @@
return NULL;
}
-static bool
-socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par)
+static struct sock *xt_socket_lookup_slow_v6(const struct sk_buff *skb,
+ const struct net_device *indev)
{
- struct ipv6hdr ipv6_var, *iph = ipv6_hdr(skb);
- struct udphdr _hdr, *hp = NULL;
- struct sock *sk = skb->sk;
- const struct in6_addr *daddr = NULL, *saddr = NULL;
__be16 uninitialized_var(dport), uninitialized_var(sport);
- int thoff = 0, uninitialized_var(tproto);
- const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo;
+ const struct in6_addr *daddr = NULL, *saddr = NULL;
+ struct ipv6hdr *iph = ipv6_hdr(skb);
+ int thoff = 0, tproto;
tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL);
if (tproto < 0) {
pr_debug("unable to find transport header in IPv6 packet, dropping\n");
- return NF_DROP;
+ return NULL;
}
if (tproto == IPPROTO_UDP || tproto == IPPROTO_TCP) {
- hp = skb_header_pointer(skb, thoff,
- sizeof(_hdr), &_hdr);
+ struct udphdr _hdr, *hp;
+
+ hp = skb_header_pointer(skb, thoff, sizeof(_hdr), &_hdr);
if (hp == NULL)
- return false;
+ return NULL;
saddr = &iph->saddr;
sport = hp->source;
@@ -356,17 +357,27 @@
dport = hp->dest;
} else if (tproto == IPPROTO_ICMPV6) {
+ struct ipv6hdr ipv6_var;
+
if (extract_icmp6_fields(skb, thoff, &tproto, &saddr, &daddr,
&sport, &dport, &ipv6_var))
- return false;
+ return NULL;
} else {
- return false;
+ return NULL;
}
+ return xt_socket_get_sock_v6(dev_net(skb->dev), tproto, saddr, daddr,
+ sport, dport, indev);
+}
+
+static bool
+socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo;
+ struct sock *sk = skb->sk;
+
if (!sk)
- sk = xt_socket_get_sock_v6(dev_net(skb->dev), tproto,
- saddr, daddr, sport, dport,
- par->in);
+ sk = xt_socket_lookup_slow_v6(skb, par->in);
if (sk) {
bool wildcard;
bool transparent = true;
@@ -391,13 +402,7 @@
sk = NULL;
}
- pr_debug("proto %hhd %pI6:%hu -> %pI6:%hu "
- "(orig %pI6:%hu) sock %p\n",
- tproto, saddr, ntohs(sport),
- daddr, ntohs(dport),
- &iph->daddr, hp ? ntohs(hp->dest) : 0, sk);
-
- return (sk != NULL);
+ return sk != NULL;
}
#endif