Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
author Pablo Neira Ayuso <pablo@netfilter.org>
Wed, 8 Apr 2015 15:40:17 +0000 (17:40 +0200)
committer Pablo Neira Ayuso <pablo@netfilter.org>
Wed, 8 Apr 2015 16:30:21 +0000 (18:30 +0200)
Resolve conflicts between 5888b93 ("Merge branch 'nf-hook-compress'") and
Florian Westphal's br_netfilter work.

Conflicts:
        net/bridge/br_netfilter.c

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
23 files changed:
include/linux/netfilter_bridge.h
include/linux/skbuff.h
include/net/netfilter/nf_tables.h
include/net/netfilter/nf_tables_core.h
include/uapi/linux/netfilter/nf_tables.h
net/bridge/br_netfilter.c
net/ipv4/netfilter/nf_reject_ipv4.c
net/ipv6/netfilter/nf_reject_ipv6.c
net/netfilter/Makefile
net/netfilter/ipset/ip_set_hash_netiface.c
net/netfilter/nf_log_common.c
net/netfilter/nf_queue.c
net/netfilter/nf_tables_api.c
net/netfilter/nf_tables_core.c
net/netfilter/nfnetlink_log.c
net/netfilter/nfnetlink_queue_core.c
net/netfilter/nft_dynset.c [new file with mode: 0644]
net/netfilter/nft_hash.c
net/netfilter/nft_lookup.c
net/netfilter/nft_meta.c
net/netfilter/xt_cgroup.c
net/netfilter/xt_physdev.c
net/netfilter/xt_socket.c

index 5fc0a0fe244b47ee691b62292314082d0d038dde..ab8f76dba6680f485943545407daeb117620b489 100644 (file)
@@ -2,7 +2,7 @@
 #define __LINUX_BRIDGE_NETFILTER_H
 
 #include <uapi/linux/netfilter_bridge.h>
-
+#include <linux/skbuff.h>
 
 enum nf_br_hook_priorities {
        NF_BR_PRI_FIRST = INT_MIN,
@@ -17,15 +17,12 @@ enum nf_br_hook_priorities {
 
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 
-#define BRNF_PKT_TYPE                  0x01
 #define BRNF_BRIDGED_DNAT              0x02
 #define BRNF_NF_BRIDGE_PREROUTING      0x08
-#define BRNF_8021Q                     0x10
-#define BRNF_PPPoE                     0x20
 
 static inline unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb)
 {
-       if (unlikely(skb->nf_bridge->mask & BRNF_PPPoE))
+       if (skb->nf_bridge->orig_proto == BRNF_PROTO_PPPOE)
                return PPPOE_SES_HLEN;
        return 0;
 }
@@ -40,6 +37,27 @@ static inline void br_drop_fake_rtable(struct sk_buff *skb)
                skb_dst_drop(skb);
 }
 
+static inline int nf_bridge_get_physinif(const struct sk_buff *skb)
+{
+       return skb->nf_bridge ? skb->nf_bridge->physindev->ifindex : 0;
+}
+
+static inline int nf_bridge_get_physoutif(const struct sk_buff *skb)
+{
+       return skb->nf_bridge ? skb->nf_bridge->physoutdev->ifindex : 0;
+}
+
+static inline struct net_device *
+nf_bridge_get_physindev(const struct sk_buff *skb)
+{
+       return skb->nf_bridge ? skb->nf_bridge->physindev : NULL;
+}
+
+static inline struct net_device *
+nf_bridge_get_physoutdev(const struct sk_buff *skb)
+{
+       return skb->nf_bridge ? skb->nf_bridge->physoutdev : NULL;
+}
 #else
 #define br_drop_fake_rtable(skb)               do { } while (0)
 #endif /* CONFIG_BRIDGE_NETFILTER */
index 36f3f43c011789efe68c372296cc691cc7a3863e..0991259643d6ef44eac3940728a4c3fd1d51d7bb 100644 (file)
@@ -166,10 +166,16 @@ struct nf_conntrack {
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 struct nf_bridge_info {
        atomic_t                use;
+       enum {
+               BRNF_PROTO_UNCHANGED,
+               BRNF_PROTO_8021Q,
+               BRNF_PROTO_PPPOE
+       } orig_proto;
+       bool                    pkt_otherhost;
        unsigned int            mask;
        struct net_device       *physindev;
        struct net_device       *physoutdev;
-       unsigned long           data[32 / sizeof(unsigned long)];
+       char                    neigh_header[8];
 };
 #endif
 
index 8049819803939043db30ea182b715341f0cd9587..d6a2f0ed5130582d0489bc372e3b7e033f5267e8 100644 (file)
@@ -195,6 +195,7 @@ struct nft_set_estimate {
 };
 
 struct nft_set_ext;
+struct nft_expr;
 
 /**
  *     struct nft_set_ops - nf_tables set operations
@@ -217,6 +218,15 @@ struct nft_set_ops {
        bool                            (*lookup)(const struct nft_set *set,
                                                  const struct nft_data *key,
                                                  const struct nft_set_ext **ext);
+       bool                            (*update)(struct nft_set *set,
+                                                 const struct nft_data *key,
+                                                 void *(*new)(struct nft_set *,
+                                                              const struct nft_expr *,
+                                                              struct nft_data []),
+                                                 const struct nft_expr *expr,
+                                                 struct nft_data data[],
+                                                 const struct nft_set_ext **ext);
+
        int                             (*insert)(const struct nft_set *set,
                                                  const struct nft_set_elem *elem);
        void                            (*activate)(const struct nft_set *set,
@@ -257,6 +267,9 @@ void nft_unregister_set(struct nft_set_ops *ops);
  *     @dtype: data type (verdict or numeric type defined by userspace)
  *     @size: maximum set size
  *     @nelems: number of elements
+ *     @ndeact: number of deactivated elements queued for removal
+ *     @timeout: default timeout value in msecs
+ *     @gc_int: garbage collection interval in msecs
  *     @policy: set parameterization (see enum nft_set_policies)
  *     @ops: set ops
  *     @pnet: network namespace
@@ -272,7 +285,10 @@ struct nft_set {
        u32                             ktype;
        u32                             dtype;
        u32                             size;
-       u32                             nelems;
+       atomic_t                        nelems;
+       u32                             ndeact;
+       u64                             timeout;
+       u32                             gc_int;
        u16                             policy;
        /* runtime data below here */
        const struct nft_set_ops        *ops ____cacheline_aligned;
@@ -289,16 +305,27 @@ static inline void *nft_set_priv(const struct nft_set *set)
        return (void *)set->data;
 }
 
+static inline struct nft_set *nft_set_container_of(const void *priv)
+{
+       return (void *)priv - offsetof(struct nft_set, data);
+}
+
 struct nft_set *nf_tables_set_lookup(const struct nft_table *table,
                                     const struct nlattr *nla);
 struct nft_set *nf_tables_set_lookup_byid(const struct net *net,
                                          const struct nlattr *nla);
 
+static inline unsigned long nft_set_gc_interval(const struct nft_set *set)
+{
+       return set->gc_int ? msecs_to_jiffies(set->gc_int) : HZ;
+}
+
 /**
  *     struct nft_set_binding - nf_tables set binding
  *
  *     @list: set bindings list node
  *     @chain: chain containing the rule bound to the set
+ *     @flags: set action flags
  *
  *     A set binding contains all information necessary for validation
  *     of new elements added to a bound set.
@@ -306,6 +333,7 @@ struct nft_set *nf_tables_set_lookup_byid(const struct net *net,
 struct nft_set_binding {
        struct list_head                list;
        const struct nft_chain          *chain;
+       u32                             flags;
 };
 
 int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
@@ -319,12 +347,18 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
  *     @NFT_SET_EXT_KEY: element key
  *     @NFT_SET_EXT_DATA: mapping data
  *     @NFT_SET_EXT_FLAGS: element flags
+ *     @NFT_SET_EXT_TIMEOUT: element timeout
+ *     @NFT_SET_EXT_EXPIRATION: element expiration time
+ *     @NFT_SET_EXT_USERDATA: user data associated with the element
  *     @NFT_SET_EXT_NUM: number of extension types
  */
 enum nft_set_extensions {
        NFT_SET_EXT_KEY,
        NFT_SET_EXT_DATA,
        NFT_SET_EXT_FLAGS,
+       NFT_SET_EXT_TIMEOUT,
+       NFT_SET_EXT_EXPIRATION,
+       NFT_SET_EXT_USERDATA,
        NFT_SET_EXT_NUM
 };
 
@@ -421,14 +455,96 @@ static inline u8 *nft_set_ext_flags(const struct nft_set_ext *ext)
        return nft_set_ext(ext, NFT_SET_EXT_FLAGS);
 }
 
+static inline u64 *nft_set_ext_timeout(const struct nft_set_ext *ext)
+{
+       return nft_set_ext(ext, NFT_SET_EXT_TIMEOUT);
+}
+
+static inline unsigned long *nft_set_ext_expiration(const struct nft_set_ext *ext)
+{
+       return nft_set_ext(ext, NFT_SET_EXT_EXPIRATION);
+}
+
+static inline struct nft_userdata *nft_set_ext_userdata(const struct nft_set_ext *ext)
+{
+       return nft_set_ext(ext, NFT_SET_EXT_USERDATA);
+}
+
+static inline bool nft_set_elem_expired(const struct nft_set_ext *ext)
+{
+       return nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION) &&
+              time_is_before_eq_jiffies(*nft_set_ext_expiration(ext));
+}
+
 static inline struct nft_set_ext *nft_set_elem_ext(const struct nft_set *set,
                                                   void *elem)
 {
        return elem + set->ops->elemsize;
 }
 
+void *nft_set_elem_init(const struct nft_set *set,
+                       const struct nft_set_ext_tmpl *tmpl,
+                       const struct nft_data *key,
+                       const struct nft_data *data,
+                       u64 timeout, gfp_t gfp);
 void nft_set_elem_destroy(const struct nft_set *set, void *elem);
 
+/**
+ *     struct nft_set_gc_batch_head - nf_tables set garbage collection batch
+ *
+ *     @rcu: rcu head
+ *     @set: set the elements belong to
+ *     @cnt: count of elements
+ */
+struct nft_set_gc_batch_head {
+       struct rcu_head                 rcu;
+       const struct nft_set            *set;
+       unsigned int                    cnt;
+};
+
+#define NFT_SET_GC_BATCH_SIZE  ((PAGE_SIZE -                             \
+                                 sizeof(struct nft_set_gc_batch_head)) / \
+                                sizeof(void *))
+
+/**
+ *     struct nft_set_gc_batch - nf_tables set garbage collection batch
+ *
+ *     @head: GC batch head
+ *     @elems: garbage collection elements
+ */
+struct nft_set_gc_batch {
+       struct nft_set_gc_batch_head    head;
+       void                            *elems[NFT_SET_GC_BATCH_SIZE];
+};
+
+struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set,
+                                               gfp_t gfp);
+void nft_set_gc_batch_release(struct rcu_head *rcu);
+
+static inline void nft_set_gc_batch_complete(struct nft_set_gc_batch *gcb)
+{
+       if (gcb != NULL)
+               call_rcu(&gcb->head.rcu, nft_set_gc_batch_release);
+}
+
+static inline struct nft_set_gc_batch *
+nft_set_gc_batch_check(const struct nft_set *set, struct nft_set_gc_batch *gcb,
+                      gfp_t gfp)
+{
+       if (gcb != NULL) {
+               if (gcb->head.cnt + 1 < ARRAY_SIZE(gcb->elems))
+                       return gcb;
+               nft_set_gc_batch_complete(gcb);
+       }
+       return nft_set_gc_batch_alloc(set, gfp);
+}
+
+static inline void nft_set_gc_batch_add(struct nft_set_gc_batch *gcb,
+                                       void *elem)
+{
+       gcb->elems[gcb->head.cnt++] = elem;
+}
+
 /**
  *     struct nft_expr_type - nf_tables expression type
  *
@@ -750,6 +866,8 @@ static inline u8 nft_genmask_cur(const struct net *net)
        return 1 << ACCESS_ONCE(net->nft.gencursor);
 }
 
+#define NFT_GENMASK_ANY                ((1 << 0) | (1 << 1))
+
 /*
  * Set element transaction helpers
  */
@@ -766,6 +884,41 @@ static inline void nft_set_elem_change_active(const struct nft_set *set,
        ext->genmask ^= nft_genmask_next(read_pnet(&set->pnet));
 }
 
+/*
+ * We use a free bit in the genmask field to indicate the element
+ * is busy, meaning it is currently being processed either by
+ * the netlink API or GC.
+ *
+ * Even though the genmask is only a single byte wide, this works
+ * because the extension structure is fully constant once initialized,
+ * so there are no non-atomic write accesses unless it is already
+ * marked busy.
+ */
+#define NFT_SET_ELEM_BUSY_MASK (1 << 2)
+
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+#define NFT_SET_ELEM_BUSY_BIT  2
+#elif defined(__BIG_ENDIAN_BITFIELD)
+#define NFT_SET_ELEM_BUSY_BIT  (BITS_PER_LONG - BITS_PER_BYTE + 2)
+#else
+#error
+#endif
+
+static inline int nft_set_elem_mark_busy(struct nft_set_ext *ext)
+{
+       unsigned long *word = (unsigned long *)ext;
+
+       BUILD_BUG_ON(offsetof(struct nft_set_ext, genmask) != 0);
+       return test_and_set_bit(NFT_SET_ELEM_BUSY_BIT, word);
+}
+
+static inline void nft_set_elem_clear_busy(struct nft_set_ext *ext)
+{
+       unsigned long *word = (unsigned long *)ext;
+
+       clear_bit(NFT_SET_ELEM_BUSY_BIT, word);
+}
+
 /**
  *     struct nft_trans - nf_tables object update in transaction
  *
index a75fc8e27cd698483232a3f3a379efc76308a905..c6f400cfaac8d76673d559bf041c5da4b32abf13 100644 (file)
@@ -31,6 +31,9 @@ void nft_cmp_module_exit(void);
 int nft_lookup_module_init(void);
 void nft_lookup_module_exit(void);
 
+int nft_dynset_module_init(void);
+void nft_dynset_module_exit(void);
+
 int nft_bitwise_module_init(void);
 void nft_bitwise_module_exit(void);
 
index b9783931503b1f704a2422d85922301929dc2675..05ee1e0804a3f2c8971c1956dc768c416e4cec71 100644 (file)
@@ -208,12 +208,14 @@ enum nft_rule_compat_attributes {
  * @NFT_SET_CONSTANT: set contents may not change while bound
  * @NFT_SET_INTERVAL: set contains intervals
  * @NFT_SET_MAP: set is used as a dictionary
+ * @NFT_SET_TIMEOUT: set uses timeouts
  */
 enum nft_set_flags {
        NFT_SET_ANONYMOUS               = 0x1,
        NFT_SET_CONSTANT                = 0x2,
        NFT_SET_INTERVAL                = 0x4,
        NFT_SET_MAP                     = 0x8,
+       NFT_SET_TIMEOUT                 = 0x10,
 };
 
 /**
@@ -252,6 +254,8 @@ enum nft_set_desc_attributes {
  * @NFTA_SET_POLICY: selection policy (NLA_U32)
  * @NFTA_SET_DESC: set description (NLA_NESTED)
  * @NFTA_SET_ID: uniquely identifies a set in a transaction (NLA_U32)
+ * @NFTA_SET_TIMEOUT: default timeout value (NLA_U64)
+ * @NFTA_SET_GC_INTERVAL: garbage collection interval (NLA_U32)
  */
 enum nft_set_attributes {
        NFTA_SET_UNSPEC,
@@ -265,6 +269,8 @@ enum nft_set_attributes {
        NFTA_SET_POLICY,
        NFTA_SET_DESC,
        NFTA_SET_ID,
+       NFTA_SET_TIMEOUT,
+       NFTA_SET_GC_INTERVAL,
        __NFTA_SET_MAX
 };
 #define NFTA_SET_MAX           (__NFTA_SET_MAX - 1)
@@ -284,12 +290,18 @@ enum nft_set_elem_flags {
  * @NFTA_SET_ELEM_KEY: key value (NLA_NESTED: nft_data)
  * @NFTA_SET_ELEM_DATA: data value of mapping (NLA_NESTED: nft_data_attributes)
  * @NFTA_SET_ELEM_FLAGS: bitmask of nft_set_elem_flags (NLA_U32)
+ * @NFTA_SET_ELEM_TIMEOUT: timeout value (NLA_U64)
+ * @NFTA_SET_ELEM_EXPIRATION: expiration time (NLA_U64)
+ * @NFTA_SET_ELEM_USERDATA: user data (NLA_BINARY)
  */
 enum nft_set_elem_attributes {
        NFTA_SET_ELEM_UNSPEC,
        NFTA_SET_ELEM_KEY,
        NFTA_SET_ELEM_DATA,
        NFTA_SET_ELEM_FLAGS,
+       NFTA_SET_ELEM_TIMEOUT,
+       NFTA_SET_ELEM_EXPIRATION,
+       NFTA_SET_ELEM_USERDATA,
        __NFTA_SET_ELEM_MAX
 };
 #define NFTA_SET_ELEM_MAX      (__NFTA_SET_ELEM_MAX - 1)
@@ -505,6 +517,33 @@ enum nft_lookup_attributes {
 };
 #define NFTA_LOOKUP_MAX                (__NFTA_LOOKUP_MAX - 1)
 
+enum nft_dynset_ops {
+       NFT_DYNSET_OP_ADD,
+       NFT_DYNSET_OP_UPDATE,
+};
+
+/**
+ * enum nft_dynset_attributes - dynset expression attributes
+ *
+ * @NFTA_DYNSET_SET_NAME: name of the set to add data to (NLA_STRING)
+ * @NFTA_DYNSET_SET_ID: unique identifier of the set in the transaction (NLA_U32)
+ * @NFTA_DYNSET_OP: operation (NLA_U32)
+ * @NFTA_DYNSET_SREG_KEY: source register of the key (NLA_U32)
+ * @NFTA_DYNSET_SREG_DATA: source register of the data (NLA_U32)
+ * @NFTA_DYNSET_TIMEOUT: timeout value for the new element (NLA_U64)
+ */
+enum nft_dynset_attributes {
+       NFTA_DYNSET_UNSPEC,
+       NFTA_DYNSET_SET_NAME,
+       NFTA_DYNSET_SET_ID,
+       NFTA_DYNSET_OP,
+       NFTA_DYNSET_SREG_KEY,
+       NFTA_DYNSET_SREG_DATA,
+       NFTA_DYNSET_TIMEOUT,
+       __NFTA_DYNSET_MAX,
+};
+#define NFTA_DYNSET_MAX                (__NFTA_DYNSET_MAX - 1)
+
 /**
  * enum nft_payload_bases - nf_tables payload expression offset bases
  *
index acd31c9f21165cf06e36555d20746e027f24dfee..ab55e2472beb0e44dece07e327f2e0eb8d3f502c 100644 (file)
@@ -111,6 +111,24 @@ static inline __be16 pppoe_proto(const struct sk_buff *skb)
         pppoe_proto(skb) == htons(PPP_IPV6) && \
         brnf_filter_pppoe_tagged)
 
+/* largest possible L2 header, see br_nf_dev_queue_xmit() */
+#define NF_BRIDGE_MAX_MAC_HEADER_LENGTH (PPPOE_SES_HLEN + ETH_HLEN)
+
+#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
+struct brnf_frag_data {
+       char mac[NF_BRIDGE_MAX_MAC_HEADER_LENGTH];
+       u8 encap_size;
+       u8 size;
+};
+
+static DEFINE_PER_CPU(struct brnf_frag_data, brnf_frag_data_storage);
+#endif
+
+static struct nf_bridge_info *nf_bridge_info_get(const struct sk_buff *skb)
+{
+       return skb->nf_bridge;
+}
+
 static inline struct rtable *bridge_parent_rtable(const struct net_device *dev)
 {
        struct net_bridge_port *port;
@@ -189,14 +207,6 @@ static inline void nf_bridge_pull_encap_header_rcsum(struct sk_buff *skb)
        skb->network_header += len;
 }
 
-static inline void nf_bridge_save_header(struct sk_buff *skb)
-{
-       int header_size = ETH_HLEN + nf_bridge_encap_header_len(skb);
-
-       skb_copy_from_linear_data_offset(skb, -header_size,
-                                        skb->nf_bridge->data, header_size);
-}
-
 /* When handing a packet over to the IP layer
  * check whether we have a skb that is in the
  * expected format
@@ -252,10 +262,16 @@ drop:
 
 static void nf_bridge_update_protocol(struct sk_buff *skb)
 {
-       if (skb->nf_bridge->mask & BRNF_8021Q)
+       switch (skb->nf_bridge->orig_proto) {
+       case BRNF_PROTO_8021Q:
                skb->protocol = htons(ETH_P_8021Q);
-       else if (skb->nf_bridge->mask & BRNF_PPPoE)
+               break;
+       case BRNF_PROTO_PPPOE:
                skb->protocol = htons(ETH_P_PPP_SES);
+               break;
+       case BRNF_PROTO_UNCHANGED:
+               break;
+       }
 }
 
 /* PF_BRIDGE/PRE_ROUTING *********************************************/
@@ -263,12 +279,12 @@ static void nf_bridge_update_protocol(struct sk_buff *skb)
  * bridge PRE_ROUTING hook. */
 static int br_nf_pre_routing_finish_ipv6(struct sock *sk, struct sk_buff *skb)
 {
-       struct nf_bridge_info *nf_bridge = skb->nf_bridge;
+       struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
        struct rtable *rt;
 
-       if (nf_bridge->mask & BRNF_PKT_TYPE) {
+       if (nf_bridge->pkt_otherhost) {
                skb->pkt_type = PACKET_OTHERHOST;
-               nf_bridge->mask ^= BRNF_PKT_TYPE;
+               nf_bridge->pkt_otherhost = false;
        }
        nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING;
 
@@ -296,7 +312,6 @@ static int br_nf_pre_routing_finish_ipv6(struct sock *sk, struct sk_buff *skb)
  */
 static int br_nf_pre_routing_finish_bridge(struct sock *sk, struct sk_buff *skb)
 {
-       struct nf_bridge_info *nf_bridge = skb->nf_bridge;
        struct neighbour *neigh;
        struct dst_entry *dst;
 
@@ -306,6 +321,7 @@ static int br_nf_pre_routing_finish_bridge(struct sock *sk, struct sk_buff *skb)
        dst = skb_dst(skb);
        neigh = dst_neigh_lookup_skb(dst, skb);
        if (neigh) {
+               struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
                int ret;
 
                if (neigh->hh.hh_len) {
@@ -319,7 +335,7 @@ static int br_nf_pre_routing_finish_bridge(struct sock *sk, struct sk_buff *skb)
                         */
                        skb_copy_from_linear_data_offset(skb,
                                                         -(ETH_HLEN-ETH_ALEN),
-                                                        skb->nf_bridge->data,
+                                                        nf_bridge->neigh_header,
                                                         ETH_HLEN-ETH_ALEN);
                        /* tell br_dev_xmit to continue with forwarding */
                        nf_bridge->mask |= BRNF_BRIDGED_DNAT;
@@ -392,7 +408,7 @@ static int br_nf_pre_routing_finish(struct sock *sk, struct sk_buff *skb)
 {
        struct net_device *dev = skb->dev;
        struct iphdr *iph = ip_hdr(skb);
-       struct nf_bridge_info *nf_bridge = skb->nf_bridge;
+       struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
        struct rtable *rt;
        int err;
        int frag_max_size;
@@ -400,9 +416,9 @@ static int br_nf_pre_routing_finish(struct sock *sk, struct sk_buff *skb)
        frag_max_size = IPCB(skb)->frag_max_size;
        BR_INPUT_SKB_CB(skb)->frag_max_size = frag_max_size;
 
-       if (nf_bridge->mask & BRNF_PKT_TYPE) {
+       if (nf_bridge->pkt_otherhost) {
                skb->pkt_type = PACKET_OTHERHOST;
-               nf_bridge->mask ^= BRNF_PKT_TYPE;
+               nf_bridge->pkt_otherhost = false;
        }
        nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING;
        if (dnat_took_place(skb)) {
@@ -485,20 +501,21 @@ static struct net_device *brnf_get_logical_dev(struct sk_buff *skb, const struct
 /* Some common code for IPv4/IPv6 */
 static struct net_device *setup_pre_routing(struct sk_buff *skb)
 {
-       struct nf_bridge_info *nf_bridge = skb->nf_bridge;
+       struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
 
        if (skb->pkt_type == PACKET_OTHERHOST) {
                skb->pkt_type = PACKET_HOST;
-               nf_bridge->mask |= BRNF_PKT_TYPE;
+               nf_bridge->pkt_otherhost = true;
        }
 
        nf_bridge->mask |= BRNF_NF_BRIDGE_PREROUTING;
        nf_bridge->physindev = skb->dev;
        skb->dev = brnf_get_logical_dev(skb, skb->dev);
+
        if (skb->protocol == htons(ETH_P_8021Q))
-               nf_bridge->mask |= BRNF_8021Q;
+               nf_bridge->orig_proto = BRNF_PROTO_8021Q;
        else if (skb->protocol == htons(ETH_P_PPP_SES))
-               nf_bridge->mask |= BRNF_PPPoE;
+               nf_bridge->orig_proto = BRNF_PROTO_PPPOE;
 
        /* Must drop socket now because of tproxy. */
        skb_orphan(skb);
@@ -680,14 +697,21 @@ static unsigned int br_nf_local_in(const struct nf_hook_ops *ops,
 /* PF_BRIDGE/FORWARD *************************************************/
 static int br_nf_forward_finish(struct sock *sk, struct sk_buff *skb)
 {
-       struct nf_bridge_info *nf_bridge = skb->nf_bridge;
+       struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
        struct net_device *in;
 
        if (!IS_ARP(skb) && !IS_VLAN_ARP(skb)) {
+               int frag_max_size;
+
+               if (skb->protocol == htons(ETH_P_IP)) {
+                       frag_max_size = IPCB(skb)->frag_max_size;
+                       BR_INPUT_SKB_CB(skb)->frag_max_size = frag_max_size;
+               }
+
                in = nf_bridge->physindev;
-               if (nf_bridge->mask & BRNF_PKT_TYPE) {
+               if (nf_bridge->pkt_otherhost) {
                        skb->pkt_type = PACKET_OTHERHOST;
-                       nf_bridge->mask ^= BRNF_PKT_TYPE;
+                       nf_bridge->pkt_otherhost = false;
                }
                nf_bridge_update_protocol(skb);
        } else {
@@ -722,6 +746,10 @@ static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops,
        if (!nf_bridge_unshare(skb))
                return NF_DROP;
 
+       nf_bridge = nf_bridge_info_get(skb);
+       if (!nf_bridge)
+               return NF_DROP;
+
        parent = bridge_parent(state->out);
        if (!parent)
                return NF_DROP;
@@ -735,14 +763,19 @@ static unsigned int br_nf_forward_ip(const struct nf_hook_ops *ops,
 
        nf_bridge_pull_encap_header(skb);
 
-       nf_bridge = skb->nf_bridge;
        if (skb->pkt_type == PACKET_OTHERHOST) {
                skb->pkt_type = PACKET_HOST;
-               nf_bridge->mask |= BRNF_PKT_TYPE;
+               nf_bridge->pkt_otherhost = true;
        }
 
-       if (pf == NFPROTO_IPV4 && br_parse_ip_options(skb))
-               return NF_DROP;
+       if (pf == NFPROTO_IPV4) {
+               int frag_max = BR_INPUT_SKB_CB(skb)->frag_max_size;
+
+               if (br_parse_ip_options(skb))
+                       return NF_DROP;
+
+               IPCB(skb)->frag_max_size = frag_max;
+       }
 
        nf_bridge->physoutdev = skb->dev;
        if (pf == NFPROTO_IPV4)
@@ -792,30 +825,22 @@ static unsigned int br_nf_forward_arp(const struct nf_hook_ops *ops,
 }
 
 #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV4)
-static bool nf_bridge_copy_header(struct sk_buff *skb)
+static int br_nf_push_frag_xmit(struct sock *sk, struct sk_buff *skb)
 {
+       struct brnf_frag_data *data;
        int err;
-       unsigned int header_size;
 
-       nf_bridge_update_protocol(skb);
-       header_size = ETH_HLEN + nf_bridge_encap_header_len(skb);
-       err = skb_cow_head(skb, header_size);
-       if (err)
-               return false;
-
-       skb_copy_to_linear_data_offset(skb, -header_size,
-                                      skb->nf_bridge->data, header_size);
-       __skb_push(skb, nf_bridge_encap_header_len(skb));
-       return true;
-}
+       data = this_cpu_ptr(&brnf_frag_data_storage);
+       err = skb_cow_head(skb, data->size);
 
-static int br_nf_push_frag_xmit(struct sock *sk, struct sk_buff *skb)
-{
-       if (!nf_bridge_copy_header(skb)) {
+       if (err) {
                kfree_skb(skb);
                return 0;
        }
 
+       skb_copy_to_linear_data_offset(skb, -data->size, data->mac, data->size);
+       __skb_push(skb, data->encap_size);
+
        return br_dev_queue_push_xmit(sk, skb);
 }
 
@@ -833,14 +858,27 @@ static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb)
         * boundaries by preserving frag_list rather than refragmenting.
         */
        if (skb->len + mtu_reserved > skb->dev->mtu) {
+               struct brnf_frag_data *data;
+
                frag_max_size = BR_INPUT_SKB_CB(skb)->frag_max_size;
                if (br_parse_ip_options(skb))
                        /* Drop invalid packet */
                        return NF_DROP;
                IPCB(skb)->frag_max_size = frag_max_size;
+
+               nf_bridge_update_protocol(skb);
+
+               data = this_cpu_ptr(&brnf_frag_data_storage);
+               data->encap_size = nf_bridge_encap_header_len(skb);
+               data->size = ETH_HLEN + data->encap_size;
+
+               skb_copy_from_linear_data_offset(skb, -data->size, data->mac,
+                                                data->size);
+
                ret = ip_fragment(sk, skb, br_nf_push_frag_xmit);
-       } else
+       } else {
                ret = br_dev_queue_push_xmit(sk, skb);
+       }
 
        return ret;
 }
@@ -856,7 +894,7 @@ static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops,
                                       struct sk_buff *skb,
                                       const struct nf_hook_state *state)
 {
-       struct nf_bridge_info *nf_bridge = skb->nf_bridge;
+       struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
        struct net_device *realoutdev = bridge_parent(skb->dev);
        u_int8_t pf;
 
@@ -882,11 +920,10 @@ static unsigned int br_nf_post_routing(const struct nf_hook_ops *ops,
         * about the value of skb->pkt_type. */
        if (skb->pkt_type == PACKET_OTHERHOST) {
                skb->pkt_type = PACKET_HOST;
-               nf_bridge->mask |= BRNF_PKT_TYPE;
+               nf_bridge->pkt_otherhost = true;
        }
 
        nf_bridge_pull_encap_header(skb);
-       nf_bridge_save_header(skb);
        if (pf == NFPROTO_IPV4)
                skb->protocol = htons(ETH_P_IP);
        else
@@ -925,13 +962,16 @@ static unsigned int ip_sabotage_in(const struct nf_hook_ops *ops,
  */
 static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb)
 {
-       struct nf_bridge_info *nf_bridge = skb->nf_bridge;
+       struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
 
        skb_pull(skb, ETH_HLEN);
        nf_bridge->mask &= ~BRNF_BRIDGED_DNAT;
 
-       skb_copy_to_linear_data_offset(skb, -(ETH_HLEN-ETH_ALEN),
-                                      skb->nf_bridge->data, ETH_HLEN-ETH_ALEN);
+       BUILD_BUG_ON(sizeof(nf_bridge->neigh_header) != (ETH_HLEN - ETH_ALEN));
+
+       skb_copy_to_linear_data_offset(skb, -(ETH_HLEN - ETH_ALEN),
+                                      nf_bridge->neigh_header,
+                                      ETH_HLEN - ETH_ALEN);
        skb->dev = nf_bridge->physindev;
        br_handle_frame_finish(NULL, skb);
 }
index c5b794da51a91fd8b2236379b532910cafc0568f..3262e41ff76f38a89db3fd7da8c771a51b273abf 100644 (file)
@@ -13,6 +13,7 @@
 #include <net/dst.h>
 #include <net/netfilter/ipv4/nf_reject.h>
 #include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_bridge.h>
 #include <net/netfilter/ipv4/nf_reject.h>
 
 const struct tcphdr *nf_reject_ip_tcphdr_get(struct sk_buff *oldskb,
@@ -146,7 +147,8 @@ void nf_send_reset(struct sk_buff *oldskb, int hook)
         */
        if (oldskb->nf_bridge) {
                struct ethhdr *oeth = eth_hdr(oldskb);
-               nskb->dev = oldskb->nf_bridge->physindev;
+
+               nskb->dev = nf_bridge_get_physindev(oldskb);
                niph->tot_len = htons(nskb->len);
                ip_send_check(niph);
                if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol),
index 3afdce03d94e7c2dd27d19fabff9250c0f5aaddc..94b4c6dfb400c90b6c368acb7ecb83649309dce0 100644 (file)
@@ -13,6 +13,7 @@
 #include <net/ip6_checksum.h>
 #include <net/netfilter/ipv6/nf_reject.h>
 #include <linux/netfilter_ipv6.h>
+#include <linux/netfilter_bridge.h>
 #include <net/netfilter/ipv6/nf_reject.h>
 
 const struct tcphdr *nf_reject_ip6_tcphdr_get(struct sk_buff *oldskb,
@@ -195,7 +196,8 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook)
         */
        if (oldskb->nf_bridge) {
                struct ethhdr *oeth = eth_hdr(oldskb);
-               nskb->dev = oldskb->nf_bridge->physindev;
+
+               nskb->dev = nf_bridge_get_physindev(oldskb);
                nskb->protocol = htons(ETH_P_IPV6);
                ip6h->payload_len = htons(sizeof(struct tcphdr));
                if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol),
index 89f73a9e98741702f2ea324e49b7b2f75bfb69fd..a87d8b8ec730421403930c69061a2c7167db0a6a 100644 (file)
@@ -70,7 +70,7 @@ obj-$(CONFIG_NETFILTER_SYNPROXY) += nf_synproxy_core.o
 
 # nf_tables
 nf_tables-objs += nf_tables_core.o nf_tables_api.o
-nf_tables-objs += nft_immediate.o nft_cmp.o nft_lookup.o
+nf_tables-objs += nft_immediate.o nft_cmp.o nft_lookup.o nft_dynset.o
 nf_tables-objs += nft_bitwise.o nft_byteorder.o nft_payload.o
 
 obj-$(CONFIG_NF_TABLES)                += nf_tables.o
index 758b002130d92f0e7fecf63627d426289cf03820..380ef5148ea11ef29b5e3ab8113b94905fbea389 100644 (file)
@@ -19,6 +19,7 @@
 #include <net/netlink.h>
 
 #include <linux/netfilter.h>
+#include <linux/netfilter_bridge.h>
 #include <linux/netfilter/ipset/pfxlen.h>
 #include <linux/netfilter/ipset/ip_set.h>
 #include <linux/netfilter/ipset/ip_set_hash.h>
@@ -211,6 +212,22 @@ hash_netiface4_data_next(struct hash_netiface4_elem *next,
 #define HKEY_DATALEN   sizeof(struct hash_netiface4_elem_hashed)
 #include "ip_set_hash_gen.h"
 
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+static const char *get_physindev_name(const struct sk_buff *skb)
+{
+       struct net_device *dev = nf_bridge_get_physindev(skb);
+
+       return dev ? dev->name : NULL;
+}
+
+static const char *get_phyoutdev_name(const struct sk_buff *skb)
+{
+       struct net_device *dev = nf_bridge_get_physoutdev(skb);
+
+       return dev ? dev->name : NULL;
+}
+#endif
+
 static int
 hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb,
                    const struct xt_action_param *par,
@@ -234,16 +251,15 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb,
        e.ip &= ip_set_netmask(e.cidr);
 
 #define IFACE(dir)     (par->dir ? par->dir->name : NULL)
-#define PHYSDEV(dir)   (nf_bridge->dir ? nf_bridge->dir->name : NULL)
 #define SRCDIR         (opt->flags & IPSET_DIM_TWO_SRC)
 
        if (opt->cmdflags & IPSET_FLAG_PHYSDEV) {
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
-               const struct nf_bridge_info *nf_bridge = skb->nf_bridge;
+               e.iface = SRCDIR ? get_physindev_name(skb) :
+                                  get_phyoutdev_name(skb);
 
-               if (!nf_bridge)
+               if (!e.iface)
                        return -EINVAL;
-               e.iface = SRCDIR ? PHYSDEV(physindev) : PHYSDEV(physoutdev);
                e.physdev = 1;
 #else
                e.iface = NULL;
@@ -476,11 +492,11 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb,
 
        if (opt->cmdflags & IPSET_FLAG_PHYSDEV) {
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
-               const struct nf_bridge_info *nf_bridge = skb->nf_bridge;
-
-               if (!nf_bridge)
+               e.iface = SRCDIR ? get_physindev_name(skb) :
+                                  get_phyoutdev_name(skb);
+               if (!e.iface)
                        return -EINVAL;
-               e.iface = SRCDIR ? PHYSDEV(physindev) : PHYSDEV(physoutdev);
+
                e.physdev = 1;
 #else
                e.iface = NULL;
index 2631876ac55be96aeec77ab1d15f5db8c5a80c49..a5aa5967b8e17b894a798dbf29d07bf12063fc16 100644 (file)
@@ -17,6 +17,7 @@
 #include <net/route.h>
 
 #include <linux/netfilter.h>
+#include <linux/netfilter_bridge.h>
 #include <linux/netfilter/xt_LOG.h>
 #include <net/netfilter/nf_log.h>
 
@@ -163,10 +164,10 @@ nf_log_dump_packet_common(struct nf_log_buf *m, u_int8_t pf,
                const struct net_device *physindev;
                const struct net_device *physoutdev;
 
-               physindev = skb->nf_bridge->physindev;
+               physindev = nf_bridge_get_physindev(skb);
                if (physindev && in != physindev)
                        nf_log_buf_add(m, "PHYSIN=%s ", physindev->name);
-               physoutdev = skb->nf_bridge->physoutdev;
+               physoutdev = nf_bridge_get_physoutdev(skb);
                if (physoutdev && out != physoutdev)
                        nf_log_buf_add(m, "PHYSOUT=%s ", physoutdev->name);
        }
index 3f3ac57b2998f20a901410d4f997b2b2f794e3fb..2e88032cd5ad22fb1e910966167340a1ba8761f6 100644 (file)
@@ -10,6 +10,7 @@
 #include <linux/proc_fs.h>
 #include <linux/skbuff.h>
 #include <linux/netfilter.h>
+#include <linux/netfilter_bridge.h>
 #include <linux/seq_file.h>
 #include <linux/rcupdate.h>
 #include <net/protocol.h>
@@ -58,12 +59,14 @@ void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
                sock_put(state->sk);
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
        if (entry->skb->nf_bridge) {
-               struct nf_bridge_info *nf_bridge = entry->skb->nf_bridge;
+               struct net_device *physdev;
 
-               if (nf_bridge->physindev)
-                       dev_put(nf_bridge->physindev);
-               if (nf_bridge->physoutdev)
-                       dev_put(nf_bridge->physoutdev);
+               physdev = nf_bridge_get_physindev(entry->skb);
+               if (physdev)
+                       dev_put(physdev);
+               physdev = nf_bridge_get_physoutdev(entry->skb);
+               if (physdev)
+                       dev_put(physdev);
        }
 #endif
        /* Drop reference to owner of hook which queued us. */
@@ -87,13 +90,12 @@ bool nf_queue_entry_get_refs(struct nf_queue_entry *entry)
                sock_hold(state->sk);
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
        if (entry->skb->nf_bridge) {
-               struct nf_bridge_info *nf_bridge = entry->skb->nf_bridge;
                struct net_device *physdev;
 
-               physdev = nf_bridge->physindev;
+               physdev = nf_bridge_get_physindev(entry->skb);
                if (physdev)
                        dev_hold(physdev);
-               physdev = nf_bridge->physoutdev;
+               physdev = nf_bridge_get_physoutdev(entry->skb);
                if (physdev)
                        dev_hold(physdev);
        }
index 5604c2df05d1a40f2e812d18d2a72fd895359100..0b96fa0d64b2f9bf536eed8a0778f36f8aa04508 100644 (file)
@@ -2159,7 +2159,7 @@ nft_select_set_ops(const struct nlattr * const nla[],
        features = 0;
        if (nla[NFTA_SET_FLAGS] != NULL) {
                features = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS]));
-               features &= NFT_SET_INTERVAL | NFT_SET_MAP;
+               features &= NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_TIMEOUT;
        }
 
        bops       = NULL;
@@ -2216,6 +2216,8 @@ static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = {
        [NFTA_SET_POLICY]               = { .type = NLA_U32 },
        [NFTA_SET_DESC]                 = { .type = NLA_NESTED },
        [NFTA_SET_ID]                   = { .type = NLA_U32 },
+       [NFTA_SET_TIMEOUT]              = { .type = NLA_U64 },
+       [NFTA_SET_GC_INTERVAL]          = { .type = NLA_U32 },
 };
 
 static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = {
@@ -2366,6 +2368,13 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
                        goto nla_put_failure;
        }
 
+       if (set->timeout &&
+           nla_put_be64(skb, NFTA_SET_TIMEOUT, cpu_to_be64(set->timeout)))
+               goto nla_put_failure;
+       if (set->gc_int &&
+           nla_put_be32(skb, NFTA_SET_GC_INTERVAL, htonl(set->gc_int)))
+               goto nla_put_failure;
+
        if (set->policy != NFT_SET_POL_PERFORMANCE) {
                if (nla_put_be32(skb, NFTA_SET_POLICY, htonl(set->policy)))
                        goto nla_put_failure;
@@ -2578,7 +2587,8 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
        char name[IFNAMSIZ];
        unsigned int size;
        bool create;
-       u32 ktype, dtype, flags, policy;
+       u64 timeout;
+       u32 ktype, dtype, flags, policy, gc_int;
        struct nft_set_desc desc;
        int err;
 
@@ -2605,7 +2615,8 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
        if (nla[NFTA_SET_FLAGS] != NULL) {
                flags = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS]));
                if (flags & ~(NFT_SET_ANONYMOUS | NFT_SET_CONSTANT |
-                             NFT_SET_INTERVAL | NFT_SET_MAP))
+                             NFT_SET_INTERVAL | NFT_SET_MAP |
+                             NFT_SET_TIMEOUT))
                        return -EINVAL;
        }
 
@@ -2631,6 +2642,19 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
        } else if (flags & NFT_SET_MAP)
                return -EINVAL;
 
+       timeout = 0;
+       if (nla[NFTA_SET_TIMEOUT] != NULL) {
+               if (!(flags & NFT_SET_TIMEOUT))
+                       return -EINVAL;
+               timeout = be64_to_cpu(nla_get_be64(nla[NFTA_SET_TIMEOUT]));
+       }
+       gc_int = 0;
+       if (nla[NFTA_SET_GC_INTERVAL] != NULL) {
+               if (!(flags & NFT_SET_TIMEOUT))
+                       return -EINVAL;
+               gc_int = ntohl(nla_get_be32(nla[NFTA_SET_GC_INTERVAL]));
+       }
+
        policy = NFT_SET_POL_PERFORMANCE;
        if (nla[NFTA_SET_POLICY] != NULL)
                policy = ntohl(nla_get_be32(nla[NFTA_SET_POLICY]));
@@ -2699,6 +2723,8 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
        set->flags = flags;
        set->size  = desc.size;
        set->policy = policy;
+       set->timeout = timeout;
+       set->gc_int = gc_int;
 
        err = ops->init(set, &desc, nla);
        if (err < 0)
@@ -2785,12 +2811,13 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
        if (!list_empty(&set->bindings) && set->flags & NFT_SET_ANONYMOUS)
                return -EBUSY;
 
-       if (set->flags & NFT_SET_MAP) {
+       if (binding->flags & NFT_SET_MAP) {
                /* If the set is already bound to the same chain all
                 * jumps are already validated for that chain.
                 */
                list_for_each_entry(i, &set->bindings, list) {
-                       if (i->chain == binding->chain)
+                       if (binding->flags & NFT_SET_MAP &&
+                           i->chain == binding->chain)
                                goto bind;
                }
 
@@ -2837,6 +2864,18 @@ const struct nft_set_ext_type nft_set_ext_types[] = {
                .len    = sizeof(u8),
                .align  = __alignof__(u8),
        },
+       [NFT_SET_EXT_TIMEOUT]           = {
+               .len    = sizeof(u64),
+               .align  = __alignof__(u64),
+       },
+       [NFT_SET_EXT_EXPIRATION]        = {
+               .len    = sizeof(unsigned long),
+               .align  = __alignof__(unsigned long),
+       },
+       [NFT_SET_EXT_USERDATA]          = {
+               .len    = sizeof(struct nft_userdata),
+               .align  = __alignof__(struct nft_userdata),
+       },
 };
 EXPORT_SYMBOL_GPL(nft_set_ext_types);
 
@@ -2848,6 +2887,9 @@ static const struct nla_policy nft_set_elem_policy[NFTA_SET_ELEM_MAX + 1] = {
        [NFTA_SET_ELEM_KEY]             = { .type = NLA_NESTED },
        [NFTA_SET_ELEM_DATA]            = { .type = NLA_NESTED },
        [NFTA_SET_ELEM_FLAGS]           = { .type = NLA_U32 },
+       [NFTA_SET_ELEM_TIMEOUT]         = { .type = NLA_U64 },
+       [NFTA_SET_ELEM_USERDATA]        = { .type = NLA_BINARY,
+                                           .len = NFT_USERDATA_MAXLEN },
 };
 
 static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX + 1] = {
@@ -2909,6 +2951,34 @@ static int nf_tables_fill_setelem(struct sk_buff *skb,
                         htonl(*nft_set_ext_flags(ext))))
                goto nla_put_failure;
 
+       if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT) &&
+           nla_put_be64(skb, NFTA_SET_ELEM_TIMEOUT,
+                        cpu_to_be64(*nft_set_ext_timeout(ext))))
+               goto nla_put_failure;
+
+       if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) {
+               unsigned long expires, now = jiffies;
+
+               expires = *nft_set_ext_expiration(ext);
+               if (time_before(now, expires))
+                       expires -= now;
+               else
+                       expires = 0;
+
+               if (nla_put_be64(skb, NFTA_SET_ELEM_EXPIRATION,
+                                cpu_to_be64(jiffies_to_msecs(expires))))
+                       goto nla_put_failure;
+       }
+
+       if (nft_set_ext_exists(ext, NFT_SET_EXT_USERDATA)) {
+               struct nft_userdata *udata;
+
+               udata = nft_set_ext_userdata(ext);
+               if (nla_put(skb, NFTA_SET_ELEM_USERDATA,
+                           udata->len + 1, udata->data))
+                       goto nla_put_failure;
+       }
+
        nla_nest_end(skb, nest);
        return 0;
 
@@ -3128,11 +3198,11 @@ static struct nft_trans *nft_trans_elem_alloc(struct nft_ctx *ctx,
        return trans;
 }
 
-static void *nft_set_elem_init(const struct nft_set *set,
-                              const struct nft_set_ext_tmpl *tmpl,
-                              const struct nft_data *key,
-                              const struct nft_data *data,
-                              gfp_t gfp)
+void *nft_set_elem_init(const struct nft_set *set,
+                       const struct nft_set_ext_tmpl *tmpl,
+                       const struct nft_data *key,
+                       const struct nft_data *data,
+                       u64 timeout, gfp_t gfp)
 {
        struct nft_set_ext *ext;
        void *elem;
@@ -3147,6 +3217,11 @@ static void *nft_set_elem_init(const struct nft_set *set,
        memcpy(nft_set_ext_key(ext), key, set->klen);
        if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
                memcpy(nft_set_ext_data(ext), data, set->dlen);
+       if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION))
+               *nft_set_ext_expiration(ext) =
+                       jiffies + msecs_to_jiffies(timeout);
+       if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT))
+               *nft_set_ext_timeout(ext) = timeout;
 
        return elem;
 }
@@ -3172,15 +3247,15 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
        struct nft_set_ext *ext;
        struct nft_set_elem elem;
        struct nft_set_binding *binding;
+       struct nft_userdata *udata;
        struct nft_data data;
        enum nft_registers dreg;
        struct nft_trans *trans;
+       u64 timeout;
        u32 flags;
+       u8 ulen;
        int err;
 
-       if (set->size && set->nelems == set->size)
-               return -ENFILE;
-
        err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr,
                               nft_set_elem_policy);
        if (err < 0)
@@ -3215,6 +3290,15 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
                        return -EINVAL;
        }
 
+       timeout = 0;
+       if (nla[NFTA_SET_ELEM_TIMEOUT] != NULL) {
+               if (!(set->flags & NFT_SET_TIMEOUT))
+                       return -EINVAL;
+               timeout = be64_to_cpu(nla_get_be64(nla[NFTA_SET_ELEM_TIMEOUT]));
+       } else if (set->flags & NFT_SET_TIMEOUT) {
+               timeout = set->timeout;
+       }
+
        err = nft_data_init(ctx, &elem.key, &d1, nla[NFTA_SET_ELEM_KEY]);
        if (err < 0)
                goto err1;
@@ -3223,6 +3307,11 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
                goto err2;
 
        nft_set_ext_add(&tmpl, NFT_SET_EXT_KEY);
+       if (timeout > 0) {
+               nft_set_ext_add(&tmpl, NFT_SET_EXT_EXPIRATION);
+               if (timeout != set->timeout)
+                       nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT);
+       }
 
        if (nla[NFTA_SET_ELEM_DATA] != NULL) {
                err = nft_data_init(ctx, &data, &d2, nla[NFTA_SET_ELEM_DATA]);
@@ -3241,6 +3330,9 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
                                .chain  = (struct nft_chain *)binding->chain,
                        };
 
+                       if (!(binding->flags & NFT_SET_MAP))
+                               continue;
+
                        err = nft_validate_data_load(&bind_ctx, dreg,
                                                     &data, d2.type);
                        if (err < 0)
@@ -3250,20 +3342,38 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
                nft_set_ext_add(&tmpl, NFT_SET_EXT_DATA);
        }
 
+       /* The full maximum length of userdata can exceed the maximum
+        * offset value (U8_MAX) for following extensions, therefore it
+        * must be the last extension added.
+        */
+       ulen = 0;
+       if (nla[NFTA_SET_ELEM_USERDATA] != NULL) {
+               ulen = nla_len(nla[NFTA_SET_ELEM_USERDATA]);
+               if (ulen > 0)
+                       nft_set_ext_add_length(&tmpl, NFT_SET_EXT_USERDATA,
+                                              ulen);
+       }
+
        err = -ENOMEM;
-       elem.priv = nft_set_elem_init(set, &tmpl, &elem.key, &data, GFP_KERNEL);
+       elem.priv = nft_set_elem_init(set, &tmpl, &elem.key, &data,
+                                     timeout, GFP_KERNEL);
        if (elem.priv == NULL)
                goto err3;
 
        ext = nft_set_elem_ext(set, elem.priv);
        if (flags)
                *nft_set_ext_flags(ext) = flags;
+       if (ulen > 0) {
+               udata = nft_set_ext_userdata(ext);
+               udata->len = ulen - 1;
+               nla_memcpy(&udata->data, nla[NFTA_SET_ELEM_USERDATA], ulen);
+       }
 
        trans = nft_trans_elem_alloc(ctx, NFT_MSG_NEWSETELEM, set);
        if (trans == NULL)
                goto err4;
 
-       ext->genmask = nft_genmask_cur(ctx->net);
+       ext->genmask = nft_genmask_cur(ctx->net) | NFT_SET_ELEM_BUSY_MASK;
        err = set->ops->insert(set, &elem);
        if (err < 0)
                goto err5;
@@ -3316,11 +3426,15 @@ static int nf_tables_newsetelem(struct sock *nlsk, struct sk_buff *skb,
                return -EBUSY;
 
        nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
+               if (set->size &&
+                   !atomic_add_unless(&set->nelems, 1, set->size + set->ndeact))
+                       return -ENFILE;
+
                err = nft_add_set_elem(&ctx, set, attr);
-               if (err < 0)
+               if (err < 0) {
+                       atomic_dec(&set->nelems);
                        break;
-
-               set->nelems++;
+               }
        }
        return err;
 }
@@ -3402,11 +3516,36 @@ static int nf_tables_delsetelem(struct sock *nlsk, struct sk_buff *skb,
                if (err < 0)
                        break;
 
-               set->nelems--;
+               set->ndeact++;
        }
        return err;
 }
 
+/* RCU callback: destroy every element collected in the batch that embeds
+ * @rcu, then free the batch itself.
+ */
+void nft_set_gc_batch_release(struct rcu_head *rcu)
+{
+       struct nft_set_gc_batch *batch =
+               container_of(rcu, struct nft_set_gc_batch, head.rcu);
+       unsigned int n = 0;
+
+       while (n < batch->head.cnt) {
+               nft_set_elem_destroy(batch->head.set, batch->elems[n]);
+               n++;
+       }
+       kfree(batch);
+}
+EXPORT_SYMBOL_GPL(nft_set_gc_batch_release);
+
+/* Allocate a zeroed garbage-collection batch tied to @set.
+ * Returns NULL when the allocation fails.
+ */
+struct nft_set_gc_batch *nft_set_gc_batch_alloc(const struct nft_set *set,
+                                               gfp_t gfp)
+{
+       struct nft_set_gc_batch *batch = kzalloc(sizeof(*batch), gfp);
+
+       if (batch != NULL)
+               batch->head.set = set;
+       return batch;
+}
+EXPORT_SYMBOL_GPL(nft_set_gc_batch_alloc);
+
 static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net,
                                   u32 portid, u32 seq)
 {
@@ -3710,6 +3849,8 @@ static int nf_tables_commit(struct sk_buff *skb)
                                                 &te->elem,
                                                 NFT_MSG_DELSETELEM, 0);
                        te->set->ops->remove(te->set, &te->elem);
+                       atomic_dec(&te->set->nelems);
+                       te->set->ndeact--;
                        break;
                }
        }
@@ -3813,16 +3954,16 @@ static int nf_tables_abort(struct sk_buff *skb)
                        nft_trans_destroy(trans);
                        break;
                case NFT_MSG_NEWSETELEM:
-                       nft_trans_elem_set(trans)->nelems--;
                        te = (struct nft_trans_elem *)trans->data;
 
                        te->set->ops->remove(te->set, &te->elem);
+                       atomic_dec(&te->set->nelems);
                        break;
                case NFT_MSG_DELSETELEM:
                        te = (struct nft_trans_elem *)trans->data;
 
-                       nft_trans_elem_set(trans)->nelems++;
                        te->set->ops->activate(te->set, &te->elem);
+                       te->set->ndeact--;
 
                        nft_trans_destroy(trans);
                        break;
@@ -3960,7 +4101,8 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx,
                        continue;
 
                list_for_each_entry(binding, &set->bindings, list) {
-                       if (binding->chain != chain)
+                       if (!(binding->flags & NFT_SET_MAP) ||
+                           binding->chain != chain)
                                continue;
 
                        iter.skip       = 0;
index ef4dfcbaf149f4c207f0096ceb6b8a6c1aa3d924..7caf08a9225d29c3621896c881896ac03765827f 100644 (file)
@@ -239,8 +239,14 @@ int __init nf_tables_core_module_init(void)
        if (err < 0)
                goto err6;
 
+       err = nft_dynset_module_init();
+       if (err < 0)
+               goto err7;
+
        return 0;
 
+err7:
+       nft_payload_module_exit();
 err6:
        nft_byteorder_module_exit();
 err5:
@@ -257,6 +263,7 @@ err1:
 
 void nf_tables_core_module_exit(void)
 {
+       nft_dynset_module_exit();
        nft_payload_module_exit();
        nft_byteorder_module_exit();
        nft_bitwise_module_exit();
index 957b83a0223b8eef159b572a2b685095a2d3e0ab..51afea4b0af78a46c41099cd55ca9506f3636835 100644 (file)
@@ -23,6 +23,7 @@
 #include <linux/ipv6.h>
 #include <linux/netdevice.h>
 #include <linux/netfilter.h>
+#include <linux/netfilter_bridge.h>
 #include <net/netlink.h>
 #include <linux/netfilter/nfnetlink.h>
 #include <linux/netfilter/nfnetlink_log.h>
@@ -448,14 +449,18 @@ __build_packet_message(struct nfnl_log_net *log,
                                         htonl(br_port_get_rcu(indev)->br->dev->ifindex)))
                                goto nla_put_failure;
                } else {
+                       struct net_device *physindev;
+
                        /* Case 2: indev is bridge group, we need to look for
                         * physical device (when called from ipv4) */
                        if (nla_put_be32(inst->skb, NFULA_IFINDEX_INDEV,
                                         htonl(indev->ifindex)))
                                goto nla_put_failure;
-                       if (skb->nf_bridge && skb->nf_bridge->physindev &&
+
+                       physindev = nf_bridge_get_physindev(skb);
+                       if (physindev &&
                            nla_put_be32(inst->skb, NFULA_IFINDEX_PHYSINDEV,
-                                        htonl(skb->nf_bridge->physindev->ifindex)))
+                                        htonl(physindev->ifindex)))
                                goto nla_put_failure;
                }
 #endif
@@ -479,14 +484,18 @@ __build_packet_message(struct nfnl_log_net *log,
                                         htonl(br_port_get_rcu(outdev)->br->dev->ifindex)))
                                goto nla_put_failure;
                } else {
+                       struct net_device *physoutdev;
+
                        /* Case 2: outdev is a bridge group, we need to look
                         * for physical output device (when called from ipv4) */
                        if (nla_put_be32(inst->skb, NFULA_IFINDEX_OUTDEV,
                                         htonl(outdev->ifindex)))
                                goto nla_put_failure;
-                       if (skb->nf_bridge && skb->nf_bridge->physoutdev &&
+
+                       physoutdev = nf_bridge_get_physoutdev(skb);
+                       if (physoutdev &&
                            nla_put_be32(inst->skb, NFULA_IFINDEX_PHYSOUTDEV,
-                                        htonl(skb->nf_bridge->physoutdev->ifindex)))
+                                        htonl(physoutdev->ifindex)))
                                goto nla_put_failure;
                }
 #endif
index 6e74655a8d4f153818243e27750c2660e0e52380..628afc350c025f7012fa927c03ec3bdc6b3b6a2c 100644 (file)
@@ -25,6 +25,7 @@
 #include <linux/proc_fs.h>
 #include <linux/netfilter_ipv4.h>
 #include <linux/netfilter_ipv6.h>
+#include <linux/netfilter_bridge.h>
 #include <linux/netfilter/nfnetlink.h>
 #include <linux/netfilter/nfnetlink_queue.h>
 #include <linux/list.h>
@@ -396,14 +397,18 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
                                         htonl(br_port_get_rcu(indev)->br->dev->ifindex)))
                                goto nla_put_failure;
                } else {
+                       int physinif;
+
                        /* Case 2: indev is bridge group, we need to look for
                         * physical device (when called from ipv4) */
                        if (nla_put_be32(skb, NFQA_IFINDEX_INDEV,
                                         htonl(indev->ifindex)))
                                goto nla_put_failure;
-                       if (entskb->nf_bridge && entskb->nf_bridge->physindev &&
+
+                       physinif = nf_bridge_get_physinif(entskb);
+                       if (physinif &&
                            nla_put_be32(skb, NFQA_IFINDEX_PHYSINDEV,
-                                        htonl(entskb->nf_bridge->physindev->ifindex)))
+                                        htonl(physinif)))
                                goto nla_put_failure;
                }
 #endif
@@ -426,14 +431,18 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
                                         htonl(br_port_get_rcu(outdev)->br->dev->ifindex)))
                                goto nla_put_failure;
                } else {
+                       int physoutif;
+
                        /* Case 2: outdev is bridge group, we need to look for
                         * physical output device (when called from ipv4) */
                        if (nla_put_be32(skb, NFQA_IFINDEX_OUTDEV,
                                         htonl(outdev->ifindex)))
                                goto nla_put_failure;
-                       if (entskb->nf_bridge && entskb->nf_bridge->physoutdev &&
+
+                       physoutif = nf_bridge_get_physoutif(entskb);
+                       if (physoutif &&
                            nla_put_be32(skb, NFQA_IFINDEX_PHYSOUTDEV,
-                                        htonl(entskb->nf_bridge->physoutdev->ifindex)))
+                                        htonl(physoutif)))
                                goto nla_put_failure;
                }
 #endif
@@ -765,11 +774,12 @@ dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex)
                        return 1;
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
        if (entry->skb->nf_bridge) {
-               if (entry->skb->nf_bridge->physindev &&
-                   entry->skb->nf_bridge->physindev->ifindex == ifindex)
-                       return 1;
-               if (entry->skb->nf_bridge->physoutdev &&
-                   entry->skb->nf_bridge->physoutdev->ifindex == ifindex)
+               int physinif, physoutif;
+
+               physinif = nf_bridge_get_physinif(entry->skb);
+               physoutif = nf_bridge_get_physoutif(entry->skb);
+
+               if (physinif == ifindex || physoutif == ifindex)
                        return 1;
        }
 #endif
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
new file mode 100644 (file)
index 0000000..eeb72de
--- /dev/null
@@ -0,0 +1,218 @@
+/*
+ * Copyright (c) 2015 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+
+struct nft_dynset {    /* private data of one "dynset" expression */
+       struct nft_set                  *set;           /* set bound by nft_dynset_init() */
+       struct nft_set_ext_tmpl         tmpl;           /* extension layout used for new elements */
+       enum nft_dynset_ops             op:8;           /* value of NFTA_DYNSET_OP */
+       enum nft_registers              sreg_key:8;     /* register holding the element key */
+       enum nft_registers              sreg_data:8;    /* register holding the element data (maps only) */
+       u64                             timeout;        /* NFTA_DYNSET_TIMEOUT, 0 when not given */
+       struct nft_set_binding          binding;        /* passed to nf_tables_bind_set()/nf_tables_unbind_set() */
+};
+
+/* Constructor callback handed to set->ops->update(): build a new element
+ * from the key/data registers named by the expression.
+ *
+ * For size-limited sets a slot is reserved in set->nelems first and given
+ * back if the element cannot be initialised. Returns the new element or
+ * NULL.
+ */
+static void *nft_dynset_new(struct nft_set *set, const struct nft_expr *expr,
+                           struct nft_data data[NFT_REG_MAX + 1])
+{
+       const struct nft_dynset *priv = nft_expr_priv(expr);
+       void *new_elem;
+
+       if (set->size && !atomic_add_unless(&set->nelems, 1, set->size))
+               return NULL;
+
+       /* An expression-level timeout overrides the set's default. */
+       new_elem = nft_set_elem_init(set, &priv->tmpl,
+                                    &data[priv->sreg_key],
+                                    &data[priv->sreg_data],
+                                    priv->timeout ? : set->timeout,
+                                    GFP_ATOMIC);
+       if (new_elem != NULL)
+               return new_elem;
+
+       if (set->size)
+               atomic_dec(&set->nelems);
+       return NULL;
+}
+
+/* Evaluate a "dynset" expression on the packet path.
+ *
+ * The key in register sreg_key is handed to the set's ->update operation
+ * with nft_dynset_new() as the constructor callback. If that succeeds, the
+ * operation is NFT_DYNSET_OP_UPDATE and the element carries an expiration
+ * extension, the expiration is pushed forward by the configured timeout and
+ * evaluation continues. In every other case the verdict register is set to
+ * NFT_BREAK.
+ */
+static void nft_dynset_eval(const struct nft_expr *expr,
+                           struct nft_data data[NFT_REG_MAX + 1],
+                           const struct nft_pktinfo *pkt)
+{
+       const struct nft_dynset *priv = nft_expr_priv(expr);
+       struct nft_set *set = priv->set;
+       const struct nft_set_ext *ext;
+       u64 timeout;
+
+       if (set->ops->update(set, &data[priv->sreg_key], nft_dynset_new,
+                            expr, data, &ext)) {
+               if (priv->op == NFT_DYNSET_OP_UPDATE &&
+                   nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) {
+                       timeout = priv->timeout ? : set->timeout;
+                       /* Timeouts are kept in milliseconds, exactly as
+                        * nft_set_elem_init() treats them, so convert
+                        * before adding to jiffies; a raw addition is
+                        * only right when HZ == 1000.
+                        */
+                       *nft_set_ext_expiration(ext) =
+                               jiffies + msecs_to_jiffies(timeout);
+                       return;
+               }
+       }
+
+       data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+}
+
+static const struct nla_policy nft_dynset_policy[NFTA_DYNSET_MAX + 1] = {
+       [NFTA_DYNSET_SET_NAME]  = { .type = NLA_STRING },       /* mandatory in nft_dynset_init() */
+       [NFTA_DYNSET_SET_ID]    = { .type = NLA_U32 },          /* used when the lookup by name fails */
+       [NFTA_DYNSET_OP]        = { .type = NLA_U32 },          /* mandatory; NFT_DYNSET_OP_ADD or NFT_DYNSET_OP_UPDATE */
+       [NFTA_DYNSET_SREG_KEY]  = { .type = NLA_U32 },          /* mandatory */
+       [NFTA_DYNSET_SREG_DATA] = { .type = NLA_U32 },          /* required for NFT_SET_MAP sets, rejected otherwise */
+       [NFTA_DYNSET_TIMEOUT]   = { .type = NLA_U64 },          /* only accepted for NFT_SET_TIMEOUT sets */
+};
+
+/* Parse the netlink attributes of a "dynset" expression and bind it to
+ * its set.
+ *
+ * Returns 0 on success or a negative errno:
+ *   -EINVAL      a mandatory attribute is missing, a timeout was given for
+ *                a set without NFT_SET_TIMEOUT, or the data register does
+ *                not match the set being (or not being) a map;
+ *   -EOPNOTSUPP  the set backend has no ->update operation, the operation
+ *                is unknown, NFT_DYNSET_OP_UPDATE is requested on a set
+ *                without timeouts, or the map holds verdicts;
+ *   -EBUSY       the set is constant;
+ *   or whatever the set lookup, register validation or
+ *   nf_tables_bind_set() return.
+ */
+static int nft_dynset_init(const struct nft_ctx *ctx,
+                          const struct nft_expr *expr,
+                          const struct nlattr * const tb[])
+{
+       struct nft_dynset *priv = nft_expr_priv(expr);
+       struct nft_set *set;
+       u64 timeout;
+       int err;
+
+       if (tb[NFTA_DYNSET_SET_NAME] == NULL ||
+           tb[NFTA_DYNSET_OP] == NULL ||
+           tb[NFTA_DYNSET_SREG_KEY] == NULL)
+               return -EINVAL;
+
+       set = nf_tables_set_lookup(ctx->table, tb[NFTA_DYNSET_SET_NAME]);
+       if (IS_ERR(set)) {
+               if (tb[NFTA_DYNSET_SET_ID])
+                       set = nf_tables_set_lookup_byid(ctx->net,
+                                                       tb[NFTA_DYNSET_SET_ID]);
+               if (IS_ERR(set))
+                       return PTR_ERR(set);
+       }
+
+       /* nft_dynset_eval() calls ->update unconditionally, so refuse to
+        * bind to a set whose backend does not provide it.
+        */
+       if (set->ops->update == NULL)
+               return -EOPNOTSUPP;
+
+       if (set->flags & NFT_SET_CONSTANT)
+               return -EBUSY;
+
+       priv->op = ntohl(nla_get_be32(tb[NFTA_DYNSET_OP]));
+       switch (priv->op) {
+       case NFT_DYNSET_OP_ADD:
+               break;
+       case NFT_DYNSET_OP_UPDATE:
+               if (!(set->flags & NFT_SET_TIMEOUT))
+                       return -EOPNOTSUPP;
+               break;
+       default:
+               return -EOPNOTSUPP;
+       }
+
+       timeout = 0;
+       if (tb[NFTA_DYNSET_TIMEOUT] != NULL) {
+               if (!(set->flags & NFT_SET_TIMEOUT))
+                       return -EINVAL;
+               timeout = be64_to_cpu(nla_get_be64(tb[NFTA_DYNSET_TIMEOUT]));
+       }
+
+       priv->sreg_key = ntohl(nla_get_be32(tb[NFTA_DYNSET_SREG_KEY]));
+       err = nft_validate_input_register(priv->sreg_key);
+       if (err < 0)
+               return err;
+
+       if (tb[NFTA_DYNSET_SREG_DATA] != NULL) {
+               if (!(set->flags & NFT_SET_MAP))
+                       return -EINVAL;
+               if (set->dtype == NFT_DATA_VERDICT)
+                       return -EOPNOTSUPP;
+
+               priv->sreg_data = ntohl(nla_get_be32(tb[NFTA_DYNSET_SREG_DATA]));
+               err = nft_validate_input_register(priv->sreg_data);
+               if (err < 0)
+                       return err;
+       } else if (set->flags & NFT_SET_MAP)
+               return -EINVAL;
+
+       /* Describe the layout of elements created by nft_dynset_new(). */
+       nft_set_ext_prepare(&priv->tmpl);
+       nft_set_ext_add_length(&priv->tmpl, NFT_SET_EXT_KEY, set->klen);
+       if (set->flags & NFT_SET_MAP)
+               nft_set_ext_add_length(&priv->tmpl, NFT_SET_EXT_DATA, set->dlen);
+       if (set->flags & NFT_SET_TIMEOUT) {
+               if (timeout || set->timeout)
+                       nft_set_ext_add(&priv->tmpl, NFT_SET_EXT_EXPIRATION);
+       }
+
+       priv->timeout = timeout;
+
+       err = nf_tables_bind_set(ctx, set, &priv->binding);
+       if (err < 0)
+               return err;
+
+       priv->set = set;
+       return 0;
+}
+
+static void nft_dynset_destroy(const struct nft_ctx *ctx,
+                              const struct nft_expr *expr)
+{
+       struct nft_dynset *priv = nft_expr_priv(expr);
+       /* Release the binding that nft_dynset_init() created on the set. */
+       nf_tables_unbind_set(ctx, priv->set, &priv->binding);
+}
+
+/* Dump the expression's configuration to @skb as netlink attributes.
+ * Returns 0 on success, or -1 as soon as one attribute cannot be added.
+ */
+static int nft_dynset_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+       const struct nft_dynset *priv = nft_expr_priv(expr);
+       const struct nft_set *set = priv->set;
+
+       if (nla_put_be32(skb, NFTA_DYNSET_SREG_KEY, htonl(priv->sreg_key)))
+               return -1;
+
+       /* The data register is only reported for maps. */
+       if ((set->flags & NFT_SET_MAP) &&
+           nla_put_be32(skb, NFTA_DYNSET_SREG_DATA, htonl(priv->sreg_data)))
+               return -1;
+
+       if (nla_put_be32(skb, NFTA_DYNSET_OP, htonl(priv->op)))
+               return -1;
+
+       if (nla_put_string(skb, NFTA_DYNSET_SET_NAME, set->name))
+               return -1;
+
+       if (nla_put_be64(skb, NFTA_DYNSET_TIMEOUT, cpu_to_be64(priv->timeout)))
+               return -1;
+
+       return 0;
+}
+
+static struct nft_expr_type nft_dynset_type;   /* forward declaration: nft_dynset_ops.type points at it */
+static const struct nft_expr_ops nft_dynset_ops = {
+       .type           = &nft_dynset_type,
+       .size           = NFT_EXPR_SIZE(sizeof(struct nft_dynset)),     /* room for the private struct nft_dynset */
+       .eval           = nft_dynset_eval,
+       .init           = nft_dynset_init,
+       .destroy        = nft_dynset_destroy,
+       .dump           = nft_dynset_dump,
+};
+
+static struct nft_expr_type nft_dynset_type __read_mostly = {
+       .name           = "dynset",
+       .ops            = &nft_dynset_ops,
+       .policy         = nft_dynset_policy,    /* attribute policy for NFTA_DYNSET_* */
+       .maxattr        = NFTA_DYNSET_MAX,
+       .owner          = THIS_MODULE,
+};
+
+int __init nft_dynset_module_init(void)
+{
+       return nft_register_expr(&nft_dynset_type);     /* register "dynset"; the result is returned unchanged */
+}
+
+void nft_dynset_module_exit(void)
+{
+       nft_unregister_expr(&nft_dynset_type);  /* counterpart of nft_dynset_module_init() */
+}
index c7e1a9d7d46f515c9ef80f67d8fffe630ddafb01..bc23806b7fbef29005dbb9d4adc35ae4d76ff16e 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/log2.h>
 #include <linux/jhash.h>
 #include <linux/netlink.h>
+#include <linux/workqueue.h>
 #include <linux/rhashtable.h>
 #include <linux/netfilter.h>
 #include <linux/netfilter/nf_tables.h>
@@ -25,6 +26,7 @@
 
 struct nft_hash {
        struct rhashtable               ht;
+       struct delayed_work             gc_work;
 };
 
 struct nft_hash_elem {
@@ -62,6 +64,8 @@ static inline int nft_hash_cmp(struct rhashtable_compare_arg *arg,
 
        if (nft_data_cmp(nft_set_ext_key(&he->ext), x->key, x->set->klen))
                return 1;
+       if (nft_set_elem_expired(&he->ext))
+               return 1;
        if (!nft_set_elem_active(&he->ext, x->genmask))
                return 1;
        return 0;
@@ -86,6 +90,42 @@ static bool nft_hash_lookup(const struct nft_set *set,
        return !!he;
 }
 
+/* Find the element matching @key, creating it when it does not exist yet.
+ *
+ * If the lookup misses, a new element is obtained from the @new callback
+ * (called with @set, @expr and @data) and inserted into the table.  On
+ * success the element's extension area is stored in *@ext and true is
+ * returned.  false is returned when @new yields NULL or when the insertion
+ * fails; in the latter case the freshly created element is destroyed again.
+ */
+static bool nft_hash_update(struct nft_set *set, const struct nft_data *key,
+                           void *(*new)(struct nft_set *,
+                                        const struct nft_expr *,
+                                        struct nft_data []),
+                           const struct nft_expr *expr,
+                           struct nft_data data[],
+                           const struct nft_set_ext **ext)
+{
+       struct nft_hash *hash = nft_set_priv(set);
+       struct nft_hash_cmp_arg cmp_arg = {
+               .genmask = NFT_GENMASK_ANY,
+               .set     = set,
+               .key     = key,
+       };
+       struct nft_hash_elem *elem;
+
+       elem = rhashtable_lookup_fast(&hash->ht, &cmp_arg, nft_hash_params);
+       if (elem == NULL) {
+               /* Miss: build a new element and try to publish it. */
+               elem = new(set, expr, data);
+               if (elem == NULL)
+                       return false;
+
+               if (rhashtable_lookup_insert_key(&hash->ht, &cmp_arg,
+                                                &elem->node,
+                                                nft_hash_params)) {
+                       nft_set_elem_destroy(set, elem);
+                       return false;
+               }
+       }
+
+       *ext = &elem->ext;
+       return true;
+}
+
 static int nft_hash_insert(const struct nft_set *set,
                           const struct nft_set_elem *elem)
 {
@@ -107,6 +147,7 @@ static void nft_hash_activate(const struct nft_set *set,
        struct nft_hash_elem *he = elem->priv;
 
        nft_set_elem_change_active(set, &he->ext);
+       nft_set_elem_clear_busy(&he->ext);
 }
 
 static void *nft_hash_deactivate(const struct nft_set *set,
@@ -120,9 +161,15 @@ static void *nft_hash_deactivate(const struct nft_set *set,
                .key     = &elem->key,
        };
 
+       rcu_read_lock();
        he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params);
-       if (he != NULL)
-               nft_set_elem_change_active(set, &he->ext);
+       if (he != NULL) {
+               if (!nft_set_elem_mark_busy(&he->ext))
+                       nft_set_elem_change_active(set, &he->ext);
+               else
+                       he = NULL;
+       }
+       rcu_read_unlock();
 
        return he;
 }
@@ -170,6 +217,8 @@ static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set,
 
                if (iter->count < iter->skip)
                        goto cont;
+               if (nft_set_elem_expired(&he->ext))
+                       goto cont;
                if (!nft_set_elem_active(&he->ext, genmask))
                        goto cont;
 
@@ -188,6 +237,55 @@ out:
        rhashtable_walk_exit(&hti);
 }
 
+/* Periodic garbage collection of expired set elements.
+ *
+ * Walks the whole hash table.  Every element that has expired and is not
+ * already marked busy is unlinked from the table, accounted for in
+ * set->nelems and handed to a GC batch.  The work item always re-queues
+ * itself after nft_set_gc_interval(set), including when the walk could not
+ * be set up or had to be aborted.
+ */
+static void nft_hash_gc(struct work_struct *work)
+{
+       struct nft_set *set;
+       struct nft_hash_elem *he;
+       struct nft_hash *priv;
+       struct nft_set_gc_batch *gcb = NULL;
+       struct rhashtable_iter hti;
+       int err;
+
+       priv = container_of(work, struct nft_hash, gc_work.work);
+       set  = nft_set_container_of(priv);
+
+       err = rhashtable_walk_init(&priv->ht, &hti);
+       if (err)
+               goto schedule;
+
+       /* -EAGAIN is not treated as fatal here; the walk goes ahead. */
+       err = rhashtable_walk_start(&hti);
+       if (err && err != -EAGAIN)
+               goto out;
+
+       while ((he = rhashtable_walk_next(&hti))) {
+               if (IS_ERR(he)) {
+                       /* Only errors other than -EAGAIN end the walk. */
+                       if (PTR_ERR(he) != -EAGAIN)
+                               goto out;
+                       continue;
+               }
+
+               if (!nft_set_elem_expired(&he->ext))
+                       continue;
+               /* Somebody else already owns this element; leave it alone. */
+               if (nft_set_elem_mark_busy(&he->ext))
+                       continue;
+
+               gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC);
+               if (gcb == NULL) {
+                       /* No batch to hand the element to.  Drop the busy
+                        * mark taken above, otherwise every later run would
+                        * skip this element and it could never be reclaimed.
+                        */
+                       nft_set_elem_clear_busy(&he->ext);
+                       goto out;
+               }
+               rhashtable_remove_fast(&priv->ht, &he->node, nft_hash_params);
+               atomic_dec(&set->nelems);
+               nft_set_gc_batch_add(gcb, he);
+       }
+out:
+       rhashtable_walk_stop(&hti);
+       rhashtable_walk_exit(&hti);
+
+       nft_set_gc_batch_complete(gcb);
+schedule:
+       queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
+                          nft_set_gc_interval(set));
+}
+
 static unsigned int nft_hash_privsize(const struct nlattr * const nla[])
 {
        return sizeof(struct nft_hash);
@@ -207,11 +305,20 @@ static int nft_hash_init(const struct nft_set *set,
 {
        struct nft_hash *priv = nft_set_priv(set);
        struct rhashtable_params params = nft_hash_params;
+       int err;
 
        params.nelem_hint = desc->size ?: NFT_HASH_ELEMENT_HINT;
        params.key_len    = set->klen;
 
-       return rhashtable_init(&priv->ht, &params);
+       err = rhashtable_init(&priv->ht, &params);
+       if (err < 0)
+               return err;
+
+       INIT_DEFERRABLE_WORK(&priv->gc_work, nft_hash_gc);
+       if (set->flags & NFT_SET_TIMEOUT)
+               queue_delayed_work(system_power_efficient_wq, &priv->gc_work,
+                                  nft_set_gc_interval(set));
+       return 0;
 }
 
 static void nft_hash_elem_destroy(void *ptr, void *arg)
@@ -223,6 +330,7 @@ static void nft_hash_destroy(const struct nft_set *set)
 {
        struct nft_hash *priv = nft_set_priv(set);
 
+       cancel_delayed_work_sync(&priv->gc_work);
        rhashtable_free_and_destroy(&priv->ht, nft_hash_elem_destroy,
                                    (void *)set);
 }
@@ -263,8 +371,9 @@ static struct nft_set_ops nft_hash_ops __read_mostly = {
        .deactivate     = nft_hash_deactivate,
        .remove         = nft_hash_remove,
        .lookup         = nft_hash_lookup,
+       .update         = nft_hash_update,
        .walk           = nft_hash_walk,
-       .features       = NFT_SET_MAP,
+       .features       = NFT_SET_MAP | NFT_SET_TIMEOUT,
        .owner          = THIS_MODULE,
 };
 
index a5f30b8760eab5aa476f0afc13fe0e8686c9ff47..d8cf86fb30fc33fdf657a03da1320f331bc38c9a 100644 (file)
@@ -92,6 +92,8 @@ static int nft_lookup_init(const struct nft_ctx *ctx,
        } else if (set->flags & NFT_SET_MAP)
                return -EINVAL;
 
+       priv->binding.flags = set->flags & NFT_SET_MAP;
+
        err = nf_tables_bind_set(ctx, set, &priv->binding);
        if (err < 0)
                return err;
index 5197874372ec4a2055a3f9251f3a3ec248f53fbb..d79ce88be77f3568aa9e6409f7de20242d9c343f 100644 (file)
@@ -166,9 +166,8 @@ void nft_meta_get_eval(const struct nft_expr *expr,
                dest->data[0] = out->group;
                break;
        case NFT_META_CGROUP:
-               if (skb->sk == NULL)
-                       break;
-
+               if (skb->sk == NULL || !sk_fullsock(skb->sk))
+                       goto err;
                dest->data[0] = skb->sk->sk_classid;
                break;
        default:
index 7198d660b4dea1e9e79c6f9a13f4e6669bca569d..a1d126f2946305a10ccc04ce92e469b1255f60f9 100644 (file)
@@ -39,7 +39,7 @@ cgroup_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
        const struct xt_cgroup_info *info = par->matchinfo;
 
-       if (skb->sk == NULL)
+       if (skb->sk == NULL || !sk_fullsock(skb->sk))
                return false;
 
        return (info->id == skb->sk->sk_classid) ^ info->invert;
index 50a52043650fd95989eb6618a36cbb8dba0f6b18..1caaccbc306c7751f717aa1e223cf2ef30f00bec 100644 (file)
@@ -25,16 +25,15 @@ MODULE_ALIAS("ip6t_physdev");
 static bool
 physdev_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
-       static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
        const struct xt_physdev_info *info = par->matchinfo;
+       const struct net_device *physdev;
        unsigned long ret;
        const char *indev, *outdev;
-       const struct nf_bridge_info *nf_bridge;
 
        /* Not a bridged IP packet or no info available yet:
         * LOCAL_OUT/mangle and LOCAL_OUT/nat don't know if
         * the destination device will be a bridge. */
-       if (!(nf_bridge = skb->nf_bridge)) {
+       if (!skb->nf_bridge) {
                /* Return MATCH if the invert flags of the used options are on */
                if ((info->bitmask & XT_PHYSDEV_OP_BRIDGED) &&
                    !(info->invert & XT_PHYSDEV_OP_BRIDGED))
@@ -54,30 +53,41 @@ physdev_mt(const struct sk_buff *skb, struct xt_action_param *par)
                return true;
        }
 
+       physdev = nf_bridge_get_physoutdev(skb);
+       outdev = physdev ? physdev->name : NULL;
+
        /* This only makes sense in the FORWARD and POSTROUTING chains */
        if ((info->bitmask & XT_PHYSDEV_OP_BRIDGED) &&
-           (!!nf_bridge->physoutdev ^ !(info->invert & XT_PHYSDEV_OP_BRIDGED)))
+           (!!outdev ^ !(info->invert & XT_PHYSDEV_OP_BRIDGED)))
                return false;
 
+       physdev = nf_bridge_get_physindev(skb);
+       indev = physdev ? physdev->name : NULL;
+
        if ((info->bitmask & XT_PHYSDEV_OP_ISIN &&
-           (!nf_bridge->physindev ^ !!(info->invert & XT_PHYSDEV_OP_ISIN))) ||
+           (!indev ^ !!(info->invert & XT_PHYSDEV_OP_ISIN))) ||
            (info->bitmask & XT_PHYSDEV_OP_ISOUT &&
-           (!nf_bridge->physoutdev ^ !!(info->invert & XT_PHYSDEV_OP_ISOUT))))
+           (!outdev ^ !!(info->invert & XT_PHYSDEV_OP_ISOUT))))
                return false;
 
        if (!(info->bitmask & XT_PHYSDEV_OP_IN))
                goto match_outdev;
-       indev = nf_bridge->physindev ? nf_bridge->physindev->name : nulldevname;
-       ret = ifname_compare_aligned(indev, info->physindev, info->in_mask);
 
-       if (!ret ^ !(info->invert & XT_PHYSDEV_OP_IN))
-               return false;
+       if (indev) {
+               ret = ifname_compare_aligned(indev, info->physindev,
+                                            info->in_mask);
+
+               if (!ret ^ !(info->invert & XT_PHYSDEV_OP_IN))
+                       return false;
+       }
 
 match_outdev:
        if (!(info->bitmask & XT_PHYSDEV_OP_OUT))
                return true;
-       outdev = nf_bridge->physoutdev ?
-                nf_bridge->physoutdev->name : nulldevname;
+
+       if (!outdev)
+               return false;
+
        ret = ifname_compare_aligned(outdev, info->physoutdev, info->out_mask);
 
        return (!!ret ^ !(info->invert & XT_PHYSDEV_OP_OUT));
index 895534e87a47a5bb3c4452f93a019831c43754ca..e092cb04632607f21f253d84a8692681a1fefe89 100644 (file)
@@ -143,13 +143,10 @@ static bool xt_socket_sk_is_transparent(struct sock *sk)
        }
 }
 
-static bool
-socket_match(const struct sk_buff *skb, struct xt_action_param *par,
-            const struct xt_socket_mtinfo1 *info)
+static struct sock *xt_socket_lookup_slow_v4(const struct sk_buff *skb,
+                                            const struct net_device *indev)
 {
        const struct iphdr *iph = ip_hdr(skb);
-       struct udphdr _hdr, *hp = NULL;
-       struct sock *sk = skb->sk;
        __be32 uninitialized_var(daddr), uninitialized_var(saddr);
        __be16 uninitialized_var(dport), uninitialized_var(sport);
        u8 uninitialized_var(protocol);
@@ -159,10 +156,12 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
 #endif
 
        if (iph->protocol == IPPROTO_UDP || iph->protocol == IPPROTO_TCP) {
+               struct udphdr _hdr, *hp;
+
                hp = skb_header_pointer(skb, ip_hdrlen(skb),
                                        sizeof(_hdr), &_hdr);
                if (hp == NULL)
-                       return false;
+                       return NULL;
 
                protocol = iph->protocol;
                saddr = iph->saddr;
@@ -172,16 +171,17 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
 
        } else if (iph->protocol == IPPROTO_ICMP) {
                if (extract_icmp4_fields(skb, &protocol, &saddr, &daddr,
-                                       &sport, &dport))
-                       return false;
+                                        &sport, &dport))
+                       return NULL;
        } else {
-               return false;
+               return NULL;
        }
 
 #ifdef XT_SOCKET_HAVE_CONNTRACK
-       /* Do the lookup with the original socket address in case this is a
-        * reply packet of an established SNAT-ted connection. */
-
+       /* Do the lookup with the original socket address in
+        * case this is a reply packet of an established
+        * SNAT-ted connection.
+        */
        ct = nf_ct_get(skb, &ctinfo);
        if (ct && !nf_ct_is_untracked(ct) &&
            ((iph->protocol != IPPROTO_ICMP &&
@@ -197,10 +197,18 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
        }
 #endif
 
+       return xt_socket_get_sock_v4(dev_net(skb->dev), protocol, saddr, daddr,
+                                    sport, dport, indev);
+}
+
+static bool
+socket_match(const struct sk_buff *skb, struct xt_action_param *par,
+            const struct xt_socket_mtinfo1 *info)
+{
+       struct sock *sk = skb->sk;
+
        if (!sk)
-               sk = xt_socket_get_sock_v4(dev_net(skb->dev), protocol,
-                                          saddr, daddr, sport, dport,
-                                          par->in);
+               sk = xt_socket_lookup_slow_v4(skb, par->in);
        if (sk) {
                bool wildcard;
                bool transparent = true;
@@ -225,12 +233,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
                        sk = NULL;
        }
 
-       pr_debug("proto %hhu %pI4:%hu -> %pI4:%hu (orig %pI4:%hu) sock %p\n",
-                protocol, &saddr, ntohs(sport),
-                &daddr, ntohs(dport),
-                &iph->daddr, hp ? ntohs(hp->dest) : 0, sk);
-
-       return (sk != NULL);
+       return sk != NULL;
 }
 
 static bool
@@ -327,28 +330,26 @@ xt_socket_get_sock_v6(struct net *net, const u8 protocol,
        return NULL;
 }
 
-static bool
-socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par)
+static struct sock *xt_socket_lookup_slow_v6(const struct sk_buff *skb,
+                                            const struct net_device *indev)
 {
-       struct ipv6hdr ipv6_var, *iph = ipv6_hdr(skb);
-       struct udphdr _hdr, *hp = NULL;
-       struct sock *sk = skb->sk;
-       const struct in6_addr *daddr = NULL, *saddr = NULL;
        __be16 uninitialized_var(dport), uninitialized_var(sport);
-       int thoff = 0, uninitialized_var(tproto);
-       const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo;
+       const struct in6_addr *daddr = NULL, *saddr = NULL;
+       struct ipv6hdr *iph = ipv6_hdr(skb);
+       int thoff = 0, tproto;
 
        tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL);
        if (tproto < 0) {
                pr_debug("unable to find transport header in IPv6 packet, dropping\n");
-               return NF_DROP;
+               return NULL;
        }
 
        if (tproto == IPPROTO_UDP || tproto == IPPROTO_TCP) {
-               hp = skb_header_pointer(skb, thoff,
-                                       sizeof(_hdr), &_hdr);
+               struct udphdr _hdr, *hp;
+
+               hp = skb_header_pointer(skb, thoff, sizeof(_hdr), &_hdr);
                if (hp == NULL)
-                       return false;
+                       return NULL;
 
                saddr = &iph->saddr;
                sport = hp->source;
@@ -356,17 +357,27 @@ socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par)
                dport = hp->dest;
 
        } else if (tproto == IPPROTO_ICMPV6) {
+               struct ipv6hdr ipv6_var;
+
                if (extract_icmp6_fields(skb, thoff, &tproto, &saddr, &daddr,
                                         &sport, &dport, &ipv6_var))
-                       return false;
+                       return NULL;
        } else {
-               return false;
+               return NULL;
        }
 
+       return xt_socket_get_sock_v6(dev_net(skb->dev), tproto, saddr, daddr,
+                                    sport, dport, indev);
+}
+
+static bool
+socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par)
+{
+       const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo;
+       struct sock *sk = skb->sk;
+
        if (!sk)
-               sk = xt_socket_get_sock_v6(dev_net(skb->dev), tproto,
-                                          saddr, daddr, sport, dport,
-                                          par->in);
+               sk = xt_socket_lookup_slow_v6(skb, par->in);
        if (sk) {
                bool wildcard;
                bool transparent = true;
@@ -391,13 +402,7 @@ socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par)
                        sk = NULL;
        }
 
-       pr_debug("proto %hhd %pI6:%hu -> %pI6:%hu "
-                "(orig %pI6:%hu) sock %p\n",
-                tproto, saddr, ntohs(sport),
-                daddr, ntohs(dport),
-                &iph->daddr, hp ? ntohs(hp->dest) : 0, sk);
-
-       return (sk != NULL);
+       return sk != NULL;
 }
 #endif