Merge tag 'drm-intel-fixes-2015-07-15' into drm-intel-next-queued
[firefly-linux-kernel-4.4.55.git] / net/core/dev.c
index aa82f9ab6a36d164769bf7c9633fcdfd5971466f..6778a9999d525307d5bd41a1750a6e96a6e22bf3 100644 (file)
 #include <linux/if_macvlan.h>
 #include <linux/errqueue.h>
 #include <linux/hrtimer.h>
+#include <linux/netfilter_ingress.h>
 
 #include "net-sysfs.h"
 
@@ -468,10 +469,14 @@ EXPORT_SYMBOL(dev_remove_pack);
  */
 void dev_add_offload(struct packet_offload *po)
 {
-       struct list_head *head = &offload_base;
+       struct packet_offload *elem;
 
        spin_lock(&offload_lock);
-       list_add_rcu(&po->list, head);
+       list_for_each_entry(elem, &offload_base, list) {
+               if (po->priority < elem->priority)
+                       break;
+       }
+       list_add_rcu(&po->list, elem->list.prev);
        spin_unlock(&offload_lock);
 }
 EXPORT_SYMBOL(dev_add_offload);
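
The hunk above changes dev_add_offload() from head insertion to insertion in ascending priority order. A minimal userspace sketch of the same ordering rule (illustrative only; the struct and helper names here are made up, not kernel API):

#include <stdio.h>

struct offload { int priority; struct offload *next; };

/* Walk past entries whose priority is <= the new one, then link in before the
 * first strictly-higher entry, so equal priorities keep insertion order. */
static void add_offload(struct offload **head, struct offload *po)
{
        while (*head && (*head)->priority <= po->priority)
                head = &(*head)->next;
        po->next = *head;
        *head = po;
}

int main(void)
{
        struct offload a = { .priority = 10 }, b = { .priority = 5 }, c = { .priority = 10 };
        struct offload *head = NULL;

        add_offload(&head, &a);
        add_offload(&head, &b);
        add_offload(&head, &c);

        for (struct offload *p = head; p; p = p->next)
                printf("%d\n", p->priority);    /* prints 5, 10, 10 */
        return 0;
}

As in the kernel loop, an entry that ties on priority lands after the existing ones, and an entry with the highest priority value ends up at the tail.
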
@@ -1630,7 +1635,7 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
 }
 EXPORT_SYMBOL(call_netdevice_notifiers);
 
-#ifdef CONFIG_NET_CLS_ACT
+#ifdef CONFIG_NET_INGRESS
 static struct static_key ingress_needed __read_mostly;
 
 void net_inc_ingress_queue(void)
@@ -2343,6 +2348,34 @@ void netif_device_attach(struct net_device *dev)
 }
 EXPORT_SYMBOL(netif_device_attach);
 
+/*
+ * Returns a Tx hash based on the given packet descriptor and the number of
+ * Tx queues to be used as a distribution range.
+ */
+u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb,
+                 unsigned int num_tx_queues)
+{
+       u32 hash;
+       u16 qoffset = 0;
+       u16 qcount = num_tx_queues;
+
+       if (skb_rx_queue_recorded(skb)) {
+               hash = skb_get_rx_queue(skb);
+               while (unlikely(hash >= num_tx_queues))
+                       hash -= num_tx_queues;
+               return hash;
+       }
+
+       if (dev->num_tc) {
+               u8 tc = netdev_get_prio_tc_map(dev, skb->priority);
+               qoffset = dev->tc_to_txq[tc].offset;
+               qcount = dev->tc_to_txq[tc].count;
+       }
+
+       return (u16) reciprocal_scale(skb_get_hash(skb), qcount) + qoffset;
+}
+EXPORT_SYMBOL(__skb_tx_hash);
+
 static void skb_warn_bad_offload(const struct sk_buff *skb)
 {
        static const netdev_features_t null_features = 0;
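
__skb_tx_hash() above maps the flow hash into a traffic class's queue range without a modulo. A small userspace sketch of that math (illustrative only; reciprocal_scale() is re-implemented here from its kernel definition, and the hash and queue numbers are made up):

#include <stdint.h>
#include <stdio.h>

/* Same formula as the kernel's reciprocal_scale(): scale a 32-bit value
 * uniformly into [0, ep_ro) with a multiply and shift instead of '%'. */
static uint32_t reciprocal_scale(uint32_t val, uint32_t ep_ro)
{
        return (uint32_t)(((uint64_t)val * ep_ro) >> 32);
}

int main(void)
{
        uint32_t hash = 0xdeadbeef;        /* stand-in for skb_get_hash() */
        uint16_t qoffset = 4, qcount = 8;  /* e.g. one traffic class covering queues 4..11 */

        printf("txq %u\n", (unsigned)((uint16_t)reciprocal_scale(hash, qcount) + qoffset));
        /* prints: txq 10 */
        return 0;
}
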
@@ -2901,6 +2934,84 @@ int dev_loopback_xmit(struct sock *sk, struct sk_buff *skb)
 }
 EXPORT_SYMBOL(dev_loopback_xmit);
 
+static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
+{
+#ifdef CONFIG_XPS
+       struct xps_dev_maps *dev_maps;
+       struct xps_map *map;
+       int queue_index = -1;
+
+       rcu_read_lock();
+       dev_maps = rcu_dereference(dev->xps_maps);
+       if (dev_maps) {
+               map = rcu_dereference(
+                   dev_maps->cpu_map[skb->sender_cpu - 1]);
+               if (map) {
+                       if (map->len == 1)
+                               queue_index = map->queues[0];
+                       else
+                               queue_index = map->queues[reciprocal_scale(skb_get_hash(skb),
+                                                                          map->len)];
+                       if (unlikely(queue_index >= dev->real_num_tx_queues))
+                               queue_index = -1;
+               }
+       }
+       rcu_read_unlock();
+
+       return queue_index;
+#else
+       return -1;
+#endif
+}
+
+static u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
+{
+       struct sock *sk = skb->sk;
+       int queue_index = sk_tx_queue_get(sk);
+
+       if (queue_index < 0 || skb->ooo_okay ||
+           queue_index >= dev->real_num_tx_queues) {
+               int new_index = get_xps_queue(dev, skb);
+               if (new_index < 0)
+                       new_index = skb_tx_hash(dev, skb);
+
+               if (queue_index != new_index && sk &&
+                   rcu_access_pointer(sk->sk_dst_cache))
+                       sk_tx_queue_set(sk, new_index);
+
+               queue_index = new_index;
+       }
+
+       return queue_index;
+}
+
+struct netdev_queue *netdev_pick_tx(struct net_device *dev,
+                                   struct sk_buff *skb,
+                                   void *accel_priv)
+{
+       int queue_index = 0;
+
+#ifdef CONFIG_XPS
+       if (skb->sender_cpu == 0)
+               skb->sender_cpu = raw_smp_processor_id() + 1;
+#endif
+
+       if (dev->real_num_tx_queues != 1) {
+               const struct net_device_ops *ops = dev->netdev_ops;
+               if (ops->ndo_select_queue)
+                       queue_index = ops->ndo_select_queue(dev, skb, accel_priv,
+                                                           __netdev_pick_tx);
+               else
+                       queue_index = __netdev_pick_tx(dev, skb);
+
+               if (!accel_priv)
+                       queue_index = netdev_cap_txqueue(dev, queue_index);
+       }
+
+       skb_set_queue_mapping(skb, queue_index);
+       return netdev_get_tx_queue(dev, queue_index);
+}
+
 /**
  *     __dev_queue_xmit - transmit a buffer
  *     @skb: buffer to transmit
@@ -3513,66 +3624,47 @@ int (*br_fdb_test_addr_hook)(struct net_device *dev,
 EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
 #endif
 
-#ifdef CONFIG_NET_CLS_ACT
-/* TODO: Maybe we should just force sch_ingress to be compiled in
- * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
- * a compare and 2 stores extra right now if we dont have it on
- * but have CONFIG_NET_CLS_ACT
- * NOTE: This doesn't stop any functionality; if you dont have
- * the ingress scheduler, you just can't add policies on ingress.
- *
- */
-static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq)
-{
-       struct net_device *dev = skb->dev;
-       u32 ttl = G_TC_RTTL(skb->tc_verd);
-       int result = TC_ACT_OK;
-       struct Qdisc *q;
-
-       if (unlikely(MAX_RED_LOOP < ttl++)) {
-               net_warn_ratelimited("Redir loop detected Dropping packet (%d->%d)\n",
-                                    skb->skb_iif, dev->ifindex);
-               return TC_ACT_SHOT;
-       }
-
-       skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
-       skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
-
-       q = rcu_dereference(rxq->qdisc);
-       if (q != &noop_qdisc) {
-               spin_lock(qdisc_lock(q));
-               if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
-                       result = qdisc_enqueue_root(skb, q);
-               spin_unlock(qdisc_lock(q));
-       }
-
-       return result;
-}
-
 static inline struct sk_buff *handle_ing(struct sk_buff *skb,
                                         struct packet_type **pt_prev,
                                         int *ret, struct net_device *orig_dev)
 {
-       struct netdev_queue *rxq = rcu_dereference(skb->dev->ingress_queue);
+#ifdef CONFIG_NET_CLS_ACT
+       struct tcf_proto *cl = rcu_dereference_bh(skb->dev->ingress_cl_list);
+       struct tcf_result cl_res;
 
-       if (!rxq || rcu_access_pointer(rxq->qdisc) == &noop_qdisc)
+       /* If there's at least one ingress qdisc present somewhere (which is
+        * why we got here via the enabled static key), devices that have no
+        * ingress classifier configured bail out here.
+        */
+       if (!cl)
                return skb;
-
        if (*pt_prev) {
                *ret = deliver_skb(skb, *pt_prev, orig_dev);
                *pt_prev = NULL;
        }
 
-       switch (ing_filter(skb, rxq)) {
+       qdisc_skb_cb(skb)->pkt_len = skb->len;
+       skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
+       qdisc_bstats_update_cpu(cl->q, skb);
+
+       switch (tc_classify(skb, cl, &cl_res)) {
+       case TC_ACT_OK:
+       case TC_ACT_RECLASSIFY:
+               skb->tc_index = TC_H_MIN(cl_res.classid);
+               break;
        case TC_ACT_SHOT:
+               qdisc_qstats_drop_cpu(cl->q);
        case TC_ACT_STOLEN:
+       case TC_ACT_QUEUED:
                kfree_skb(skb);
                return NULL;
+       default:
+               break;
        }
-
+#endif /* CONFIG_NET_CLS_ACT */
        return skb;
 }
-#endif
 
 /**
  *     netdev_rx_handler_register - register receive handler
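
The rewritten handle_ing() above checks two gates before doing any per-packet classification: the global ingress_needed static key, flipped once any device configures ingress, and the device's own classifier list. A userspace sketch of that two-level pattern (illustrative only; the names and types below are made up, not kernel API):

#include <stdbool.h>
#include <stdio.h>

struct device { const char *name; void *ingress_cl_list; };

static int ingress_needed;                /* bumped whenever any device adds ingress */

static bool classify_ingress(struct device *dev)
{
        if (!ingress_needed)              /* fast path: nothing configured anywhere */
                return false;
        if (!dev->ingress_cl_list)        /* slow path entered, but not for this device */
                return false;
        return true;                      /* this is where tc_classify() would run */
}

int main(void)
{
        struct device eth0 = { "eth0", NULL }, eth1 = { "eth1", (void *)1 };

        ingress_needed++;                 /* eth1 configured an ingress qdisc */
        printf("%s: %d\n", eth0.name, classify_ingress(&eth0));   /* prints: eth0: 0 */
        printf("%s: %d\n", eth1.name, classify_ingress(&eth1));   /* prints: eth1: 1 */
        return 0;
}
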
@@ -3645,6 +3737,22 @@ static bool skb_pfmemalloc_protocol(struct sk_buff *skb)
        }
 }
 
+static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev,
+                            int *ret, struct net_device *orig_dev)
+{
+#ifdef CONFIG_NETFILTER_INGRESS
+       if (nf_hook_ingress_active(skb)) {
+               if (*pt_prev) {
+                       *ret = deliver_skb(skb, *pt_prev, orig_dev);
+                       *pt_prev = NULL;
+               }
+
+               return nf_hook_ingress(skb);
+       }
+#endif /* CONFIG_NETFILTER_INGRESS */
+       return 0;
+}
+
 static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
 {
        struct packet_type *ptype, *pt_prev;
@@ -3704,13 +3812,17 @@ another_round:
        }
 
 skip_taps:
-#ifdef CONFIG_NET_CLS_ACT
+#ifdef CONFIG_NET_INGRESS
        if (static_key_false(&ingress_needed)) {
                skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
                if (!skb)
                        goto unlock;
-       }
 
+               if (nf_ingress(skb, &pt_prev, &ret, orig_dev) < 0)
+                       goto unlock;
+       }
+#endif
+#ifdef CONFIG_NET_CLS_ACT
        skb->tc_verd = 0;
 ncls:
 #endif
@@ -6313,6 +6425,17 @@ static int netif_alloc_netdev_queues(struct net_device *dev)
        return 0;
 }
 
+void netif_tx_stop_all_queues(struct net_device *dev)
+{
+       unsigned int i;
+
+       for (i = 0; i < dev->num_tx_queues; i++) {
+               struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
+               netif_tx_stop_queue(txq);
+       }
+}
+EXPORT_SYMBOL(netif_tx_stop_all_queues);
+
 /**
  *     register_netdevice      - register a network device
  *     @dev: device to register
@@ -6862,6 +6985,9 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
        dev->group = INIT_NETDEV_GROUP;
        if (!dev->ethtool_ops)
                dev->ethtool_ops = &default_ethtool_ops;
+
+       nf_hook_ingress_init(dev);
+
        return dev;
 
 free_all: